* Added recompressTracked.php, the second part of the recompression project. Uses...
[lhc/web/wiklou.git] / includes / HistoryBlob.php
1 <?php
2
/**
 * Base class for general text storage via the "object" flag in old_flags, or
 * two-part external storage URLs. Used to represent efficient concatenated
 * storage, and migration-related pointer objects.
 */
interface HistoryBlob
{
	/**
	 * Adds an item of text and returns the key under which it was stored.
	 *
	 * If a stub object is created to point at the item, setLocation() must
	 * be called on that stub before it is stored to the database.
	 *
	 * @param string $text The text to store
	 * @return mixed The key to pass to getItem()
	 */
	public function addItem( $text );

	/**
	 * Get item by key, or false if the key is not present
	 *
	 * @param mixed $key A key previously returned by addItem()
	 * @return string|false
	 */
	public function getItem( $key );

	/**
	 * Set the "default text"
	 * This concept is an odd property of the current DB schema, whereby each text item has a revision
	 * associated with it. The default text is the text of the associated revision. There may, however,
	 * be other revisions in the same object.
	 *
	 * Default text is not required for two-part external storage URLs.
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Get default text. This is called from Revision::getRevisionText()
	 *
	 * @return string|false
	 */
	public function getText();
}
38
/**
 * Concatenated gzip (CGZ) storage
 * Improves compression ratio by concatenating like objects before gzipping
 *
 * Items are keyed by the md5 hash of their text. While the object is in its
 * compressed state, $mItems holds the deflated serialized array (a string)
 * instead of the array itself; methods that touch the items call
 * uncompress() first.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';
	/** Running total of strlen() of the items added to this instance (not serialized) */
	public $mSize = 0;
	/** isHappy() turns false once $mSize reaches this value */
	public $mMaxSize = 10000000;
	/** isHappy() turns false once the item count reaches this value */
	public $mMaxCount = 100;

	/**
	 * Constructor
	 *
	 * @throws MWException if zlib (gzdeflate) is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add an item of text and return its md5 hash, which is the key for
	 * getItem().
	 *
	 * @param string $text
	 * @return string The md5 hash of $text
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		// Identical text maps to the same key, so only count its size once.
		if ( !array_key_exists( $hash, $this->mItems ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}

		return $hash;
	}

	/**
	 * Get an item by hash, or false if the hash is not present
	 *
	 * @param string $hash
	 * @return string|false
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add $text as an item and remember its hash as the default item
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * Get the default item, or false if there is none
	 *
	 * @return string|false
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item. Does nothing if the hash is not present.
	 *
	 * @param string $hash
	 */
	public function removeItem( $hash ) {
		// $mItems is a string while compressed; make sure it is an array
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			$this->mSize -= strlen( $this->mItems[$hash] );
			unset( $this->mItems[$hash] );
		}
	}

	/**
	 * Compress the bulk data in the object
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Uncompress bulk data
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress the items before serialization.
	 * Note that mSize, mMaxSize and mMaxCount are not part of the
	 * serialized form.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Restore the item array after unserialization
	 */
	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		// count() is only meaningful on the uncompressed item array
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
132
133
/**
 * One-step cache variable to hold base blobs; operations that
 * pull multiple revisions may often pull multiple times from
 * the same blob. By keeping the last-used one open, we avoid
 * redundant unserialization and decompression overhead.
 *
 * The array is keyed by old_id. HistoryBlobStub::getText() replaces the
 * whole array each time it loads a blob, so it holds at most one entry
 * from that code path.
 */
global $wgBlobCache;
$wgBlobCache = array();
142
143
/**
 * Pointer object for an item within a CGZ blob stored in the text table.
 */
class HistoryBlobStub {
	/** old_id of the row holding the blob, content hash of the item, old_id of the referring row */
	public $mOldId, $mHash, $mRef;

	/**
	 * @param string $hash The content hash of the text
	 * @param integer $oldid The old_id for the CGZ object. When omitted (or 0)
	 *   the location is left unset and must be supplied via setLocation().
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		if ( $oldid ) {
			$this->mOldId = $oldid;
		}
	}

	/**
	 * Sets the location (old_id) of the main object to which this object
	 * points
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Sets the location (old_id) of the referring object
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * Gets the location of the referring object
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Fetch the text this stub points to.
	 *
	 * Uses the blob held in $wgBlobCache when it is the one for this
	 * stub's old_id; otherwise loads the row from the text table
	 * (following an external storage URL if flagged 'external'),
	 * unserializes it and makes it the cached blob.
	 *
	 * @return string|false The item text, or false if the row is missing,
	 *   is not flagged 'object', has an unusable external URL, does not
	 *   unserialize to an object, or does not contain the hash
	 */
	public function getText() {
		global $wgBlobCache;
		if ( isset( $wgBlobCache[$this->mOldId] ) ) {
			$obj = $wgBlobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				$url = $row->old_text;
				// Require a non-empty path after the "proto://" prefix
				$parts = explode( '://', $url, 2 );
				if ( count( $parts ) < 2 || $parts[1] === '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Failed fetch or corrupt data; nothing to read from
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			$wgBlobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * Get the content hash
	 */
	public function getHash() {
		return $this->mHash;
	}
}
234
235
/**
 * To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the
 * leftover cur table as the backend. This avoids expensively copying hundreds
 * of megabytes of data during the conversion downtime.
 *
 * Serialized HistoryBlobCurStub objects will be inserted into the text table
 * on conversion if $wgFastSchemaUpgrades is set to true.
 */
class HistoryBlobCurStub {
	/** The cur_id of the row in the cur table that holds the text */
	public $mCurId;

	/**
	 * @param integer $curid The cur_id pointed to
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Sets the location (cur_id) of the main object to which this object
	 * points
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Fetch cur_text for the referenced cur_id from the cur table.
	 *
	 * @return string|false The text, or false if no such row exists
	 */
	public function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}
271
/**
 * Diff-based history compression
 * Requires xdiff 1.5+ and zlib
 *
 * Stores the first item in full plus a chain of binary diffs, each one
 * taking item N-1 to item N. Keys are the zero-based item indexes.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** Uncompressed item cache */
	public $mItems = array();

	/**
	 * Total strlen() of the items added to this instance (not serialized)
	 */
	public $mSize = 0;

	/**
	 * Array of diffs, where $this->mDiffs[0] is the diff between
	 * $this->mItems[0] and $this->mItems[1]
	 */
	public $mDiffs = array();

	/**
	 * The key for getText()
	 */
	public $mDefaultKey;

	/**
	 * Compressed storage
	 */
	public $mCompressed;

	/**
	 * True if the object is locked against further writes
	 */
	public $mFrozen = false;

	/**
	 * The maximum uncompressed size before the object becomes sad
	 * Should be less than max_allowed_packet
	 */
	public $mMaxSize = 10000000;

	/**
	 * The maximum number of text items before the object becomes sad
	 */
	public $mMaxCount = 100;

	/**
	 * @throws MWException if xdiff or zlib support is missing
	 */
	public function __construct() {
		if ( !function_exists( 'xdiff_string_bdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to read or write DiffHistoryBlob\n" );
		}
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append an item and record the diff from the previous item.
	 *
	 * @param string $text
	 * @return int The index of the new item, usable as a key for getItem()
	 * @throws MWException if the object has been through __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$i = count( $this->mItems ) - 1;
		if ( $i > 0 ) {
			# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
			# "String is not zero-terminated"
			wfSuppressWarnings();
			$this->mDiffs[] = xdiff_string_bdiff( $this->mItems[$i-1], $text ) . '';
			wfRestoreWarnings();
		}
		return $i;
	}

	/**
	 * Get an item by index, rebuilding it from the diff chain if it is
	 * not in the item cache yet.
	 *
	 * @param int $key Zero-based item index
	 * @return string|false The text, or false if there is no such item
	 */
	public function getItem( $key ) {
		$key = intval( $key );
		// With N diffs there are N+1 items, so valid keys are 0..N.
		if ( $key < 0 || $key > count( $this->mDiffs ) || !isset( $this->mItems[0] ) ) {
			return false;
		}
		if ( $key == 0 ) {
			return $this->mItems[0];
		}

		// Patch forward from the last item already materialized
		$last = count( $this->mItems ) - 1;
		for ( $i = $last + 1; $i <= $key; $i++ ) {
			# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
			# "String is not zero-terminated"
			wfSuppressWarnings();
			$this->mItems[$i] = xdiff_string_bpatch( $this->mItems[$i - 1], $this->mDiffs[$i - 1] ) . '';
			wfRestoreWarnings();
		}
		return $this->mItems[$key];
	}

	/**
	 * Add $text as an item and remember its index as the default item
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Get the default item
	 *
	 * @return string|false
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Pack the base item, the diffs and the default key into mCompressed,
	 * which is the only property serialized.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		if ( !isset( $this->mItems[0] ) ) {
			// Empty object
			$info = false;
		} else {
			$info = array(
				'base' => $this->mItems[0],
				'diffs' => $this->mDiffs
			);
			if ( isset( $this->mDefaultKey ) ) {
				$info['default'] = $this->mDefaultKey;
			}
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack mCompressed into the base item and diffs, and freeze the
	 * object against further addItem() calls.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mItems[0] = $info['base'];
		$this->mDiffs = $info['diffs'];
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}