3 * Service for storing and loading data blobs representing revision content.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * Attribution notice: when this file was created, much of its content was taken
21 * from the Revision.php file as present in release 1.30. Refer to the history
22 * of that file for original authorship.
27 namespace MediaWiki\Storage
;
29 use DBAccessObjectUtils
;
33 use InvalidArgumentException
;
37 use Wikimedia\Assert\Assert
;
38 use Wikimedia\Rdbms\Database
;
39 use Wikimedia\Rdbms\IDatabase
;
40 use Wikimedia\Rdbms\LoadBalancer
;
43 * Service for storing and loading Content objects.
47 * @note This was written to act as a drop-in replacement for the corresponding
48 * static methods in Revision.
50 class SqlBlobStore
implements IDBAccessObject
, BlobStore
{
52 // Note: the name has been taken unchanged from the Revision class.
53 const TEXT_CACHE_GROUP
= 'revisiontext:10';
58 private $dbLoadBalancer;
66 * @var bool|string Wiki ID
73 private $cacheExpiry = 604800; // 7 days
78 private $compressBlobs = false;
83 private $legacyEncoding = false;
88 private $legacyEncodingConversionLang = null;
93 private $useExternalStore = false;
96 * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
97 * @param WANObjectCache $cache A cache manager for caching blobs
98 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
100 public function __construct(
101 LoadBalancer
$dbLoadBalancer,
102 WANObjectCache
$cache,
105 $this->dbLoadBalancer
= $dbLoadBalancer;
106 $this->cache
= $cache;
107 $this->wikiId
= $wikiId;
111 * @return int time for which blobs can be cached, in seconds
113 public function getCacheExpiry() {
114 return $this->cacheExpiry
;
118 * @param int $cacheExpiry time for which blobs can be cached, in seconds
120 public function setCacheExpiry( $cacheExpiry ) {
121 Assert
::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
123 $this->cacheExpiry
= $cacheExpiry;
127 * @return bool whether blobs should be compressed for storage
129 public function getCompressBlobs() {
130 return $this->compressBlobs
;
134 * @param bool $compressBlobs whether blobs should be compressed for storage
136 public function setCompressBlobs( $compressBlobs ) {
137 $this->compressBlobs
= $compressBlobs;
141 * @return false|string The legacy encoding to assume for blobs that are not marked as utf8.
142 * False means handling of legacy encoding is disabled, and utf8 assumed.
144 public function getLegacyEncoding() {
145 return $this->legacyEncoding
;
149 * @return Language|null The locale to use when decoding from a legacy encoding, or null
150 * if handling of legacy encoding is disabled.
152 public function getLegacyEncodingConversionLang() {
153 return $this->legacyEncodingConversionLang
;
157 * @param string $legacyEncoding The legacy encoding to assume for blobs that are
158 * not marked as utf8.
159 * @param Language $language The locale to use when decoding from a legacy encoding.
161 public function setLegacyEncoding( $legacyEncoding, Language
$language ) {
162 Assert
::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );
164 $this->legacyEncoding
= $legacyEncoding;
165 $this->legacyEncodingConversionLang
= $language;
169 * @return bool Whether to use the ExternalStore mechanism for storing blobs.
171 public function getUseExternalStore() {
172 return $this->useExternalStore
;
176 * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs.
178 public function setUseExternalStore( $useExternalStore ) {
179 Assert
::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );
181 $this->useExternalStore
= $useExternalStore;
185 * @return LoadBalancer
187 private function getDBLoadBalancer() {
188 return $this->dbLoadBalancer
;
192 * @param int $index A database index, like DB_MASTER or DB_REPLICA
196 private function getDBConnection( $index ) {
197 $lb = $this->getDBLoadBalancer();
198 return $lb->getConnection( $index, [], $this->wikiId
);
202 * Stores an arbitrary blob of data and returns an address that can be used with
203 * getBlob() to retrieve the same blob of data.
205 * @param string $data
206 * @param array $hints An array of hints.
208 * @throws BlobAccessException
209 * @return string an address that can be used with getBlob() to retrieve the data.
211 public function storeBlob( $data, $hints = [] ) {
213 $flags = $this->compressData( $data );
215 # Write to external storage if required
216 if ( $this->useExternalStore
) {
217 // Store and get the URL
218 $data = ExternalStore
::insertToDefault( $data );
220 throw new BlobAccessException( "Failed to store text to external storage" );
225 $flags .= 'external';
227 // TODO: we could also return an address for the external store directly here.
228 // That would mean bypassing the text table entirely when the external store is
229 // used. We'll need to assess expected fallout before doing that.
232 $dbw = $this->getDBConnection( DB_MASTER
);
234 $old_id = $dbw->nextSequenceValue( 'text_old_id_seq' );
240 'old_flags' => $flags,
245 $textId = $dbw->insertId();
247 return self
::makeAddressFromTextId( $textId );
248 } catch ( MWException
$e ) {
249 throw new BlobAccessException( $e->getMessage(), 0, $e );
254 * Retrieve a blob, given an address.
255 * Currently hardcoded to the 'text' table storage engine.
257 * MCR migration note: this replaces Revision::loadText
259 * @param string $blobAddress
260 * @param int $queryFlags
262 * @throws BlobAccessException
265 public function getBlob( $blobAddress, $queryFlags = 0 ) {
266 Assert
::parameterType( 'string', $blobAddress, '$blobAddress' );
268 // No negative caching; negative hits on text rows may be due to corrupted replica DBs
269 $blob = $this->cache
->getWithSetCallback(
270 $this->getCacheKey( $blobAddress ),
271 $this->getCacheTTL(),
272 function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
273 list( $index ) = DBAccessObjectUtils
::getDBOptions( $queryFlags );
274 $setOpts +
= Database
::getCacheSetOptions( $this->getDBConnection( $index ) );
276 return $this->fetchBlob( $blobAddress, $queryFlags );
278 [ 'pcGroup' => self
::TEXT_CACHE_GROUP
, 'pcTTL' => IExpiringStore
::TTL_PROC_LONG
]
281 if ( $blob === false ) {
282 throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
289 * MCR migration note: this corresponds to Revision::fetchText
291 * @param string $blobAddress
292 * @param int $queryFlags
294 * @throws BlobAccessException
295 * @return string|false
297 private function fetchBlob( $blobAddress, $queryFlags ) {
298 list( $schema, $id, ) = self
::splitBlobAddress( $blobAddress );
300 //TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
301 if ( $schema === 'tt' ) {
302 $textId = intval( $id );
304 // XXX: change to better exceptions! That makes migration more difficult, though.
305 throw new BlobAccessException( "Unknown blob address schema: $schema" );
308 if ( !$textId ||
$id !== (string)$textId ) {
309 // XXX: change to better exceptions! That makes migration more difficult, though.
310 throw new BlobAccessException( "Bad blob address: $blobAddress" );
313 // Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
314 // do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
315 $queryFlags |
= DBAccessObjectUtils
::hasFlags( $queryFlags, self
::READ_LATEST
)
316 ? self
::READ_LATEST_IMMUTABLE
319 list( $index, $options, $fallbackIndex, $fallbackOptions ) =
320 DBAccessObjectUtils
::getDBOptions( $queryFlags );
322 // Text data is immutable; check replica DBs first.
323 $row = $this->getDBConnection( $index )->selectRow(
325 [ 'old_text', 'old_flags' ],
326 [ 'old_id' => $textId ],
331 // Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
332 // options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
333 if ( !$row && $fallbackIndex !== null ) {
334 $row = $this->getDBConnection( $fallbackIndex )->selectRow(
336 [ 'old_text', 'old_flags' ],
337 [ 'old_id' => $textId ],
344 wfWarn( __METHOD__
. ": No text row with ID $textId." );
348 $blob = $this->expandBlob( $row->old_text
, $row->old_flags
, $blobAddress );
350 if ( $blob === false ) {
351 wfLogWarning( __METHOD__
. ": Bad data in text row $textId." );
359 * Get a cache key for a given Blob address.
361 * The cache key is constructed in a way that allows cached blobs from the same database
362 * to be re-used between wikis. For example, enwiki and frwiki will use the same cache keys
363 * for blobs from the wikidatawiki database.
365 * @param string $blobAddress
368 private function getCacheKey( $blobAddress ) {
369 return $this->cache
->makeGlobalKey(
372 $this->dbLoadBalancer
->resolveDomainID( $this->wikiId
),
378 * Expand a raw data blob according to the flags given.
380 * MCR migration note: this replaces Revision::getRevisionText
382 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
383 * @todo make this private, there should be no need to use this method outside this class.
385 * @param string $raw The raw blob data, to be processed according to $flags.
386 * May be the blob itself, or the blob compressed, or just the address
387 * of the actual blob, depending on $flags.
388 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
389 * Note that not including 'utf-8' in $flags will cause the data to be decoded
390 * according to the legacy encoding specified via setLegacyEncoding.
391 * @param string|null $cacheKey A blob address for use in the cache key. If not given,
392 * caching is disabled.
394 * @return false|string The expanded blob or false on failure
396 public function expandBlob( $raw, $flags, $cacheKey = null ) {
397 if ( is_string( $flags ) ) {
398 $flags = explode( ',', $flags );
401 // Use external methods for external objects, text in table is URL-only then
402 if ( in_array( 'external', $flags ) ) {
404 $parts = explode( '://', $url, 2 );
405 if ( count( $parts ) == 1 ||
$parts[1] == '' ) {
410 // The cached value should be decompressed, so handle that and return here.
411 return $this->cache
->getWithSetCallback(
412 $this->getCacheKey( $cacheKey ),
413 $this->getCacheTTL(),
414 function () use ( $url, $flags ) {
415 // No negative caching per BlobStore::getBlob()
416 $blob = ExternalStore
::fetchFromURL( $url, [ 'wiki' => $this->wikiId
] );
418 return $blob === false ?
false : $this->decompressData( $blob, $flags );
420 [ 'pcGroup' => self
::TEXT_CACHE_GROUP
, 'pcTTL' => WANObjectCache
::TTL_PROC_LONG
]
423 $blob = ExternalStore
::fetchFromURL( $url, [ 'wiki' => $this->wikiId
] );
424 return $blob === false ?
false : $this->decompressData( $blob, $flags );
427 return $this->decompressData( $raw, $flags );
432 * If $wgCompressRevisions is enabled, we will compress data.
433 * The input string is modified in place.
434 * Return value is the flags field: contains 'gzip' if the
435 * data is compressed, and 'utf-8' if we're saving in UTF-8
438 * MCR migration note: this replaces Revision::compressRevisionText
440 * @note direct use is deprecated!
441 * @todo make this private, there should be no need to use this method outside this class.
443 * @param mixed &$blob Reference to a text
447 public function compressData( &$blob ) {
450 // Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
451 // XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
452 // risky, since $this->legacyEncoding being set in the future would lead to data corruption.
453 $blobFlags[] = 'utf-8';
455 if ( $this->compressBlobs
) {
456 if ( function_exists( 'gzdeflate' ) ) {
457 $deflated = gzdeflate( $blob );
459 if ( $deflated === false ) {
460 wfLogWarning( __METHOD__
. ': gzdeflate() failed' );
463 $blobFlags[] = 'gzip';
466 wfDebug( __METHOD__
. " -- no zlib support, not compressing\n" );
469 return implode( ',', $blobFlags );
473 * Re-converts revision text according to its flags.
475 * MCR migration note: this replaces Revision::decompressRevisionText
477 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
478 * @todo make this private, there should be no need to use this method outside this class.
480 * @param string $blob Blob in compressed/encoded form.
481 * @param array $blobFlags Compression flags, such as 'gzip'.
482 * Note that not including 'utf-8' in $blobFlags will cause the data to be decoded
483 * according to the legacy encoding specified via setLegacyEncoding.
485 * @return string|bool Decompressed text, or false on failure
487 public function decompressData( $blob, array $blobFlags ) {
488 // Revision::decompressRevisionText accepted false here, so defend against that
489 Assert
::parameterType( 'string', $blob, '$blob' );
491 if ( in_array( 'error', $blobFlags ) ) {
492 // Error row, return false
496 if ( in_array( 'gzip', $blobFlags ) ) {
497 # Deal with optional compression of archived pages.
498 # This can be done periodically via maintenance/compressOld.php, and
499 # as pages are saved if $wgCompressRevisions is set.
500 $blob = gzinflate( $blob );
502 if ( $blob === false ) {
503 wfWarn( __METHOD__
. ': gzinflate() failed' );
508 if ( in_array( 'object', $blobFlags ) ) {
509 # Generic compressed storage
510 $obj = unserialize( $blob );
511 if ( !is_object( $obj ) ) {
515 $blob = $obj->getText();
518 // Needed to support old revisions left over from the 1.4 / 1.5 migration.
519 if ( $blob !== false && $this->legacyEncoding
&& $this->legacyEncodingConversionLang
520 && !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
522 # Old revisions kept around in a legacy encoding?
523 # Upconvert on demand.
524 # ("utf8" checked for compatibility with some broken
525 # conversion scripts 2008-12-30)
526 $blob = $this->legacyEncodingConversionLang
->iconv( $this->legacyEncoding
, 'UTF-8', $blob );
533 * Get the text cache TTL
535 * MCR migration note: this replaces Revision::getCacheTTL
539 private function getCacheTTL() {
540 if ( $this->cache
->getQoS( WANObjectCache
::ATTR_EMULATION
)
541 <= WANObjectCache
::QOS_EMULATION_SQL
543 // Do not cache RDBMS blobs in... the RDBMS store
544 $ttl = WANObjectCache
::TTL_UNCACHEABLE
;
546 $ttl = $this->cacheExpiry ?
: WANObjectCache
::TTL_UNCACHEABLE
;
553 * Returns an ID corresponding to the old_id field in the text table, corresponding
554 * to the given $address.
556 * Currently, $address must start with 'tt:' followed by a decimal integer representing
557 * the old_id; if $address does not start with 'tt:', null is returned. However,
558 * the implementation may change to insert rows into the text table on the fly.
559 * This implies that this method cannot be static.
561 * @note This method exists for use with the text table based storage schema.
562 * It should not be assumed that it will function with all future kinds of content addresses.
564 * @deprecated since 1.31, so don't assume that all blob addresses refer to a row in the text
565 * table. This method should become private once the relevant refactoring in WikiPage is complete.
568 * @param string $address
572 public function getTextIdFromAddress( $address ) {
573 list( $schema, $id, ) = self
::splitBlobAddress( $address );
575 if ( $schema !== 'tt' ) {
579 $textId = intval( $id );
581 if ( !$textId ||
$id !== (string)$textId ) {
582 throw new InvalidArgumentException( "Malformed text_id: $id" );
589 * Returns an address referring to content stored in the text table row with the given ID.
590 * The address schema for blobs stored in the text table is "tt:" followed by an integer
591 * that corresponds to a value of the old_id field.
593 * @deprecated since 1.31. This method should become private once the relevant refactoring
594 * in WikiPage is complete.
600 public static function makeAddressFromTextId( $id ) {
605 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
607 * @param string $address
609 * @throws InvalidArgumentException
610 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
612 private static function splitBlobAddress( $address ) {
613 if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m ) ) {
614 throw new InvalidArgumentException( "Bad blob address: $address" );
617 $schema = strtolower( $m[1] );
619 $parameters = isset( $m[4] ) ?
wfCgiToArray( $m[4] ) : [];
621 return [ $schema, $id, $parameters ];
624 public function isReadOnly() {
625 if ( $this->useExternalStore
&& ExternalStore
::defaultStoresAreReadOnly() ) {
629 return ( $this->getDBLoadBalancer()->getReadOnlyReason() !== false );