/**
* Splits a blob address into three parts: the schema, the ID, and parameters/flags.
*
+ * @since 1.33
+ *
* @param string $address
*
* @throws InvalidArgumentException
* @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
*/
- private static function splitBlobAddress( $address ) {
+ public static function splitBlobAddress( $address ) {
if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m ) ) {
throw new InvalidArgumentException( "Bad blob address: $address" );
}
}
$revOpts = [ 'page' ];
- if ( $this->text != self::STUB ) {
- // TODO: remove the text and make XmlDumpWriter use a RevisionStore instead! (T198706)
- $revOpts[] = 'text';
- }
+
$revQuery = Revision::getQueryInfo( $revOpts );
// We want page primary rather than revision
];
unset( $join['page'] );
- // TODO: remove rev_text_id and make XmlDumpWriter use a RevisionStore instead! (T198706)
- $fields = array_merge( $revQuery['fields'], [ 'page_restrictions, rev_text_id' ] );
+ $fields = $revQuery['fields'];
+ $fields[] = 'page_restrictions';
+
+ if ( $this->text != self::STUB ) {
+ $fields['_load_content'] = '1';
+ }
$conds = [];
if ( $cond !== '' ) {
*
* @file
*/
-
use MediaWiki\MediaWikiServices;
+use MediaWiki\Revision\RevisionStore;
+use MediaWiki\Storage\SqlBlobStore;
/**
* @ingroup Dump
*/
class XmlDumpWriter {
+
+ /**
+ * Title of the currently processed page
+ *
+ * @var Title|null
+ */
+ private $currentTitle = null;
+
/**
* Opens the XML output stream's root "<mediawiki>" element.
* This does not include an xml directive, so is safe to include
*/
public function openPage( $row ) {
$out = " <page>\n";
- $title = Title::makeTitle( $row->page_namespace, $row->page_title );
- $out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
+ $this->currentTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
+ $canonicalTitle = self::canonicalTitle( $this->currentTitle );
+ $out .= ' ' . Xml::elementClean( 'title', [], $canonicalTitle ) . "\n";
$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";
if ( $row->page_is_redirect ) {
- $page = WikiPage::factory( $title );
+ $page = WikiPage::factory( $this->currentTitle );
$redirect = $page->getRedirectTarget();
if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
$out .= ' ';
strval( $row->page_restrictions ) ) . "\n";
}
- Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $title ] );
+ Hooks::run( 'XmlDumpWriterOpenPage', [ $this, &$out, $row, $this->currentTitle ] );
return $out;
}
return " </page>\n";
}
+ /**
+ * @return RevisionStore
+ */
+ private function getRevisionStore() {
+ return MediaWikiServices::getInstance()->getRevisionStore();
+ }
+
+ /**
+ * @return SqlBlobStore
+ */
+ private function getBlobStore() {
+ return MediaWikiServices::getInstance()->getBlobStore();
+ }
+
/**
* Dumps a "<revision>" section on the output stream, with
* data filled in from the given database row.
}
}
+ // TODO: rev_content_model no longer exists with MCR, see T174031
if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
$content_model = strval( $row->rev_content_model );
} else {
// probably using $wgContentHandlerUseDB = false;
- $title = Title::makeTitle( $row->page_namespace, $row->page_title );
- $content_model = ContentHandler::getDefaultModelFor( $title );
+ $content_model = ContentHandler::getDefaultModelFor( $this->currentTitle );
}
$content_handler = ContentHandler::getForModelID( $content_model );
+ // TODO: rev_content_format no longer exists with MCR, see T174031
if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
$content_format = strval( $row->rev_content_format );
} else {
$out .= " " . Xml::elementClean( 'text',
[ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
strval( $text ) ) . "\n";
- } else {
- // Stub output
+ } elseif ( isset( $row->_load_content ) ) {
+ // TODO: make this fully MCR aware, see T174031
+ $rev = $this->getRevisionStore()->newRevisionFromRow( $row, 0, $this->currentTitle );
+ $slot = $rev->getSlot( 'main' );
+ $content = $slot->getContent();
+
+ if ( $content instanceof TextContent ) {
+ // HACK: For text based models, bypass the serialization step.
+ // This allows extensions (like Flow) that use incompatible combinations
+ // of serialization format and content model.
+ $text = $content->getNativeData();
+ } else {
+ $text = $content->serialize( $content_format );
+ }
+
+ $text = $content_handler->exportTransform( $text, $content_format );
+ $out .= " " . Xml::elementClean( 'text',
+ [ 'xml:space' => 'preserve', 'bytes' => intval( $slot->getSize() ) ],
+ strval( $text ) ) . "\n";
+ } elseif ( isset( $row->rev_text_id ) ) {
+ // Stub output for pre-MCR schema
+ // TODO: MCR: rev_text_id only exists in the pre-MCR schema. Remove this when
+ // we drop support for the old schema.
$out .= " " . Xml::element( 'text',
[ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
"" ) . "\n";
+ } else {
+ // Backwards-compatible stub output for MCR aware schema
+ // TODO: MCR: emit content addresses instead of text ids, see T174031, T199121
+ $rev = $this->getRevisionStore()->newRevisionFromRow( $row, 0, $this->currentTitle );
+ $slot = $rev->getSlot( 'main' );
+
+ // Note that this is currently the ONLY reason we have a BlobStore here at all.
+ // When removing this line, check whether the BlobStore has become unused.
+ $textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
+ $out .= " " . Xml::element( 'text',
+ [ 'id' => $textId, 'bytes' => intval( $slot->getSize() ) ],
+ "" ) . "\n";
}
if ( isset( $row->rev_sha1 )
require_once __DIR__ . '/../includes/export/WikiExporter.php';
use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\BlobAccessException;
+use MediaWiki\Storage\BlobStore;
+use MediaWiki\Storage\SqlBlobStore;
use Wikimedia\Rdbms\IMaintainableDatabase;
/**
}
}
+ /**
+ * @return BlobStore
+ */
+ private function getBlobStore() {
+ return MediaWikiServices::getInstance()->getBlobStore();
+ }
+
function execute() {
$this->processOptions();
$this->dump( true );
}
/**
- * Tries to get the revision text for a revision id.
- * Export transformations are applied if the content model can is given or can be
+ * Tries to load revision text.
+ * Export transformations are applied if the content model is given or can be
* determined from the database.
*
* Upon errors, retries (Up to $this->maxFailures tries each call).
- * If still no good revision get could be found even after this retrying, "" is returned.
+ * If still no good revision could be found even after this retrying, "" is returned.
* If no good revision text could be returned for
* $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException
* is thrown.
*
- * @param string $id The revision id to get the text for
+ * @param int|string $id Content address, or text row ID.
* @param string|bool|null $model The content model used to determine
* applicable export transformations.
* If $model is null, it will be determined from the database.
$consecutiveFailedTextRetrievals = 0;
if ( $model === null && $wgContentHandlerUseDB ) {
+ // TODO: MCR: use content table
$row = $this->db->selectRow(
'revision',
[ 'rev_content_model', 'rev_content_format' ],
}
/**
- * May throw a database error if, say, the server dies during query.
- * @param int $id
+ * Loads the serialized content from storage.
+ *
+ * @param int|string $id Content address, or text row ID.
* @return bool|string
- * @throws MWException
*/
private function getTextDb( $id ) {
- if ( !isset( $this->db ) ) {
- throw new MWException( __METHOD__ . "No database available" );
- }
- $row = $this->db->selectRow( 'text',
- [ 'old_text', 'old_flags' ],
- [ 'old_id' => $id ],
- __METHOD__ );
- $text = Revision::getRevisionText( $row );
- if ( $text === false ) {
+ $store = $this->getBlobStore();
+ $address = ( is_int( $id ) || strpos( $id, ':' ) === false )
+ ? SqlBlobStore::makeAddressFromTextId( (int)$id )
+ : $id;
+
+ try {
+ $text = $store->getBlob( $address );
+
+ $stripped = str_replace( "\r", "", $text );
+ $normalized = MediaWikiServices::getInstance()->getContentLanguage()
+ ->normalize( $stripped );
+
+ return $normalized;
+ } catch ( BlobAccessException $ex ) {
+ // XXX: log a warning?
return false;
}
- $stripped = str_replace( "\r", "", $text );
- $normalized = MediaWikiServices::getInstance()->getContentLanguage()->
- normalize( $stripped );
-
- return $normalized;
}
+ /**
+ * @param int|string $id Content address, or text row ID.
+ * @return bool|string
+ */
private function getTextSpawned( $id ) {
Wikimedia\suppressWarnings();
if ( !$this->spawnProc ) {
Wikimedia\restoreWarnings();
}
+ /**
+ * @param int|string $id Content address, or text row ID.
+ * @return bool|string
+ */
private function getTextSpawnedOnce( $id ) {
$ok = fwrite( $this->spawnWrite, "$id\n" );
// $this->progress( ">> $id" );
// check that the text id they are sending is the one we asked for
// this avoids out of sync revision text errors we have encountered in the past
- $newId = fgets( $this->spawnRead );
- if ( $newId === false ) {
+ $newAddress = fgets( $this->spawnRead );
+ if ( $newAddress === false ) {
return false;
}
+ if ( strpos( $newAddress, ':' ) === false ) {
+ $newId = intval( $newAddress );
+ if ( $newId === false ) {
+ return false;
+ }
+ } else {
+ try {
+ $newAddressFields = SqlBlobStore::splitBlobAddress( $newAddress );
+ $newId = $newAddressFields[ 1 ];
+ } catch ( InvalidArgumentException $ex ) {
+ return false;
+ }
+ }
if ( $id != intval( $newId ) ) {
return false;
}
require_once __DIR__ . '/Maintenance.php';
-use Wikimedia\Rdbms\IDatabase;
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\BlobAccessException;
+use MediaWiki\Storage\SqlBlobStore;
/**
* Maintenance script used to fetch page text in a subprocess.
* @ingroup Maintenance
*/
class FetchText extends Maintenance {
+
public function __construct() {
parent::__construct();
- $this->addDescription( "Fetch the raw revision blob from an old_id.\n" .
+
+ $this->addDescription( "Fetch the raw revision blob from a blob address.\n" .
+ "Integer IDs are interpreted as referring to text.old_id for backwards compatibility.\n" .
"NOTE: Export transformations are NOT applied. " .
- "This is left to backupTextPass.php"
+ "This is left to dumpTextPass.php"
);
}
+ /**
+ * @return SqlBlobStore
+ */
+ private function getBlobStore() {
+ return MediaWikiServices::getInstance()->getBlobStore();
+ }
+
/**
* returns a string containing the following in order:
* textid
* note that the text string itself is *not* followed by newline
*/
public function execute() {
- $db = $this->getDB( DB_REPLICA );
$stdin = $this->getStdin();
while ( !feof( $stdin ) ) {
$line = fgets( $stdin );
// We appear to have lost contact...
break;
}
- $textId = intval( $line );
- $text = $this->doGetText( $db, $textId );
- if ( $text === false ) {
- # actual error, not zero-length text
- $textLen = "-1";
- } else {
+ $blobAddress = trim( $line );
+
+ // Plain integers are supported for backwards compatibility with pre-MCR dumps.
+ if ( strpos( $blobAddress, ':' ) === false && is_numeric( $blobAddress ) ) {
+ $blobAddress = SqlBlobStore::makeAddressFromTextId( intval( $blobAddress ) );
+ }
+
+ try {
+ $text = $this->getBlobStore()->getBlob( $blobAddress );
$textLen = strlen( $text );
+ } catch ( BlobAccessException $ex ) {
+ // XXX: log $ex to stderr?
+ $textLen = '-1';
+ $text = '';
+ } catch ( InvalidArgumentException $ex ) {
+ // XXX: log $ex to stderr?
+ $textLen = '-1';
+ $text = '';
}
- $this->output( $textId . "\n" . $textLen . "\n" . $text );
- }
- }
- /**
- * May throw a database error if, say, the server dies during query.
- * @param IDatabase $db
- * @param int $id The old_id
- * @return string
- */
- private function doGetText( $db, $id ) {
- $id = intval( $id );
- $row = $db->selectRow( 'text',
- [ 'old_text', 'old_flags' ],
- [ 'old_id' => $id ],
- __METHOD__ );
- $text = Revision::getRevisionText( $row );
- if ( $text === false ) {
- return false;
+ $this->output( $blobAddress . "\n" . $textLen . "\n" . $text );
}
-
- return $text;
}
+
}
$maintClass = FetchText::class;
namespace MediaWiki\Tests\Maintenance;
use DumpBackup;
+use MediaWiki\MediaWikiServices;
+use MediaWikiTestCase;
+use MWException;
use Title;
use WikiExporter;
+use Wikimedia\Rdbms\IDatabase;
+use Wikimedia\Rdbms\LoadBalancer;
use WikiPage;
/**
private $revId4_1, $textId4_1;
private $namespace, $talk_namespace;
+ /**
+ * @var LoadBalancer|null
+ */
+ private $streamingLoadBalancer = null;
+
function addDBData() {
// be sure, titles created here using english namespace names
$this->setContentLang( 'en' );
"Page ids increasing without holes" );
}
+ function tearDown() {
+ parent::tearDown();
+
+ if ( isset( $this->streamingLoadBalancer ) ) {
+ $this->streamingLoadBalancer->closeAll();
+ }
+ }
+
+ /**
+ * Returns a new database connection which is separate from the connections returned
+ * by the default LoadBalancer instance.
+ *
+ * @return IDatabase
+ */
+ private function newStreamingDBConnection() {
+ // Create a *new* LoadBalancer, so no connections are shared
+ if ( !$this->streamingLoadBalancer ) {
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+
+ $this->streamingLoadBalancer = $lbFactory->newMainLB();
+ }
+
+ $db = $this->streamingLoadBalancer->getConnection( DB_REPLICA );
+
+ // Make sure the DB connection has the fake table clones and the fake table prefix
+ MediaWikiTestCase::setupDatabaseWithTestPrefix( $db );
+
+ // Make sure the DB connection has all the test data
+ $this->copyTestData( $this->db, $db );
+
+ return $db;
+ }
+
+ /**
+ * @param array $argv
+ * @param int $startId
+ * @param int $endId
+ *
+ * @return DumpBackup
+ */
+ private function newDumpBackup( $argv, $startId, $endId ) {
+ $dumper = new DumpBackup( $argv );
+ $dumper->startId = $startId;
+ $dumper->endId = $endId;
+ $dumper->reporting = false;
+
+ // NOTE: The copyTestData() method used by newStreamingDBConnection()
+ // doesn't work with SQLite (T217607).
+ // But DatabaseSqlite doesn't support streaming anyway, so just skip that part.
+ if ( $this->db->getType() === 'sqlite' ) {
+ $dumper->setDB( $this->db );
+ } else {
+ $dumper->setDB( $this->newStreamingDBConnection() );
+ }
+
+ return $dumper;
+ }
+
function testFullTextPlain() {
// Preparing the dump
$fname = $this->getNewTempFile();
- $dumper = new DumpBackup();
- $dumper->loadWithArgv( [ '--full', '--quiet', '--output', 'file:' . $fname ] );
- $dumper->startId = $this->pageId1;
- $dumper->endId = $this->pageId4 + 1;
- $dumper->setDB( $this->db );
+ $dumper = $this->newDumpBackup(
+ [ '--full', '--quiet', '--output', 'file:' . $fname ],
+ $this->pageId1,
+ $this->pageId4 + 1
+ );
// Performing the dump
$dumper->execute();
// Preparing the dump
$fname = $this->getNewTempFile();
- $dumper = new DumpBackup();
- $dumper->loadWithArgv( [ '--full', '--quiet', '--output', 'file:' . $fname, '--stub' ] );
- $dumper->startId = $this->pageId1;
- $dumper->endId = $this->pageId4 + 1;
- $dumper->setDB( $this->db );
+ $dumper = $this->newDumpBackup(
+ [ '--full', '--quiet', '--output', 'file:' . $fname, '--stub' ],
+ $this->pageId1,
+ $this->pageId4 + 1
+ );
// Performing the dump
$dumper->execute();
// Preparing the dump
$fname = $this->getNewTempFile();
- $dumper = new DumpBackup( [ '--output', 'file:' . $fname ] );
- $dumper->startId = $this->pageId1;
- $dumper->endId = $this->pageId4 + 1;
- $dumper->reporting = false;
- $dumper->setDB( $this->db );
+ $dumper = $this->newDumpBackup(
+ [ '--output', 'file:' . $fname ],
+ $this->pageId1,
+ $this->pageId4 + 1
+ );
// Performing the dump
$dumper->dump( WikiExporter::CURRENT, WikiExporter::STUB );
// Preparing the dump
$fname = $this->getNewTempFile();
- $dumper = new DumpBackup( [ '--output', 'gzip:' . $fname ] );
- $dumper->startId = $this->pageId1;
- $dumper->endId = $this->pageId4 + 1;
- $dumper->reporting = false;
- $dumper->setDB( $this->db );
+ $dumper = $this->newDumpBackup(
+ [ '--output', 'gzip:' . $fname ],
+ $this->pageId1,
+ $this->pageId4 + 1
+ );
// Performing the dump
$dumper->dump( WikiExporter::CURRENT, WikiExporter::STUB );
$fnameMetaCurrent = $this->getNewTempFile();
$fnameArticles = $this->getNewTempFile();
- $dumper = new DumpBackup( [ "--full", "--stub", "--output=gzip:" . $fnameMetaHistory,
- "--output=gzip:" . $fnameMetaCurrent, "--filter=latest",
- "--output=gzip:" . $fnameArticles, "--filter=latest",
- "--filter=notalk", "--filter=namespace:!NS_USER",
- "--reporting=1000" ] );
- $dumper->startId = $this->pageId1;
- $dumper->endId = $this->pageId4 + 1;
- $dumper->setDB( $this->db );
+ $dumper = $this->newDumpBackup(
+ [ "--full", "--stub", "--output=gzip:" . $fnameMetaHistory,
+ "--output=gzip:" . $fnameMetaCurrent, "--filter=latest",
+ "--output=gzip:" . $fnameArticles, "--filter=latest",
+ "--filter=notalk", "--filter=namespace:!NS_USER",
+ "--reporting=1000"
+ ],
+ $this->pageId1,
+ $this->pageId4 + 1
+ );
+ $dumper->reporting = true;
// xmldumps-backup uses reporting. We will not check the exact reported
// message, as they are dependent on the processing power of the used
use ContentHandler;
use FetchText;
+use MediaWiki\Storage\RevisionRecord;
use MediaWikiTestCase;
use MWException;
use Title;
private $fetchText;
/**
- * Adds a revision to a page, while returning the resuting text's id
+ * Adds a revision to a page and returns the main slot's blob address
*
* @param WikiPage $page The page to add the revision to
* @param string $text The revisions text
* @param string $summary The revisions summary
- * @return int
+ * @return string
* @throws MWException
*/
private function addRevision( $page, $text, $summary ) {
if ( $status->isGood() ) {
$value = $status->getValue();
- $revision = $value['revision'];
- $id = $revision->getTextId();
- if ( $id > 0 ) {
- return $id;
- }
+ /** @var RevisionRecord $revision */
+ $revision = $value['revision-record'];
+ $address = $revision->getSlot( 'main' )->getAddress();
+ return $address;
}
- throw new MWException( "Could not determine text id" );
+ throw new MWException( "Could not create revision" );
}
function addDBDataOnce() {
self::$textId2 . "\n23\nFetchTextTestPage2Text1" );
}
+ function testExistingInteger() {
+ $this->assertFilter( (int)preg_replace( '/^tt:/', '', self::$textId2 ),
+ self::$textId2 . "\n23\nFetchTextTestPage2Text1" );
+ }
+
function testExistingSeveral() {
$this->assertFilter(
implode( "\n", [
}
function testNonExisting() {
- $this->assertFilter( self::$textId5 + 10, ( self::$textId5 + 10 ) . "\n-1\n" );
+ \Wikimedia\suppressWarnings();
+ $this->assertFilter( 'tt:77889911', 'tt:77889911' . "\n-1\n" );
+ \Wikimedia\suppressWarnings( true );
+ }
+
+ function testNonExistingInteger() {
+ \Wikimedia\suppressWarnings();
+ $this->assertFilter( '77889911', 'tt:77889911' . "\n-1\n" );
+ \Wikimedia\suppressWarnings( true );
+ }
+
+ function testBadBlobAddressWithColon() {
+ $this->assertFilter( 'foo:bar', 'foo:bar' . "\n-1\n" );
}
function testNegativeInteger() {
- $this->assertFilter( "-42", "-42\n-1\n" );
+ $this->assertFilter( "-42", "tt:-42\n-1\n" );
}
function testFloatingPointNumberExisting() {
- // float -> int -> revision
- $this->assertFilter( self::$textId3 + 0.14159,
+ // float -> int -> address -> revision
+ $id = intval( preg_replace( '/^tt:/', '', self::$textId3 ) ) + 0.14159;
+ $this->assertFilter( 'tt:' . intval( $id ),
self::$textId3 . "\n23\nFetchTextTestPage2Text2" );
}
function testFloatingPointNumberNonExisting() {
- $this->assertFilter( self::$textId5 + 3.14159,
- ( self::$textId5 + 3 ) . "\n-1\n" );
+ \Wikimedia\suppressWarnings();
+ $id = intval( preg_replace( '/^tt:/', '', self::$textId5 ) ) + 3.14159;
+ $this->assertFilter( $id, 'tt:' . intval( $id ) . "\n-1\n" );
+ \Wikimedia\suppressWarnings( true );
}
function testCharacters() {
- $this->assertFilter( "abc", "0\n-1\n" );
+ $this->assertFilter( "abc", "abc\n-1\n" );
}
function testMix() {
- $this->assertFilter( "ab\n" . self::$textId4 . ".5cd\n\nefg\n" . self::$textId2
+ $this->assertFilter( "ab\n" . self::$textId4 . ".5cd\n\nefg\nfoo:bar\n" . self::$textId2
. "\n" . self::$textId3,
implode( "", [
- "0\n-1\n",
- self::$textId4 . "\n23\nFetchTextTestPage2Text3",
- "0\n-1\n",
- "0\n-1\n",
+ "ab\n-1\n",
+ self::$textId4 . ".5cd\n-1\n",
+ "\n-1\n",
+ "efg\n-1\n",
+ "foo:bar\n-1\n",
self::$textId2 . "\n23\nFetchTextTestPage2Text1",
self::$textId3 . "\n23\nFetchTextTestPage2Text2"
] ) );