From: addshore Date: Tue, 16 Jan 2018 13:53:22 +0000 (+0000) Subject: [MCR] NameTableStore X-Git-Tag: 1.31.0-rc.0~459 X-Git-Url: http://git.cyclocoop.org/?a=commitdiff_plain;h=4d3549ad71a9a7b95569e962c12ee7cb05bbb473;p=lhc%2Fweb%2Fwiklou.git [MCR] NameTableStore General purpose cached store for things like: - content_models (id,name) - slot_roles (id,name) And in the future possibly namespaces & content_formats as mentioned at: https://www.mediawiki.org/wiki/Multi-Content_Revisions/Schema_Migration#Name_tables Bug: T188518 Change-Id: Ia550ef7fe30af25ac3fee5ac8a89d032544563bf --- diff --git a/autoload.php b/autoload.php index 1aa8dc21ee..b5f3e4a067 100644 --- a/autoload.php +++ b/autoload.php @@ -953,6 +953,8 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Storage\\IncompleteRevisionException' => __DIR__ . '/includes/Storage/IncompleteRevisionException.php', 'MediaWiki\\Storage\\MutableRevisionRecord' => __DIR__ . '/includes/Storage/MutableRevisionRecord.php', 'MediaWiki\\Storage\\MutableRevisionSlots' => __DIR__ . '/includes/Storage/MutableRevisionSlots.php', + 'MediaWiki\\Storage\\NameTableAccessException' => __DIR__ . '/includes/Storage/NameTableAccessException.php', + 'MediaWiki\\Storage\\NameTableStore' => __DIR__ . '/includes/Storage/NameTableStore.php', 'MediaWiki\\Storage\\RevisionAccessException' => __DIR__ . '/includes/Storage/RevisionAccessException.php', 'MediaWiki\\Storage\\RevisionArchiveRecord' => __DIR__ . '/includes/Storage/RevisionArchiveRecord.php', 'MediaWiki\\Storage\\RevisionFactory' => __DIR__ . '/includes/Storage/RevisionFactory.php', diff --git a/includes/MediaWikiServices.php b/includes/MediaWikiServices.php index 3c8ce65bdd..8bb0a400d8 100644 --- a/includes/MediaWikiServices.php +++ b/includes/MediaWikiServices.php @@ -16,6 +16,7 @@ use MediaWiki\Preferences\PreferencesFactory; use MediaWiki\Shell\CommandFactory; use MediaWiki\Storage\BlobStore; use MediaWiki\Storage\BlobStoreFactory; +use MediaWiki\Storage\NameTableStore; use MediaWiki\Storage\RevisionFactory; use MediaWiki\Storage\RevisionLookup; use MediaWiki\Storage\RevisionStore; @@ -770,6 +771,22 @@ class MediaWikiServices extends ServiceContainer { return $this->getService( 'RevisionFactory' ); } + /** + * @since 1.31 + * @return NameTableStore + */ + public function getContentModelStore() { + return $this->getService( 'ContentModelStore' ); + } + + /** + * @since 1.31 + * @return NameTableStore + */ + public function getSlotRoleStore() { + return $this->getService( 'SlotRoleStore' ); + } + /** * @since 1.31 * @return PreferencesFactory diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index 3e3c8971d7..08d343b1b2 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -45,6 +45,7 @@ use MediaWiki\MediaWikiServices; use MediaWiki\Preferences\DefaultPreferencesFactory; use MediaWiki\Shell\CommandFactory; use MediaWiki\Storage\BlobStoreFactory; +use MediaWiki\Storage\NameTableStore; use MediaWiki\Storage\RevisionStore; use MediaWiki\Storage\SqlBlobStore; use Wikimedia\ObjectFactory; @@ -539,6 +540,35 @@ return [ return $services->getBlobStoreFactory()->newSqlBlobStore(); }, + 'ContentModelStore' => function ( MediaWikiServices $services ) { + return new NameTableStore( + $services->getDBLoadBalancer(), + $services->getMainWANObjectCache(), + LoggerFactory::getInstance( 'NameTableSqlStore' ), + 'content_models', + 'model_id', + 'model_name' + /** + * No strtolower normalization is added to the service as there are examples of + * extensions that do not stick to this assumption. + * - extensions/examples/DataPages define( 'CONTENT_MODEL_XML_DATA','XML_DATA' ); + * - extensions/Scribunto define( 'CONTENT_MODEL_SCRIBUNTO', 'Scribunto' ); + */ + ); + }, + + 'SlotRoleStore' => function ( MediaWikiServices $services ) { + return new NameTableStore( + $services->getDBLoadBalancer(), + $services->getMainWANObjectCache(), + LoggerFactory::getInstance( 'NameTableSqlStore' ), + 'slot_roles', + 'role_id', + 'role_name', + 'strtolower' + ); + }, + 'PreferencesFactory' => function ( MediaWikiServices $services ) { global $wgContLang; $authManager = AuthManager::singleton(); diff --git a/includes/Storage/NameTableAccessException.php b/includes/Storage/NameTableAccessException.php new file mode 100644 index 0000000000..393cb1fa7e --- /dev/null +++ b/includes/Storage/NameTableAccessException.php @@ -0,0 +1,45 @@ +loadBalancer = $dbLoadBalancer; + $this->cache = $cache; + $this->logger = $logger; + $this->table = $table; + $this->idField = $idField; + $this->nameField = $nameField; + $this->normalizationCallback = $normalizationCallback; + $this->wikiId = $wikiId; + $this->cacheTTL = IExpiringStore::TTL_MONTH; + } + + /** + * @param int $index A database index, like DB_MASTER or DB_REPLICA + * @param int $flags Database connection flags + * + * @return IDatabase + */ + private function getDBConnection( $index, $flags = 0 ) { + return $this->loadBalancer->getConnection( $index, [], $this->wikiId, $flags ); + } + + private function getCacheKey() { + return $this->cache->makeKey( 'NameTableSqlStore', $this->table, $this->wikiId ); + } + + /** + * @param string $name + * @return string + */ + private function normalizeName( $name ) { + if ( $this->normalizationCallback === null ) { + return $name; + } + return call_user_func( $this->normalizationCallback, $name ); + } + + /** + * Acquire the id of the given name. + * This creates a row in the table if it doesn't already exist. + * + * @param string $name + * @throws NameTableAccessException + * @return int + */ + public function acquireId( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + $name = $this->normalizeName( $name ); + + $table = $this->getTableFromCachesOrReplica(); + $searchResult = array_search( $name, $table, true ); + if ( $searchResult === false ) { + $id = $this->store( $name ); + if ( $id === null ) { + // RACE: $name was already in the db, probably just inserted, so load from master + // Use DBO_TRX to avoid missing inserts due to other threads or REPEATABLE-READs + $table = $this->loadTable( + $this->getDBConnection( DB_MASTER, LoadBalancer::CONN_TRX_AUTO ) + ); + $searchResult = array_search( $name, $table, true ); + if ( $searchResult === false ) { + // Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data + $m = "No insert possible but master didn't give us a record for " . + "'{$name}' in '{$this->table}'"; + $this->logger->error( $m ); + throw new NameTableAccessException( $m ); + } + $this->purgeWANCache( + function () { + $this->cache->reap( $this->getCacheKey(), INF ); + } + ); + } else { + $table[$id] = $name; + $searchResult = $id; + // As store returned an ID we know we inserted so delete from WAN cache + $this->purgeWANCache( + function () { + $this->cache->delete( $this->getCacheKey() ); + } + ); + } + $this->tableCache = $table; + } + + return $searchResult; + } + + /** + * Get the id of the given name. + * If the name doesn't exist this will throw. + * This should be used in cases where we believe the name already exists or want to check for + * existence. + * + * @param string $name + * @throws NameTableAccessException The name does not exist + * @return int Id + */ + public function getId( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + $name = $this->normalizeName( $name ); + + $table = $this->getTableFromCachesOrReplica(); + $searchResult = array_search( $name, $table, true ); + + if ( $searchResult !== false ) { + return $searchResult; + } + + throw NameTableAccessException::newFromDetails( $this->table, 'name', $name ); + } + + /** + * Get the name of the given id. + * If the id doesn't exist this will throw. + * This should be used in cases where we believe the id already exists. + * + * Note: Calls to this method will result in a master select for non existing IDs. + * + * @param int $id + * @throws NameTableAccessException The id does not exist + * @return string name + */ + public function getName( $id ) { + Assert::parameterType( 'integer', $id, '$id' ); + + $table = $this->getTableFromCachesOrReplica(); + if ( array_key_exists( $id, $table ) ) { + return $table[$id]; + } + + $table = $this->cache->getWithSetCallback( + $this->getCacheKey(), + $this->cacheTTL, + function ( $oldValue, &$ttl, &$setOpts ) use ( $id ) { + // Check if cached value is up-to-date enough to have $id + if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) { + // Completely leave the cache key alone + $ttl = WANObjectCache::TTL_UNCACHEABLE; + // Use the old value + return $oldValue; + } + // Regenerate from replica DB, and master DB if needed + foreach ( [ DB_REPLICA, DB_MASTER ] as $source ) { + // Log a fallback to master + if ( $source === DB_MASTER ) { + $this->logger->info( + __METHOD__ . 'falling back to master select from ' . + $this->table . ' with id ' . $id + ); + } + $db = $this->getDBConnection( $source ); + $cacheSetOpts = Database::getCacheSetOptions( $db ); + $table = $this->loadTable( $db ); + if ( array_key_exists( $id, $table ) ) { + break; // found it + } + } + // Use the value from last source checked + $setOpts += $cacheSetOpts; + + return $table; + }, + [ 'minAsOf' => INF ] // force callback run + ); + + $this->tableCache = $table; + + if ( array_key_exists( $id, $table ) ) { + return $table[$id]; + } + + throw NameTableAccessException::newFromDetails( $this->table, 'id', $id ); + } + + /** + * Get the whole table, in no particular order as a map of ids to names. + * This method could be subject to DB or cache lag. + * + * @return string[] keys are the name ids, values are the names themselves + * Example: [ 1 => 'foo', 3 => 'bar' ] + */ + public function getMap() { + return $this->getTableFromCachesOrReplica(); + } + + /** + * @return string[] + */ + private function getTableFromCachesOrReplica() { + if ( $this->tableCache !== null ) { + return $this->tableCache; + } + + $table = $this->cache->getWithSetCallback( + $this->getCacheKey(), + $this->cacheTTL, + function ( $oldValue, &$ttl, &$setOpts ) { + $dbr = $this->getDBConnection( DB_REPLICA ); + $setOpts += Database::getCacheSetOptions( $dbr ); + return $this->loadTable( $dbr ); + } + ); + + $this->tableCache = $table; + + return $table; + } + + /** + * Reap the WANCache entry for this table. + * + * @param callable $purgeCallback callback to 'purge' the WAN cache + */ + private function purgeWANCache( $purgeCallback ) { + // If the LB has no DB changes don't both with onTransactionPreCommitOrIdle + if ( !$this->loadBalancer->hasOrMadeRecentMasterChanges() ) { + $purgeCallback(); + return; + } + + $this->getDBConnection( DB_MASTER ) + ->onTransactionPreCommitOrIdle( $purgeCallback, __METHOD__ ); + } + + /** + * Gets the table from the db + * + * @param IDatabase $db + * + * @return string[] + */ + private function loadTable( IDatabase $db ) { + $result = $db->select( + $this->table, + [ + 'id' => $this->idField, + 'name' => $this->nameField + ], + [], + __METHOD__ + ); + + $assocArray = []; + foreach ( $result as $row ) { + $assocArray[$row->id] = $row->name; + } + + return $assocArray; + } + + /** + * Stores the given name in the DB, returning the ID when an insert occurs. + * + * @param string $name + * @return int|null int if we know the ID, null if we don't + */ + private function store( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + Assert::parameter( $name !== '', '$name', 'should not be an empty string' ); + // Note: this is only called internally so normalization of $name has already occurred. + + $dbw = $this->getDBConnection( DB_MASTER ); + + $dbw->insert( + $this->table, + [ $this->nameField => $name ], + __METHOD__, + [ 'IGNORE' ] + ); + + if ( $dbw->affectedRows() === 0 ) { + $this->logger->info( + 'Tried to insert name into table ' . $this->table . ', but value already existed.' + ); + return null; + } + + return $dbw->insertId(); + } + +} diff --git a/tests/phpunit/includes/Storage/NameTableStoreTest.php b/tests/phpunit/includes/Storage/NameTableStoreTest.php new file mode 100644 index 0000000000..5276a140f6 --- /dev/null +++ b/tests/phpunit/includes/Storage/NameTableStoreTest.php @@ -0,0 +1,298 @@ +tablesUsed[] = 'slot_roles'; + parent::setUp(); + } + + private function populateTable( $values ) { + $insertValues = []; + foreach ( $values as $name ) { + $insertValues[] = [ 'role_name' => $name ]; + } + $this->db->insert( 'slot_roles', $insertValues ); + } + + private function getHashWANObjectCache( $cacheBag ) { + return new WANObjectCache( [ 'cache' => $cacheBag ] ); + } + + /** + * @param $db + * @return \PHPUnit_Framework_MockObject_MockObject|LoadBalancer + */ + private function getMockLoadBalancer( $db ) { + $mock = $this->getMockBuilder( LoadBalancer::class ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->any() ) + ->method( 'getConnection' ) + ->willReturn( $db ); + return $mock; + } + + private function getCallCheckingDb( $insertCalls, $selectCalls ) { + $mock = $this->getMockBuilder( Database::class ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->exactly( $insertCalls ) ) + ->method( 'insert' ) + ->willReturnCallback( function () { + return call_user_func_array( [ $this->db, 'insert' ], func_get_args() ); + } ); + $mock->expects( $this->exactly( $selectCalls ) ) + ->method( 'select' ) + ->willReturnCallback( function () { + return call_user_func_array( [ $this->db, 'select' ], func_get_args() ); + } ); + $mock->expects( $this->exactly( $insertCalls ) ) + ->method( 'affectedRows' ) + ->willReturnCallback( function () { + return call_user_func_array( [ $this->db, 'affectedRows' ], func_get_args() ); + } ); + $mock->expects( $this->any() ) + ->method( 'insertId' ) + ->willReturnCallback( function () { + return call_user_func_array( [ $this->db, 'insertId' ], func_get_args() ); + } ); + return $mock; + } + + private function getNameTableSqlStore( + BagOStuff $cacheBag, + $insertCalls, + $selectCalls, + $normalizationCallback = null + ) { + return new NameTableStore( + $this->getMockLoadBalancer( $this->getCallCheckingDb( $insertCalls, $selectCalls ) ), + $this->getHashWANObjectCache( $cacheBag ), + new NullLogger(), + 'slot_roles', 'role_id', 'role_name', + $normalizationCallback + ); + } + + public function provideGetAndAcquireId() { + return [ + 'no wancache, empty table' => + [ new EmptyBagOStuff(), true, 1, [], 'foo', 1 ], + 'no wancache, one matching value' => + [ new EmptyBagOStuff(), false, 1, [ 'foo' ], 'foo', 1 ], + 'no wancache, one not matching value' => + [ new EmptyBagOStuff(), true, 1, [ 'bar' ], 'foo', 2 ], + 'no wancache, multiple, one matching value' => + [ new EmptyBagOStuff(), false, 1, [ 'foo', 'bar' ], 'bar', 2 ], + 'no wancache, multiple, no matching value' => + [ new EmptyBagOStuff(), true, 1, [ 'foo', 'bar' ], 'baz', 3 ], + 'wancache, empty table' => + [ new HashBagOStuff(), true, 1, [], 'foo', 1 ], + 'wancache, one matching value' => + [ new HashBagOStuff(), false, 1, [ 'foo' ], 'foo', 1 ], + 'wancache, one not matching value' => + [ new HashBagOStuff(), true, 1, [ 'bar' ], 'foo', 2 ], + 'wancache, multiple, one matching value' => + [ new HashBagOStuff(), false, 1, [ 'foo', 'bar' ], 'bar', 2 ], + 'wancache, multiple, no matching value' => + [ new HashBagOStuff(), true, 1, [ 'foo', 'bar' ], 'baz', 3 ], + ]; + } + + /** + * @dataProvider provideGetAndAcquireId + * @param BagOStuff $cacheBag to use in the WANObjectCache service + * @param bool $needsInsert Does the value we are testing need to be inserted? + * @param int $selectCalls Number of times the select DB method will be called + * @param string[] $existingValues to be added to the db table + * @param string $name name to acquire + * @param int $expectedId the id we expect the name to have + */ + public function testGetAndAcquireId( + $cacheBag, + $needsInsert, + $selectCalls, + $existingValues, + $name, + $expectedId + ) { + $this->populateTable( $existingValues ); + $store = $this->getNameTableSqlStore( $cacheBag, (int)$needsInsert, $selectCalls ); + + // Some names will not initially exist + try { + $result = $store->getId( $name ); + $this->assertSame( $expectedId, $result ); + } catch ( NameTableAccessException $e ) { + if ( $needsInsert ) { + $this->assertTrue( true ); // Expected exception + } else { + $this->fail( 'Did not expect an exception, but got one: ' . $e->getMessage() ); + } + } + + // All names should return their id here + $this->assertSame( $expectedId, $store->acquireId( $name ) ); + + // acquireId inserted these names, so now everything should exist with getId + $this->assertSame( $expectedId, $store->getId( $name ) ); + + // calling getId again will also still work, and not result in more selects + $this->assertSame( $expectedId, $store->getId( $name ) ); + } + + public function provideTestGetAndAcquireIdNameNormalization() { + yield [ 'A', 'a', 'strtolower' ]; + yield [ 'b', 'B', 'strtoupper' ]; + yield [ + 'X', + 'X', + function ( $name ) { + return $name; + } + ]; + yield [ 'ZZ', 'ZZ-a', __CLASS__ . '::appendDashAToString' ]; + } + + public static function appendDashAToString( $string ) { + return $string . '-a'; + } + + /** + * @dataProvider provideTestGetAndAcquireIdNameNormalization + */ + public function testGetAndAcquireIdNameNormalization( + $nameIn, + $nameOut, + $normalizationCallback + ) { + $store = $this->getNameTableSqlStore( + new EmptyBagOStuff(), + 1, + 1, + $normalizationCallback + ); + $acquiredId = $store->acquireId( $nameIn ); + $this->assertSame( $nameOut, $store->getName( $acquiredId ) ); + } + + public function provideGetName() { + return [ + [ new HashBagOStuff(), 3, 3 ], + [ new EmptyBagOStuff(), 3, 3 ], + ]; + } + + /** + * @dataProvider provideGetName + */ + public function testGetName( $cacheBag, $insertCalls, $selectCalls ) { + $store = $this->getNameTableSqlStore( $cacheBag, $insertCalls, $selectCalls ); + + // Get 1 ID and make sure getName returns correctly + $fooId = $store->acquireId( 'foo' ); + $this->assertSame( 'foo', $store->getName( $fooId ) ); + + // Get another ID and make sure getName returns correctly + $barId = $store->acquireId( 'bar' ); + $this->assertSame( 'bar', $store->getName( $barId ) ); + + // Blitz the cache and make sure it still returns + TestingAccessWrapper::newFromObject( $store )->tableCache = null; + $this->assertSame( 'foo', $store->getName( $fooId ) ); + $this->assertSame( 'bar', $store->getName( $barId ) ); + + // Blitz the cache again and get another ID and make sure getName returns correctly + TestingAccessWrapper::newFromObject( $store )->tableCache = null; + $bazId = $store->acquireId( 'baz' ); + $this->assertSame( 'baz', $store->getName( $bazId ) ); + $this->assertSame( 'baz', $store->getName( $bazId ) ); + } + + public function testGetName_masterFallback() { + $store = $this->getNameTableSqlStore( new EmptyBagOStuff(), 1, 2 ); + + // Insert a new name + $fooId = $store->acquireId( 'foo' ); + + // Empty the process cache, getCachedTable() will now return this empty array + TestingAccessWrapper::newFromObject( $store )->tableCache = []; + + // getName should fallback to master, which is why we assert 2 selectCalls above + $this->assertSame( 'foo', $store->getName( $fooId ) ); + } + + public function testGetMap_empty() { + $this->populateTable( [] ); + $store = $this->getNameTableSqlStore( new HashBagOStuff(), 0, 1 ); + $table = $store->getMap(); + $this->assertSame( [], $table ); + } + + public function testGetMap_twoValues() { + $this->populateTable( [ 'foo', 'bar' ] ); + $store = $this->getNameTableSqlStore( new HashBagOStuff(), 0, 1 ); + + // We are using a cache, so 2 calls should only result in 1 select on the db + $store->getMap(); + $table = $store->getMap(); + + $expected = [ 2 => 'bar', 1 => 'foo' ]; + $this->assertSame( $expected, $table ); + // Make sure the table returned is the same as the cached table + $this->assertSame( $expected, TestingAccessWrapper::newFromObject( $store )->tableCache ); + } + + public function testCacheRaceCondition() { + $wanHashBag = new HashBagOStuff(); + $store1 = $this->getNameTableSqlStore( $wanHashBag, 1, 1 ); + $store2 = $this->getNameTableSqlStore( $wanHashBag, 1, 0 ); + $store3 = $this->getNameTableSqlStore( $wanHashBag, 1, 1 ); + + // Cache the current table in the instances we will use + // This simulates multiple requests running simultaneously + $store1->getMap(); + $store2->getMap(); + $store3->getMap(); + + // Store 2 separate names using different instances + $fooId = $store1->acquireId( 'foo' ); + $barId = $store2->acquireId( 'bar' ); + + // Each of these instances should be aware of what they have inserted + $this->assertSame( $fooId, $store1->acquireId( 'foo' ) ); + $this->assertSame( $barId, $store2->acquireId( 'bar' ) ); + + // A new store should be able to get both of these new Ids + // Note: before there was a race condition here where acquireId( 'bar' ) would update the + // cache with data missing the 'foo' key that it was not aware of + $store4 = $this->getNameTableSqlStore( $wanHashBag, 0, 1 ); + $this->assertSame( $fooId, $store4->getId( 'foo' ) ); + $this->assertSame( $barId, $store4->getId( 'bar' ) ); + + // If a store with old cached data tries to acquire these we will get the same ids. + $this->assertSame( $fooId, $store3->acquireId( 'foo' ) ); + $this->assertSame( $barId, $store3->acquireId( 'bar' ) ); + } + +}