From 1c1c74adef8a2f5597472e8c049010ff4d42ff64 Mon Sep 17 00:00:00 2001 From: Adrian Heine Date: Tue, 7 Jun 2016 15:15:36 +0200 Subject: [PATCH] Map dummy language codes in sites The script that populates the sites table assumes that the subdomain is the content language code. This is not true for all wikis. This patch introduces a mapping to fix this issue on the fly, based on $wgDummyLanguageCodes. This is driven by the need to avoid "bad" language codes when creating wikidata items from the client, when "linking" two pages on different wikis. When we do this, we use the language code from the sites table for the labels of the new item. We would like to forbid "dummy" languages in labels and descriptions, for consistency. Change-Id: I6452761e14d9902bb069e32d0f499bc39e680453 --- includes/ServiceWiring.php | 3 +++ includes/site/DBSiteStore.php | 24 +++++++++++++++---- includes/site/FileBasedSiteLookup.php | 21 +++++++++++++++- .../phpunit/includes/site/DBSiteStoreTest.php | 14 +++++++++++ .../includes/site/FileBasedSiteLookupTest.php | 11 +++++++++ 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index 293e6eb176..e53b9ed96e 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -59,6 +59,9 @@ return [ 'SiteStore' => function( MediaWikiServices $services ) { $rawSiteStore = new DBSiteStore( $services->getDBLoadBalancer() ); + $rawSiteStore->setLanguageCodeMapping( + $services->getMainConfig()->get( 'DummyLanguageCodes' ) ?: [] + ); // TODO: replace wfGetCache with a CacheFactory service. // TODO: replace wfIsHHVM with a capabilities service. 
diff --git a/includes/site/DBSiteStore.php b/includes/site/DBSiteStore.php index 974789f94f..c1c10c2136 100644 --- a/includes/site/DBSiteStore.php +++ b/includes/site/DBSiteStore.php @@ -40,6 +40,11 @@ class DBSiteStore implements SiteStore { */ private $dbLoadBalancer; + /** + * @var string[] Replacement language codes, keyed by the site_language value they replace + */ + private $languageCodeMapping = []; + /** * @since 1.27 * @@ -96,15 +101,17 @@ class DBSiteStore implements SiteStore { ); foreach ( $res as $row ) { + $languageCode = $row->site_language === '' ? null : $row->site_language; + if ( isset( $this->languageCodeMapping[ $languageCode ] ) ) { + $languageCode = $this->languageCodeMapping[ $languageCode ]; + } + $site = Site::newForType( $row->site_type ); $site->setGlobalId( $row->site_global_key ); $site->setInternalId( (int)$row->site_id ); $site->setForward( (bool)$row->site_forward ); $site->setGroup( $row->site_group ); - $site->setLanguageCode( $row->site_language === '' - ? null - : $row->site_language - ); + $site->setLanguageCode( $languageCode ); $site->setSource( $row->site_source ); $site->setExtraData( unserialize( $row->site_data ) ); $site->setExtraConfig( unserialize( $row->site_config ) ); @@ -287,4 +294,13 @@ class DBSiteStore implements SiteStore { return $ok; } + /** + * Set the language code mapping: a row whose site_language is a key of the mapping gets the mapped value as its Site's language code. + * + * @param string[] $newMapping Replaces any mapping set earlier + */ + public function setLanguageCodeMapping( array $newMapping ) { + $this->languageCodeMapping = $newMapping; + } + } diff --git a/includes/site/FileBasedSiteLookup.php b/includes/site/FileBasedSiteLookup.php index 965444038f..424d8e69b3 100644 --- a/includes/site/FileBasedSiteLookup.php +++ b/includes/site/FileBasedSiteLookup.php @@ -42,6 +42,11 @@ class FileBasedSiteLookup implements SiteLookup { */ private $cacheFile; + /** + * @var string[] Replacement language codes, keyed by the language code they replace + */ + private $languageCodeMapping = []; + /** * @param string $cacheFile */ @@ -118,13 +123,18 @@ class FileBasedSiteLookup implements SiteLookup { * @return Site */ private function newSiteFromArray( array $data ) { 
+ $languageCode = $data['language']; + if ( isset( $this->languageCodeMapping[ $languageCode ] ) ) { + $languageCode = $this->languageCodeMapping[ $languageCode ]; + } + $siteType = array_key_exists( 'type', $data ) ? $data['type'] : Site::TYPE_UNKNOWN; $site = Site::newForType( $siteType ); $site->setGlobalId( $data['globalid'] ); $site->setForward( $data['forward'] ); $site->setGroup( $data['group'] ); - $site->setLanguageCode( $data['language'] ); + $site->setLanguageCode( $languageCode ); $site->setSource( $data['source'] ); $site->setExtraData( $data['data'] ); $site->setExtraConfig( $data['config'] ); @@ -136,4 +146,13 @@ class FileBasedSiteLookup implements SiteLookup { return $site; } + /** + * Set the language code mapping: a site whose 'language' entry is a key of the mapping gets the mapped value as its language code. + * + * @param string[] $newMapping Replaces any mapping set earlier + */ + public function setLanguageCodeMapping( array $newMapping ) { + $this->languageCodeMapping = $newMapping; + } + } diff --git a/tests/phpunit/includes/site/DBSiteStoreTest.php b/tests/phpunit/includes/site/DBSiteStoreTest.php index 32dd7f282c..316fd89076 100644 --- a/tests/phpunit/includes/site/DBSiteStoreTest.php +++ b/tests/phpunit/includes/site/DBSiteStoreTest.php @@ -67,6 +67,20 @@ class DBSiteStoreTest extends MediaWikiTestCase { } } + /** + * @covers DBSiteStore::getSites + * @covers DBSiteStore::setLanguageCodeMapping + */ + public function testLanguageCodeMapping() { + TestSites::insertIntoDb(); + + $store = $this->newDBSiteStore(); + $store->setLanguageCodeMapping( [ 'no' => 'nb' ] ); + /* With 'no' mapped to 'nb', nowiki is expected to report the mapped code. */ + $site = $store->getSite( 'nowiki' ); + $this->assertSame( 'nb', $site->getLanguageCode() ); + } + /** * @covers DBSiteStore::saveSites */ diff --git a/tests/phpunit/includes/site/FileBasedSiteLookupTest.php b/tests/phpunit/includes/site/FileBasedSiteLookupTest.php index 7984795b99..bebda79c68 100644 --- a/tests/phpunit/includes/site/FileBasedSiteLookupTest.php +++ b/tests/phpunit/includes/site/FileBasedSiteLookupTest.php @@ -98,4 +98,15 @@ class FileBasedSiteLookupTest extends 
PHPUnit_Framework_TestCase { return tempnam( sys_get_temp_dir(), 'mw-test-sitelist' ); } + public function testLanguageCodeMapping() { + $sites = $this->getSites(); + $cacheBuilder = $this->newSitesCacheFileBuilder( $sites ); + $cacheBuilder->build(); + + $cache = new FileBasedSiteLookup( $this->cacheFile ); + $cache->setLanguageCodeMapping( [ 'en' => 'fa' ] ); + /* With 'en' mapped to 'fa', enwiktionary is expected to report the mapped code. */ + $this->assertSame( 'fa', $cache->getSite( 'enwiktionary' )->getLanguageCode() ); + } + } -- 2.20.1