Implement SiteListFileCache and rebuild script
author aude <aude.wiki@gmail.com>
Fri, 21 Nov 2014 00:24:39 +0000 (19:24 -0500)
committer Tim Starling <tstarling@wikimedia.org>
Thu, 27 Nov 2014 22:32:25 +0000 (22:32 +0000)
Provides file-based cache of the SiteStore data,
using a static json file dump of the data from the
SiteSQLStore.

Includes a maintenance script to rebuild the sites cache.

Bug: 56602
Bug: 45532
Change-Id: Iaee4c1f9fb5d54efe01975f733ebd5c339ac106f

autoload.php
includes/DefaultSettings.php
includes/site/SiteListFileCache.php [new file with mode: 0644]
includes/site/SiteListFileCacheBuilder.php [new file with mode: 0644]
maintenance/rebuildSitesCache.php [new file with mode: 0644]
tests/phpunit/includes/site/SiteListFileCacheBuilderTest.php [new file with mode: 0644]
tests/phpunit/includes/site/SiteListFileCacheTest.php [new file with mode: 0644]

index 472d17e..58e62b9 100644 (file)
@@ -917,6 +917,7 @@ $wgAutoloadLocalClasses = array(
        'RebuildLocalisationCache' => __DIR__ . '/maintenance/rebuildLocalisationCache.php',
        'RebuildMessages' => __DIR__ . '/maintenance/rebuildmessages.php',
        'RebuildRecentchanges' => __DIR__ . '/maintenance/rebuildrecentchanges.php',
+       'RebuildSitesCache' => __DIR__ . '/maintenance/rebuildSitesCache.php',
        'RebuildTextIndex' => __DIR__ . '/maintenance/rebuildtextindex.php',
        'RecentChange' => __DIR__ . '/includes/changes/RecentChange.php',
        'RecompressTracked' => __DIR__ . '/maintenance/storage/recompressTracked.php',
@@ -1023,6 +1024,8 @@ $wgAutoloadLocalClasses = array(
        'SiteArray' => __DIR__ . '/includes/site/SiteList.php',
        'SiteConfiguration' => __DIR__ . '/includes/SiteConfiguration.php',
        'SiteList' => __DIR__ . '/includes/site/SiteList.php',
+       'SiteListFileCache' => __DIR__ . '/includes/site/SiteListFileCache.php',
+       'SiteListFileCacheBuilder' => __DIR__ . '/includes/site/SiteListFileCacheBuilder.php',
        'SiteObject' => __DIR__ . '/includes/site/Site.php',
        'SiteSQLStore' => __DIR__ . '/includes/site/SiteSQLStore.php',
        'SiteStats' => __DIR__ . '/includes/SiteStats.php',
index 85f25c2..7523193 100644 (file)
@@ -3754,6 +3754,18 @@ $wgInterwikiFallbackSite = 'wiki';
 
 /** @} */ # end of Interwiki caching settings.
 
+/**
+ * @name SiteStore caching settings.
+ * @{
+ */
+
+/**
+ * Specify the file location for the SiteStore json cache file.
+ *
+ * Defaults to false, meaning no cache file is configured. In that case
+ * maintenance/rebuildSitesCache.php reports an error unless it is given
+ * a --file option.
+ */
+$wgSitesCacheFile = false;
+
+/** @} */ # end of SiteStore caching settings.
+
 /**
  * If local interwikis are set up which allow redirects,
  * set this regexp to restrict URLs which will be displayed
diff --git a/includes/site/SiteListFileCache.php b/includes/site/SiteListFileCache.php
new file mode 100644 (file)
index 0000000..c0ecab1
--- /dev/null
@@ -0,0 +1,126 @@
+<?php
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 1.25
+ *
+ * @file
+ *
+ * @license GNU GPL v2+
+ */
+class SiteListFileCache {
+
+       /**
+        * Sites decoded from the cache file; stays null until first requested.
+        *
+        * @var SiteList|null
+        */
+       private $sites = null;
+
+       /**
+        * Location of the json cache file.
+        *
+        * @var string
+        */
+       private $cacheFile;
+
+       /**
+        * @param string $cacheFile Path of the json file the sites are read from
+        */
+       public function __construct( $cacheFile ) {
+               $this->cacheFile = $cacheFile;
+       }
+
+       /**
+        * Returns every site found in the cache file. The file is only read
+        * on the first call; later calls reuse the list built then.
+        *
+        * @since 1.25
+        *
+        * @throws MWException if the cache file is unreadable or its content is invalid
+        * @return SiteList
+        */
+       public function getSites() {
+               if ( $this->sites !== null ) {
+                       return $this->sites;
+               }
+
+               $this->sites = $this->loadSitesFromCache();
+
+               return $this->sites;
+       }
+
+       /**
+        * Looks up a single site by its global id.
+        *
+        * @since 1.25
+        *
+        * @param string $globalId
+        *
+        * @return Site|null The site, or null when the list has no site with that id
+        */
+       public function getSite( $globalId ) {
+               $siteList = $this->getSites();
+
+               if ( !$siteList->hasSite( $globalId ) ) {
+                       return null;
+               }
+
+               return $siteList->getSite( $globalId );
+       }
+
+       /**
+        * Builds a SiteList holding one Site per entry under the 'sites' key
+        * of the decoded cache file.
+        *
+        * @throws MWException
+        * @return SiteList
+        */
+       private function loadSitesFromCache() {
+               $cacheData = $this->loadJsonFile();
+
+               $siteList = new SiteList();
+
+               // @todo lazy initialize the site objects in the site list (e.g. only when needed to access)
+               foreach ( $cacheData['sites'] as $siteData ) {
+                       $siteList[] = $this->newSiteFromArray( $siteData );
+               }
+
+               return $siteList;
+       }
+
+       /**
+        * Reads the cache file and decodes it as json into an associative array.
+        *
+        * @throws MWException if the file is not readable, or if the decoded
+        *  content is not an array with a 'sites' key
+        * @return array
+        */
+       private function loadJsonFile() {
+               $path = $this->cacheFile;
+
+               if ( !is_readable( $path ) ) {
+                       throw new MWException( 'SiteList cache file not found.' );
+               }
+
+               $decoded = json_decode( file_get_contents( $path ), true );
+               $isValid = is_array( $decoded ) && array_key_exists( 'sites', $decoded );
+
+               if ( !$isValid ) {
+                       throw new MWException( 'SiteStore json cache data is invalid.' );
+               }
+
+               return $decoded;
+       }
+
+       /**
+        * Creates a Site from one cache entry.
+        *
+        * The site type falls back to Site::TYPE_UNKNOWN when the entry has no
+        * 'type' key; all other keys read below are accessed unconditionally.
+        *
+        * @param array $data
+        *
+        * @return Site
+        */
+       private function newSiteFromArray( array $data ) {
+               if ( array_key_exists( 'type', $data ) ) {
+                       $type = $data['type'];
+               } else {
+                       $type = Site::TYPE_UNKNOWN;
+               }
+
+               $site = Site::newForType( $type );
+
+               $site->setGlobalId( $data['globalid'] );
+               $site->setInternalId( $data['internalid'] );
+               $site->setForward( $data['forward'] );
+               $site->setGroup( $data['group'] );
+               $site->setLanguageCode( $data['language'] );
+               $site->setSource( $data['source'] );
+               $site->setExtraData( $data['data'] );
+               $site->setExtraConfig( $data['config'] );
+
+               // Each identifier entry is a pair of 'type' and 'key'.
+               foreach ( $data['identifiers'] as $localId ) {
+                       $site->addLocalId( $localId['type'], $localId['key'] );
+               }
+
+               return $site;
+       }
+
+}
diff --git a/includes/site/SiteListFileCacheBuilder.php b/includes/site/SiteListFileCacheBuilder.php
new file mode 100644 (file)
index 0000000..0a71aae
--- /dev/null
@@ -0,0 +1,113 @@
+<?php
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 1.25
+ *
+ * @file
+ *
+ * @license GNU GPL v2+
+ */
+class SiteListFileCacheBuilder {
+
+       /**
+        * @var SiteStore
+        */
+       private $siteStore;
+
+       /**
+        * @var string
+        */
+       private $cacheFile;
+
+       /**
+        * @param SiteStore $siteStore Store the sites are fetched from
+        * @param string $cacheFile Path of the json file to write
+        */
+       public function __construct( SiteStore $siteStore, $cacheFile ) {
+               $this->siteStore = $siteStore;
+               $this->cacheFile = $cacheFile;
+       }
+
+       /**
+        * Fetches the sites from the site store, passing 'recache' as the
+        * source, and writes them to the cache file as json.
+        *
+        * @since 1.25
+        *
+        * @return bool Whether the cache file was written successfully
+        */
+       public function build() {
+               // Kept in a local variable: the class declares no property for the
+               // list, and nothing needs it once the file has been written.
+               $sites = $this->siteStore->getSites( 'recache' );
+
+               return $this->cacheSites( $sites->getArrayCopy() );
+       }
+
+       /**
+        * Encodes the given sites as json, keyed by global id under a top-level
+        * 'sites' key, and writes the result to the cache file.
+        *
+        * @param Site[] $sites
+        *
+        * @return bool False if encoding or writing failed, true otherwise
+        */
+       private function cacheSites( array $sites ) {
+               $sitesArray = array();
+
+               foreach ( $sites as $site ) {
+                       $globalId = $site->getGlobalId();
+                       $sitesArray[$globalId] = $this->getSiteAsArray( $site );
+               }
+
+               $json = json_encode( array(
+                       'sites' => $sitesArray
+               ) );
+
+               if ( $json === false ) {
+                       // Writing the failed result would replace the cache file
+                       // with empty content, so leave the file untouched.
+                       return false;
+               }
+
+               return file_put_contents( $this->cacheFile, $json ) !== false;
+       }
+
+       /**
+        * Converts a site to an array: the unserialized form of
+        * Site::serialize() plus an 'identifiers' list of its local ids.
+        *
+        * @param Site $site
+        *
+        * @return array
+        */
+       private function getSiteAsArray( Site $site ) {
+               $siteEntry = unserialize( $site->serialize() );
+               $siteEntry['identifiers'] = $this->buildLocalIdentifiers( $site );
+
+               return $siteEntry;
+       }
+
+       /**
+        * Flattens the local ids of a site, which are grouped by id type, into
+        * a list with one 'type' / 'key' pair per id.
+        *
+        * @param Site $site
+        *
+        * @return array Site local identifiers
+        */
+       private function buildLocalIdentifiers( Site $site ) {
+               $localIds = array();
+
+               foreach ( $site->getLocalIds() as $idType => $ids ) {
+                       foreach ( $ids as $id ) {
+                               $localIds[] = array(
+                                       'type' => $idType,
+                                       'key' => $id
+                               );
+                       }
+               }
+
+               return $localIds;
+       }
+
+}
diff --git a/maintenance/rebuildSitesCache.php b/maintenance/rebuildSitesCache.php
new file mode 100644 (file)
index 0000000..862a983
--- /dev/null
@@ -0,0 +1,68 @@
+<?php
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to dump the SiteStore as a static json file.
+ *
+ * @ingroup Maintenance
+ */
+class RebuildSitesCache extends Maintenance {
+
+       /**
+        * Sets the script description and registers the 'file' option.
+        */
+       public function __construct() {
+               parent::__construct();
+
+               $this->mDescription = "Dumps site store as json";
+               $this->addOption( 'file', 'File to output the json to', false, true );
+       }
+
+       /**
+        * Builds the sites cache file from the SiteSQLStore.
+        */
+       public function execute() {
+               $siteStore = SiteSQLStore::newInstance();
+               $cacheFile = $this->getCacheFile();
+
+               $builder = new SiteListFileCacheBuilder( $siteStore, $cacheFile );
+               $builder->build();
+       }
+
+       /**
+        * Determines the file to write: the 'file' option when it was given,
+        * otherwise the SitesCacheFile configuration setting.
+        *
+        * @return string
+        */
+       private function getCacheFile() {
+               if ( $this->hasOption( 'file' ) ) {
+                       return $this->getOption( 'file' );
+               }
+
+               $configuredFile = $this->getConfig()->get( 'SitesCacheFile' );
+
+               if ( $configuredFile === false ) {
+                       // NOTE(review): presumably the second argument makes error() abort
+                       // the script - confirm against Maintenance::error().
+                       $this->error( 'Error: No sites cache file is set in configuration.', 1 );
+               }
+
+               return $configuredFile;
+       }
+
+}
+
+// Name of the maintenance class defined in this file.
+// NOTE(review): presumably read by the file required below to decide what to run - confirm in Maintenance.php.
+$maintClass = "RebuildSitesCache";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/tests/phpunit/includes/site/SiteListFileCacheBuilderTest.php b/tests/phpunit/includes/site/SiteListFileCacheBuilderTest.php
new file mode 100644 (file)
index 0000000..af02429
--- /dev/null
@@ -0,0 +1,130 @@
+<?php
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @since 1.25
+ *
+ * @ingroup Site
+ * @ingroup Test
+ *
+ * @covers SiteListFileCacheBuilder
+ * @group Site
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < aude.wiki@gmail.com >
+ */
+class SiteListFileCacheBuilderTest extends PHPUnit_Framework_TestCase {
+
+       /**
+        * @var string[] Temporary cache files created by the current test
+        */
+       private $cacheFiles = array();
+
+       protected function tearDown() {
+               // Remove the files handed out by getCacheFile() so that test runs
+               // do not leave them behind in the temp directory.
+               foreach ( $this->cacheFiles as $cacheFile ) {
+                       if ( file_exists( $cacheFile ) ) {
+                               unlink( $cacheFile );
+                       }
+               }
+
+               $this->cacheFiles = array();
+
+               parent::tearDown();
+       }
+
+       public function testBuild() {
+               $cacheFile = $this->getCacheFile();
+
+               $cacheBuilder = $this->newSiteListFileCacheBuilder( $this->getSites(), $cacheFile );
+               $cacheBuilder->build();
+
+               $contents = file_get_contents( $cacheFile );
+               $this->assertEquals( json_encode( $this->getExpectedData() ), $contents );
+       }
+
+       /**
+        * @return array Data the builder is expected to write for getSites()
+        */
+       private function getExpectedData() {
+               return array(
+                       'sites' => array(
+                               'foobar' => array(
+                                       'globalid' => 'foobar',
+                                       'type' => 'unknown',
+                                       'group' => 'none',
+                                       'source' => 'local',
+                                       'language' => null,
+                                       'localids' => array(),
+                                       'config' => array(),
+                                       'data' => array(),
+                                       'forward' => false,
+                                       'internalid' => null,
+                                       'identifiers' => array()
+                               ),
+                               'enwiktionary' => array(
+                                       'globalid' => 'enwiktionary',
+                                       'type' => 'mediawiki',
+                                       'group' => 'wiktionary',
+                                       'source' => 'local',
+                                       'language' => 'en',
+                                       'localids' => array(
+                                               'equivalent' => array( 'enwiktionary' )
+                                       ),
+                                       'config' => array(),
+                                       'data' => array(
+                                               'paths' => array(
+                                                       'page_path' => 'https://en.wiktionary.org/wiki/$1',
+                                                       'file_path' => 'https://en.wiktionary.org/w/$1'
+                                               )
+                                       ),
+                                       'forward' => false,
+                                       'internalid' => null,
+                                       'identifiers' => array(
+                                               array(
+                                                       'type' => 'equivalent',
+                                                       'key' => 'enwiktionary'
+                                               )
+                                       )
+                               )
+                       )
+               );
+       }
+
+       /**
+        * @param SiteList $sites Sites the mocked store returns
+        * @param string $cacheFile
+        *
+        * @return SiteListFileCacheBuilder
+        */
+       private function newSiteListFileCacheBuilder( SiteList $sites, $cacheFile ) {
+               return new SiteListFileCacheBuilder(
+                       $this->getSiteSQLStore( $sites ),
+                       $cacheFile
+               );
+       }
+
+       /**
+        * @param SiteList $sites
+        *
+        * @return SiteSQLStore Mock whose getSites() always returns $sites
+        */
+       private function getSiteSQLStore( SiteList $sites ) {
+               $siteSQLStore = $this->getMockBuilder( 'SiteSQLStore' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $siteSQLStore->expects( $this->any() )
+                       ->method( 'getSites' )
+                       ->will( $this->returnValue( $sites ) );
+
+               return $siteSQLStore;
+       }
+
+       /**
+        * @return SiteList Two sites: a plain Site and a MediaWikiSite
+        */
+       private function getSites() {
+               $sites = array();
+
+               $site = new Site();
+               $site->setGlobalId( 'foobar' );
+               $sites[] = $site;
+
+               $site = new MediaWikiSite();
+               $site->setGlobalId( 'enwiktionary' );
+               $site->setGroup( 'wiktionary' );
+               $site->setLanguageCode( 'en' );
+               $site->addNavigationId( 'enwiktionary' );
+               // Single quotes: "$1" is a literal placeholder, not a variable.
+               $site->setPath( MediaWikiSite::PATH_PAGE, 'https://en.wiktionary.org/wiki/$1' );
+               $site->setPath( MediaWikiSite::PATH_FILE, 'https://en.wiktionary.org/w/$1' );
+               $sites[] = $site;
+
+               return new SiteList( $sites );
+       }
+
+       /**
+        * Creates a uniquely named temporary file and registers it for removal
+        * in tearDown().
+        *
+        * @return string
+        */
+       private function getCacheFile() {
+               // tempnam() creates a file with a unique name, whereas a name based
+               // on time() repeats for every call made within the same second.
+               $cacheFile = tempnam( sys_get_temp_dir(), 'sites-' );
+               $this->cacheFiles[] = $cacheFile;
+
+               return $cacheFile;
+       }
+
+}
diff --git a/tests/phpunit/includes/site/SiteListFileCacheTest.php b/tests/phpunit/includes/site/SiteListFileCacheTest.php
new file mode 100644 (file)
index 0000000..b598eed
--- /dev/null
@@ -0,0 +1,98 @@
+<?php
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @since 1.25
+ *
+ * @ingroup Site
+ * @ingroup Test
+ *
+ * @covers SiteListFileCache
+ * @group Site
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < aude.wiki@gmail.com >
+ */
+class SiteListFileCacheTest extends PHPUnit_Framework_TestCase {
+
+       /**
+        * @var string[] Temporary cache files created by the current test
+        */
+       private $cacheFiles = array();
+
+       protected function tearDown() {
+               // Remove the files handed out by getCacheFile() so that test runs
+               // do not leave them behind in the temp directory.
+               foreach ( $this->cacheFiles as $cacheFile ) {
+                       if ( file_exists( $cacheFile ) ) {
+                               unlink( $cacheFile );
+                       }
+               }
+
+               $this->cacheFiles = array();
+
+               parent::tearDown();
+       }
+
+       public function testGetSites() {
+               $cacheFile = $this->getCacheFile();
+
+               $sites = $this->getSites();
+               $cacheBuilder = $this->newSiteListFileCacheBuilder( $sites, $cacheFile );
+               $cacheBuilder->build();
+
+               $cache = new SiteListFileCache( $cacheFile );
+               $this->assertEquals( $sites, $cache->getSites() );
+       }
+
+       public function testGetSite() {
+               $cacheFile = $this->getCacheFile();
+
+               $sites = $this->getSites();
+               $cacheBuilder = $this->newSiteListFileCacheBuilder( $sites, $cacheFile );
+               $cacheBuilder->build();
+
+               $cache = new SiteListFileCache( $cacheFile );
+
+               $this->assertEquals( $sites->getSite( 'enwiktionary' ), $cache->getSite( 'enwiktionary' ) );
+       }
+
+       /**
+        * @param SiteList $sites Sites the mocked store returns
+        * @param string $cacheFile
+        *
+        * @return SiteListFileCacheBuilder
+        */
+       private function newSiteListFileCacheBuilder( SiteList $sites, $cacheFile ) {
+               return new SiteListFileCacheBuilder(
+                       $this->getSiteSQLStore( $sites ),
+                       $cacheFile
+               );
+       }
+
+       /**
+        * @param SiteList $sites
+        *
+        * @return SiteSQLStore Mock whose getSites() always returns $sites
+        */
+       private function getSiteSQLStore( SiteList $sites ) {
+               $siteSQLStore = $this->getMockBuilder( 'SiteSQLStore' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $siteSQLStore->expects( $this->any() )
+                       ->method( 'getSites' )
+                       ->will( $this->returnValue( $sites ) );
+
+               return $siteSQLStore;
+       }
+
+       /**
+        * @return SiteList Two sites: a plain Site and a MediaWikiSite
+        */
+       private function getSites() {
+               $sites = array();
+
+               $site = new Site();
+               $site->setGlobalId( 'foobar' );
+               $sites[] = $site;
+
+               $site = new MediaWikiSite();
+               $site->setGlobalId( 'enwiktionary' );
+               $site->setGroup( 'wiktionary' );
+               $site->setLanguageCode( 'en' );
+               $site->addNavigationId( 'enwiktionary' );
+               // Single quotes: "$1" is a literal placeholder, not a variable.
+               $site->setPath( MediaWikiSite::PATH_PAGE, 'https://en.wiktionary.org/wiki/$1' );
+               $site->setPath( MediaWikiSite::PATH_FILE, 'https://en.wiktionary.org/w/$1' );
+               $sites[] = $site;
+
+               return new SiteList( $sites );
+       }
+
+       /**
+        * Creates a uniquely named temporary file and registers it for removal
+        * in tearDown().
+        *
+        * @return string
+        */
+       private function getCacheFile() {
+               // tempnam() creates a file with a unique name, whereas a name based
+               // on time() repeats for every call made within the same second.
+               $cacheFile = tempnam( sys_get_temp_dir(), 'sites-' );
+               $this->cacheFiles[] = $cacheFile;
+
+               return $cacheFile;
+       }
+
+}