From 7c5ac43dc454a048540a8d0d93d2d2be291c95dc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Niklas=20Laxstr=C3=B6m?= Date: Sat, 21 Aug 2010 16:41:53 +0000 Subject: [PATCH] Added $wgAdaptiveMessageCache to avoid caching huge pile of never used messages at twn. --- includes/DefaultSettings.php | 7 ++ includes/MessageCache.php | 148 ++++++++++++++++++++++++++++++----- includes/Wiki.php | 1 + 3 files changed, 135 insertions(+), 21 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 5947fc50e2..7898429872 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -1508,6 +1508,13 @@ $wgUseLocalMessageCache = false; */ $wgLocalMessageCacheSerialized = true; +/** + * Instead of caching everything, keep track of which messages are requested and + * load only the most used messages. This only makes sense if there are lots of + * interface messages customised in the wiki (like hundreds in many languages). + */ +$wgAdaptiveMessageCache = false; + /** * Localisation cache configuration. Associative array with keys: * class: The class to use. May be overridden by extensions. diff --git a/includes/MessageCache.php b/includes/MessageCache.php index 34e13887c9..0968ae57b0 100644 --- a/includes/MessageCache.php +++ b/includes/MessageCache.php @@ -42,6 +42,26 @@ class MessageCache { /// Variable for tracking which variables are already loaded protected $mLoadedLanguages = array(); + /** + * Used for automatic detection of most used messages. + */ + protected $mRequestedMessages = array(); + + /** + * How long the message request counts are stored. A longer period gives + * a better sample, but also takes longer to adapt to changes. The counts + * are aggregated per day, regardless of the value of this variable. + */ + protected static $mAdaptiveDataAge = 604800; + + /** + * Filter the tail of less used messages that are requested more seldom + * than this factor times the number of requests of the most requested message.
+ * These messages are not loaded in the default set, but are still cached + * individually on demand with the normal cache expiry time. + */ + protected static $mAdaptiveInclusionThreshold = 0.05; + function __construct( $memCached, $useDB, $expiry ) { if ( !$memCached ) { $memCached = wfGetCache( CACHE_NONE ); @@ -309,12 +329,12 @@ class MessageCache { * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded * on-demand from the database later. * - * @param $code Optional language code, see documenation of load(). - * @return Array: Loaded messages for storing in caches. + * @param $code \string Language code. + * @return \array Loaded messages for storing in caches. */ - function loadFromDB( $code = false ) { + function loadFromDB( $code ) { wfProfileIn( __METHOD__ ); - global $wgMaxMsgCacheEntrySize, $wgContLanguageCode; + global $wgMaxMsgCacheEntrySize, $wgContLanguageCode, $wgAdaptiveMessageCache; $dbr = wfGetDB( DB_SLAVE ); $cache = array(); @@ -324,19 +344,24 @@ class MessageCache { 'page_namespace' => NS_MEDIAWIKI, ); - if ( $code ) { - # Is this fast enough. Should not matter if the filtering is done in the - # database or in code. + $mostused = array(); + if ( $wgAdaptiveMessageCache ) { + $mostused = $this->getMostUsedMessages(); if ( $code !== $wgContLanguageCode ) { - # Messages for particular language - $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" ); - } else { - # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses - # other than language code. - $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); + foreach ( $mostused as $key => $value ) $mostused[$key] = "$value/$code"; } } + if ( count( $mostused ) ) { + $conds['page_title'] = $mostused; + } elseif ( $code !== $wgContLanguageCode ) { + $conds[] = 'page_title' . 
$dbr->buildLike( $dbr->anyString(), "/$code" ); + } else { + # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses + # other than language code. + $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); + } + # Conditions to fetch oversized pages to ignore them $bigConds = $conds; $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize ); @@ -361,6 +386,12 @@ class MessageCache { $cache[$row->page_title] = ' ' . Revision::getRevisionText( $row ); } + foreach ( $mostused as $key ) { + if ( !isset( $cache[$key] ) ) { + $cache[$key] = '!NONEXISTENT'; + } + } + $cache['VERSION'] = MSG_CACHE_VERSION; wfProfileOut( __METHOD__ ); return $cache; @@ -525,6 +556,11 @@ class MessageCache { $uckey = $wgContLang->ucfirst( $lckey ); } + /* Record each message request, but only once per request. + * This information is not used unless $wgAdaptiveMessageCache + * is enabled. */ + $this->mRequestedMessages[$uckey] = true; + # Try the MediaWiki namespace if( !$this->mDisable && $useDB ) { $title = $uckey; @@ -589,8 +625,8 @@ class MessageCache { * @param $code String: code denoting the language to try. */ function getMsgFromNamespace( $title, $code ) { - $type = false; - $message = false; + global $wgAdaptiveMessageCache; + $big = false; $this->load( $code ); if ( isset( $this->mCache[$code][$title] ) ) { @@ -599,13 +635,26 @@ class MessageCache { return substr( $entry, 1 ); } elseif ( $entry === '!NONEXISTENT' ) { return false; - } - } + } elseif( $entry === '!TOO BIG' ) { + // Fall through and try individual message cache below - # Call message hooks, in case they are defined - wfRunHooks('MessagesPreLoad', array( $title, &$message ) ); - if ( $message !== false ) { - return $message; + } else { + // XXX: This is not cached in process cache, should it be?
+ $message = false; + wfRunHooks('MessagesPreLoad', array( $title, &$message ) ); + if ( $message !== false ) { + return $message; + } + + /* If message cache is in normal mode, it is guaranteed + * (except bugs) that there is always entry (or placeholder) + * in the cache if message exists. Thus we can do minor + * performance improvement and return false early. + */ + if ( !$wgAdaptiveMessageCache ) { + return false; + } + } } # Try the individual message cache @@ -631,6 +680,7 @@ class MessageCache { $this->mCache[$code][$title] = ' ' . $message; $this->mMemc->set( $titleKey, ' ' . $message, $this->mExpiry ); } else { + $message = false; $this->mCache[$code][$title] = '!NONEXISTENT'; $this->mMemc->set( $titleKey, '!NONEXISTENT', $this->mExpiry ); }
@@ -771,4 +821,76 @@ class MessageCache { return array( $message, $lang ); } + /** + * Add the messages requested so far to today's request counts, which + * are stored in the CACHE_DB cache under the 'message-profiling' key. + * Day buckets older than $mAdaptiveDataAge seconds are dropped first. + * Does nothing unless $wgAdaptiveMessageCache is enabled. + */ + public static function logMessages() { + global $wgMessageCache, $wgAdaptiveMessageCache; + if ( !$wgAdaptiveMessageCache || !$wgMessageCache instanceof MessageCache ) { + return; + } + + $cachekey = wfMemckey( 'message-profiling' ); + $cache = wfGetCache( CACHE_DB ); + $data = $cache->get( $cachekey ); + + if ( !$data ) $data = array(); + + $age = self::$mAdaptiveDataAge; + $filterDate = substr( wfTimestamp( TS_MW, time()-$age ), 0, 8 ); + foreach ( array_keys( $data ) as $key ) { + if ( $key < $filterDate ) unset( $data[$key] ); + } + + $index = substr( wfTimestampNow(), 0, 8 ); + if ( !isset( $data[$index] ) ) $data[$index] = array(); + + foreach ( $wgMessageCache->mRequestedMessages as $message => $_ ) { + if ( !isset( $data[$index][$message] ) ) $data[$index][$message] = 0; + $data[$index][$message]++; + } + + $cache->set( $cachekey, $data ); + } + + /** + * Get the message keys that are requested often enough to be loaded + * in the default set. Counts are summed over all stored days; keys + * whose total is below $mAdaptiveInclusionThreshold times the highest + * total are left out. + * + * @return \array List of message keys, empty if there is no usage data. + */ + public function getMostUsedMessages() { + $cachekey = wfMemckey( 'message-profiling' ); + $cache = wfGetCache( CACHE_DB ); + $data = $cache->get( $cachekey ); + if ( !$data ) return array(); + + $list = array(); + + foreach( $data as $date => $messages ) { + foreach( $messages as $message => $count ) { + if ( !isset( $list[$message] ) ) $list[$message] = 0; + $list[$message] += $count; + } + } + + // max() cannot handle an empty array, which is what we get when + // only empty day buckets have been stored. + if ( !$list ) return array(); + + $max = max( $list ); + foreach ( $list as $message => $count ) { + if ( $count < intval( $max * self::$mAdaptiveInclusionThreshold ) ) { + unset( $list[$message] ); + } + } + + return array_keys( $list ); + } + } diff --git a/includes/Wiki.php b/includes/Wiki.php index 9e16535f81..cd671ce283 100644 --- a/includes/Wiki.php +++ b/includes/Wiki.php @@ -449,6 +449,7 @@ class MediaWiki { * Ends this task peacefully */ function restInPeace() { + MessageCache::logMessages(); wfLogProfilingData(); // Commit and close up! $factory = wfGetLBFactory(); -- 2.20.1