X-Git-Url: https://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FLinkHolderArray.php;h=fb013047344ed8dc9f0429f4d532b6131739db62;hb=95e857198b6f754b174a3dbf9b9282d3789003a7;hp=9f085cefebfe9a9572de325d6c710c57fecabace;hpb=3ad5bfb7495a78a273f62eefd0db5246783ae3ba;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 9f085cefeb..fb01304734 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -1,18 +1,77 @@ parent = $parent; } + /** + * Reduce memory usage to reduce the impact of circular references + */ + function __destruct() { + foreach ( $this as $name => $value ) { + unset( $this->$name ); + } + } + + /** + * Don't serialize the parent object, it is big, and not needed when it is + * a parameter to mergeForeign(), which is the only application of + * serializing at present. + * + * Compact the titles, only serialize the text form. + */ + function __sleep() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + unset( $entry['title'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + unset( $entry['title'] ); + } + unset( $entry ); + + return array( 'internals', 'interwikis', 'size' ); + } + + /** + * Recreate the Title objects + */ + function __wakeup() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + unset( $entry ); + } + /** * Merge another LinkHolderArray into this one + * @param $other LinkHolderArray */ function merge( $other ) { foreach ( $other->internals as $ns => $entries ) { @@ -26,11 +85,92 @@ class LinkHolderArray { $this->interwikis += $other->interwikis; } + /** + * Merge a LinkHolderArray from another parser instance into this one. The + * keys will not be preserved. Any text which went with the old + * LinkHolderArray and needs to work with the new one should be passed in + * the $texts array. The strings in this array will have their link holders + * converted for use in the destination link holder. The resulting array of + * strings will be returned. + * + * @param $other LinkHolderArray + * @param $texts Array of strings + * @return Array + */ + function mergeForeign( $other, $texts ) { + $this->tempIdOffset = $idOffset = $this->parent->nextLinkID(); + $maxId = 0; + + # Renumber internal links + foreach ( $other->internals as $ns => $nsLinks ) { + foreach ( $nsLinks as $key => $entry ) { + $newKey = $idOffset + $key; + $this->internals[$ns][$newKey] = $entry; + $maxId = $newKey > $maxId ? $newKey : $maxId; + } + } + $texts = preg_replace_callback( '/()/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Renumber interwiki links + foreach ( $other->interwikis as $key => $entry ) { + $newKey = $idOffset + $key; + $this->interwikis[$newKey] = $entry; + $maxId = $newKey > $maxId ? $newKey : $maxId; + } + $texts = preg_replace_callback( '/()/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Set the parent link ID to be beyond the highest used ID + $this->parent->setLinkID( $maxId + 1 ); + $this->tempIdOffset = null; + return $texts; + } + + protected function mergeForeignCallback( $m ) { + return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3]; + } + + /** + * Get a subset of the current LinkHolderArray which is sufficient to + * interpret the given text. + */ + function getSubArray( $text ) { + $sub = new LinkHolderArray( $this->parent ); + + # Internal links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '//', + $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) + { + break; + } + $ns = $m[1][0]; + $key = $m[2][0]; + $sub->internals[$ns][$key] = $this->internals[$ns][$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + + # Interwiki links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '//', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) { + break; + } + $key = $m[1][0]; + $sub->interwikis[$key] = $this->interwikis[$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + return $sub; + } + /** * Returns true if the memory requirements of this object are getting large */ function isBig() { - return $this->size > $this->batchSize; + global $wgLinkHolderBatchSize; + return $this->size > $wgLinkHolderBatchSize; } /** @@ -49,8 +189,9 @@ class LinkHolderArray { * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * + * @param $nt Title */ - function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { wfProfileIn( __METHOD__ ); if ( ! is_object($nt) ) { # Fail gracefully @@ -64,7 +205,7 @@ class LinkHolderArray { 'text' => $prefix.$text.$inside, 'pdbk' => $nt->getPrefixedDBkey(), ); - if ( $query !== '' ) { + if ( $query !== array() ) { $entry['query'] = $query; } @@ -86,6 +227,7 @@ class LinkHolderArray { } /** + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. @@ -109,29 +251,25 @@ class LinkHolderArray { } wfProfileIn( __METHOD__ ); - global $wgUser, $wgContLang; + global $wgContLang; - $pdbks = array(); $colours = array(); - $linkcolour_ids = array(); - $sk = $this->parent->getOptions()->getSkin(); $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); wfProfileIn( __METHOD__.'-check' ); $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $wgUser->getOption('stubthreshold'); + $threshold = $this->parent->getOptions()->getStubThreshold(); # Sort by namespace ksort( $this->internals ); + $linkcolour_ids = array(); + # Generate query - $query = false; - $current = null; + $queries = array(); foreach ( $this->internals as $ns => $entries ) { - foreach ( $entries as $index => $entry ) { - $key = "$ns:$index"; + foreach ( $entries as $entry ) { $title = $entry['title']; $pdbk = $entry['pdbk']; @@ -144,163 +282,64 @@ class LinkHolderArray { # Check if it's a static known link, e.g. interwiki if ( $title->isAlwaysKnown() ) { $colours[$pdbk] = ''; + } elseif ( $ns == NS_SPECIAL ) { + $colours[$pdbk] = 'new'; } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = ''; + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); $output->addLink( $title, $id ); + $linkcolour_ids[$id] = $pdbk; } elseif ( $linkCache->isBadLink( $pdbk ) ) { $colours[$pdbk] = 'new'; - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { - $colours[$pdbk] = 'new'; } else { # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $title->getDBkey() ); + $queries[$ns][] = $title->getDBkey(); } } } - if ( $query ) { - $query .= '))'; + if ( $queries ) { + $where = array(); + foreach( $queries as $ns => $pages ){ + $where[] = $dbr->makeList( + array( + 'page_namespace' => $ns, + 'page_title' => $pages, + ), + LIST_AND + ); + } - $res = $dbr->query( $query, __METHOD__ ); + $res = $dbr->select( + 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $dbr->makeList( $where, LIST_OR ), + __METHOD__ + ); # Fetch data and form into an associative array # non-existent = broken - while ( $s = $dbr->fetchObject($res) ) { + foreach ( $res as $s ) { $title = Title::makeTitle( $s->page_namespace, $s->page_title ); $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); + $linkCache->addGoodLinkObjFromRow( $title, $s ); $output->addLink( $title, $s->page_id ); - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + # @todo FIXME: Convoluted data flow + # The redirect status and length is passed to getLinkColour via the LinkCache + # Use formal parameters instead + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); //add id to the extension todolist $linkcolour_ids[$s->page_id] = $pdbk; } unset( $res ); + } + if ( count($linkcolour_ids) ) { //pass an array of page_ids to an extension wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } wfProfileOut( __METHOD__.'-check' ); # Do a second query for different language variants of links and categories - if($wgContLang->hasVariants()){ - $linkBatch = new LinkBatch(); - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) - $categoryMap = array(); // maps $category_variant => $category (dbkeys) - $varCategories = array(); // category replacements oldDBkey => newDBkey - - $categories = $output->getCategoryLinks(); - - // Add variants of links to link batch - foreach ( $this->internals as $ns => $entries ) { - foreach ( $entries as $index => $entry ) { - $key = "$ns:$index"; - $pdbk = $entry['pdbk']; - $title = $entry['title']; - $titleText = $title->getText(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); - - // if link was not found (in first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; - } - } - } - } - } - - // process categories, check if a category exists in some variant - foreach( $categories as $category ){ - $variants = $wgContLang->convertLinkToAllVariants($category); - foreach($variants as $variant){ - if($variant != $category){ - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $categoryMap[$variant] = $category; - } - } - } - - - if(!$linkBatch->isEmpty()){ - // construct query - $titleClause = $linkBatch->constructSet('page', $dbr); - - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - - $variantQuery .= " FROM $page WHERE $titleClause"; - - $varRes = $dbr->query( $variantQuery, __METHOD__ ); - - // for each found variants, figure out link holders and replace - while ( $s = $dbr->fetchObject($varRes) ) { - - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); - $varPdbk = $variantTitle->getPrefixedDBkey(); - $vardbk = $variantTitle->getDBkey(); - - $holderKeys = array(); - if(isset($variantMap[$varPdbk])){ - $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); - $output->addLink( $variantTitle, $s->page_id ); - } - - // loop over link holders - foreach($holderKeys as $key){ - list( $ns, $index ) = explode( ':', $key, 2 ); - $entry =& $this->internals[$ns][$index]; - $pdbk = $entry['pdbk']; - - if(!isset($colours[$pdbk])){ - // found link in some of the variants, replace the link holder data - $entry['title'] = $variantTitle; - $entry['pdbk'] = $varPdbk; - - // set pdbk and colour - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); - $linkcolour_ids[$s->page_id] = $pdbk; - } - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); - } - - // check if the object is a variant of a category - if(isset($categoryMap[$vardbk])){ - $oldkey = $categoryMap[$vardbk]; - if($oldkey != $vardbk) - $varCategories[$oldkey]=$vardbk; - } - } - - // rebuild the categories in original order (if there are replacements) - if(count($varCategories)>0){ - $newCats = array(); - $originalCats = $output->getCategories(); - foreach($originalCats as $cat => $sortkey){ - // make the replacement - if( array_key_exists($cat,$varCategories) ) - $newCats[$varCategories[$cat]] = $sortkey; - else $newCats[$cat] = $sortkey; - } - $this->mOutput->parent->setCategoryLinks($newCats); - } - } + if($wgContLang->hasVariants()) { + $this->doVariants( $colours ); } # Construct search and replace arrays @@ -310,21 +349,29 @@ class LinkHolderArray { foreach ( $entries as $index => $entry ) { $pdbk = $entry['pdbk']; $title = $entry['title']; - $query = isset( $entry['query'] ) ? $entry['query'] : ''; + $query = isset( $entry['query'] ) ? $entry['query'] : array(); $key = "$ns:$index"; $searchkey = ""; - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { - $linkCache->addBadLinkObj( $title ); + $displayText = $entry['text']; + if ( $displayText === '' ) { + $displayText = null; + } + if ( !isset( $colours[$pdbk] ) ) { $colours[$pdbk] = 'new'; + } + $attribs = array(); + if ( $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); $output->addLink( $title, 0 ); - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $entry['text'], - $query ); + $type = array( 'broken' ); } else { - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], - $entry['text'], - $query ); + if ( $colours[$pdbk] != '' ) { + $attribs['class'] = $colours[$pdbk]; + } + $type = array( 'known', 'noclasses' ); } + $replacePairs[$searchkey] = Linker::link( $title, $displayText, + $attribs, $query, $type ); } } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -351,10 +398,11 @@ class LinkHolderArray { wfProfileIn( __METHOD__ ); # Make interwiki link HTML - $sk = $this->parent->getOptions()->getSkin(); + $output = $this->parent->getOutput(); $replacePairs = array(); foreach( $this->interwikis as $key => $link ) { - $replacePairs[$key] = $sk->link( $link['title'], $link['text'] ); + $replacePairs[$key] = Linker::link( $link['title'], $link['text'] ); + $output->addInterwikiLink( $link['title'] ); } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -365,11 +413,154 @@ class LinkHolderArray { wfProfileOut( __METHOD__ ); } + /** + * Modify $this->internals and $colours according to language variant linking rules + */ + protected function doVariants( &$colours ) { + global $wgContLang; + $linkBatch = new LinkBatch(); + $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) + $output = $this->parent->getOutput(); + $linkCache = LinkCache::singleton(); + $threshold = $this->parent->getOptions()->getStubThreshold(); + $titlesToBeConverted = ''; + $titlesAttrs = array(); + + // Concatenate titles to a single string, thus we only need auto convert the + // single string to all variants. This would improve parser's performance + // significantly. + foreach ( $this->internals as $ns => $entries ) { + foreach ( $entries as $index => $entry ) { + $pdbk = $entry['pdbk']; + // we only deal with new links (in its first query) + if ( !isset( $colours[$pdbk] ) ) { + $title = $entry['title']; + $titleText = $title->getText(); + $titlesAttrs[] = array( + 'ns' => $ns, + 'key' => "$ns:$index", + 'titleText' => $titleText, + ); + // separate titles with \0 because it would never appears + // in a valid title + $titlesToBeConverted .= $titleText . "\0"; + } + } + } + + // Now do the conversion and explode string to text of titles + $titlesAllVariants = $wgContLang->autoConvertToAllVariants( $titlesToBeConverted ); + $allVariantsName = array_keys( $titlesAllVariants ); + foreach ( $titlesAllVariants as &$titlesVariant ) { + $titlesVariant = explode( "\0", $titlesVariant ); + } + $l = count( $titlesAttrs ); + // Then add variants of links to link batch + for ( $i = 0; $i < $l; $i ++ ) { + foreach ( $allVariantsName as $variantName ) { + $textVariant = $titlesAllVariants[$variantName][$i]; + if ( $textVariant != $titlesAttrs[$i]['titleText'] ) { + $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant ); + if( is_null( $variantTitle ) ) { + continue; + } + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key']; + } + } + } + + // process categories, check if a category exists in some variant + $categoryMap = array(); // maps $category_variant => $category (dbkeys) + $varCategories = array(); // category replacements oldDBkey => newDBkey + foreach( $output->getCategoryLinks() as $category ){ + $variants = $wgContLang->autoConvertToAllVariants( $category ); + foreach($variants as $variant){ + if($variant != $category){ + $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $categoryMap[$variant] = $category; + } + } + } + + + if(!$linkBatch->isEmpty()){ + // construct query + $dbr = wfGetDB( DB_SLAVE ); + $varRes = $dbr->select( 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $linkBatch->constructSet( 'page', $dbr ), + __METHOD__ + ); + + $linkcolour_ids = array(); + + // for each found variants, figure out link holders and replace + foreach ( $varRes as $s ) { + + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); + $varPdbk = $variantTitle->getPrefixedDBkey(); + $vardbk = $variantTitle->getDBkey(); + + $holderKeys = array(); + if( isset( $variantMap[$varPdbk] ) ) { + $holderKeys = $variantMap[$varPdbk]; + $linkCache->addGoodLinkObjFromRow( $variantTitle, $s ); + $output->addLink( $variantTitle, $s->page_id ); + } + + // loop over link holders + foreach( $holderKeys as $key ) { + list( $ns, $index ) = explode( ':', $key, 2 ); + $entry =& $this->internals[$ns][$index]; + $pdbk = $entry['pdbk']; + + if(!isset($colours[$pdbk])){ + // found link in some of the variants, replace the link holder data + $entry['title'] = $variantTitle; + $entry['pdbk'] = $varPdbk; + + // set pdbk and colour + # @todo FIXME: Convoluted data flow + # The redirect status and length is passed to getLinkColour via the LinkCache + # Use formal parameters instead + $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold ); + $linkcolour_ids[$s->page_id] = $pdbk; + } + } + + // check if the object is a variant of a category + if(isset($categoryMap[$vardbk])){ + $oldkey = $categoryMap[$vardbk]; + if($oldkey != $vardbk) + $varCategories[$oldkey]=$vardbk; + } + } + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + + // rebuild the categories in original order (if there are replacements) + if(count($varCategories)>0){ + $newCats = array(); + $originalCats = $output->getCategories(); + foreach($originalCats as $cat => $sortkey){ + // make the replacement + if( array_key_exists($cat,$varCategories) ) + $newCats[$varCategories[$cat]] = $sortkey; + else $newCats[$cat] = $sortkey; + } + $output->setCategoryLinks($newCats); + } + } + } + /** * Replace link placeholders with plain text of links * (not HTML-formatted). - * @param string $text - * @return string + * + * @param $text String + * @return String */ function replaceText( $text ) { wfProfileIn( __METHOD__ ); @@ -384,7 +575,9 @@ class LinkHolderArray { } /** - * @param array $matches + * Callback for replaceText() + * + * @param $matches Array * @return string * @private */