X-Git-Url: https://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FLinkHolderArray.php;h=fb013047344ed8dc9f0429f4d532b6131739db62;hb=95e857198b6f754b174a3dbf9b9282d3789003a7;hp=3fa88bca8f5ec7e4f08ec3596394f1c2fc74a84e;hpb=8eac509ab5ebb7276dffffa0092f8722146c5003;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 3fa88bca8f..fb01304734 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -1,9 +1,18 @@ parent = $parent; @@ -18,8 +27,51 @@ class LinkHolderArray { } } + /** + * Don't serialize the parent object, it is big, and not needed when it is + * a parameter to mergeForeign(), which is the only application of + * serializing at present. + * + * Compact the titles, only serialize the text form. + */ + function __sleep() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + unset( $entry['title'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + unset( $entry['title'] ); + } + unset( $entry ); + + return array( 'internals', 'interwikis', 'size' ); + } + + /** + * Recreate the Title objects + */ + function __wakeup() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + unset( $entry ); + } + /** * Merge another LinkHolderArray into this one + * @param $other LinkHolderArray */ function merge( $other ) { foreach ( $other->internals as $ns => $entries ) { @@ -33,6 +85,86 @@ class LinkHolderArray { $this->interwikis += $other->interwikis; } + /** + * Merge a LinkHolderArray from another parser instance into this one. The + * keys will not be preserved. Any text which went with the old + * LinkHolderArray and needs to work with the new one should be passed in + * the $texts array. The strings in this array will have their link holders + * converted for use in the destination link holder. The resulting array of + * strings will be returned. + * + * @param $other LinkHolderArray + * @param $texts Array of strings + * @return Array + */ + function mergeForeign( $other, $texts ) { + $this->tempIdOffset = $idOffset = $this->parent->nextLinkID(); + $maxId = 0; + + # Renumber internal links + foreach ( $other->internals as $ns => $nsLinks ) { + foreach ( $nsLinks as $key => $entry ) { + $newKey = $idOffset + $key; + $this->internals[$ns][$newKey] = $entry; + $maxId = $newKey > $maxId ? $newKey : $maxId; + } + } + $texts = preg_replace_callback( '/()/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Renumber interwiki links + foreach ( $other->interwikis as $key => $entry ) { + $newKey = $idOffset + $key; + $this->interwikis[$newKey] = $entry; + $maxId = $newKey > $maxId ? $newKey : $maxId; + } + $texts = preg_replace_callback( '/()/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Set the parent link ID to be beyond the highest used ID + $this->parent->setLinkID( $maxId + 1 ); + $this->tempIdOffset = null; + return $texts; + } + + protected function mergeForeignCallback( $m ) { + return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3]; + } + + /** + * Get a subset of the current LinkHolderArray which is sufficient to + * interpret the given text. + */ + function getSubArray( $text ) { + $sub = new LinkHolderArray( $this->parent ); + + # Internal links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '//', + $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) + { + break; + } + $ns = $m[1][0]; + $key = $m[2][0]; + $sub->internals[$ns][$key] = $this->internals[$ns][$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + + # Interwiki links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '//', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) { + break; + } + $key = $m[1][0]; + $sub->interwikis[$key] = $this->interwikis[$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + return $sub; + } + /** * Returns true if the memory requirements of this object are getting large */ @@ -57,8 +189,9 @@ class LinkHolderArray { * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * + * @param $nt Title */ - function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { wfProfileIn( __METHOD__ ); if ( ! is_object($nt) ) { # Fail gracefully @@ -72,7 +205,7 @@ class LinkHolderArray { 'text' => $prefix.$text.$inside, 'pdbk' => $nt->getPrefixedDBkey(), ); - if ( $query !== '' ) { + if ( $query !== array() ) { $entry['query'] = $query; } @@ -94,18 +227,7 @@ class LinkHolderArray { } /** - * Get the stub threshold - */ - function getStubThreshold() { - global $wgUser; - if ( !isset( $this->stubThreshold ) ) { - $this->stubThreshold = $wgUser->getOption('stubthreshold'); - } - return $this->stubThreshold; - } - - /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. @@ -132,24 +254,22 @@ class LinkHolderArray { global $wgContLang; $colours = array(); - $sk = $this->parent->getOptions()->getSkin(); $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); wfProfileIn( __METHOD__.'-check' ); $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $this->getStubThreshold(); + $threshold = $this->parent->getOptions()->getStubThreshold(); # Sort by namespace ksort( $this->internals ); + $linkcolour_ids = array(); + # Generate query - $query = false; - $current = null; + $queries = array(); foreach ( $this->internals as $ns => $entries ) { - foreach ( $entries as $index => $entry ) { - $key = "$ns:$index"; + foreach ( $entries as $entry ) { $title = $entry['title']; $pdbk = $entry['pdbk']; @@ -162,52 +282,56 @@ class LinkHolderArray { # Check if it's a static known link, e.g. interwiki if ( $title->isAlwaysKnown() ) { $colours[$pdbk] = ''; - if( $title->getInterwiki() != '' ) { - $output->addInterwikiLink( $title ); - } + } elseif ( $ns == NS_SPECIAL ) { + $colours[$pdbk] = 'new'; } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); $output->addLink( $title, $id ); + $linkcolour_ids[$id] = $pdbk; } elseif ( $linkCache->isBadLink( $pdbk ) ) { $colours[$pdbk] = 'new'; } else { # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $title->getDBkey() ); + $queries[$ns][] = $title->getDBkey(); } } } - if ( $query ) { - $query .= '))'; + if ( $queries ) { + $where = array(); + foreach( $queries as $ns => $pages ){ + $where[] = $dbr->makeList( + array( + 'page_namespace' => $ns, + 'page_title' => $pages, + ), + LIST_AND + ); + } - $res = $dbr->query( $query, __METHOD__ ); + $res = $dbr->select( + 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $dbr->makeList( $where, LIST_OR ), + __METHOD__ + ); # Fetch data and form into an associative array # non-existent = broken - $linkcolour_ids = array(); - while ( $s = $dbr->fetchObject($res) ) { + foreach ( $res as $s ) { $title = Title::makeTitle( $s->page_namespace, $s->page_title ); $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); + $linkCache->addGoodLinkObjFromRow( $title, $s ); $output->addLink( $title, $s->page_id ); - # FIXME: convoluted data flow + # @todo FIXME: Convoluted data flow # The redirect status and length is passed to getLinkColour via the LinkCache # Use formal parameters instead - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); //add id to the extension todolist $linkcolour_ids[$s->page_id] = $pdbk; } unset( $res ); + } + if ( count($linkcolour_ids) ) { //pass an array of page_ids to an extension wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } @@ -225,23 +349,29 @@ class LinkHolderArray { foreach ( $entries as $index => $entry ) { $pdbk = $entry['pdbk']; $title = $entry['title']; - $query = isset( $entry['query'] ) ? $entry['query'] : ''; + $query = isset( $entry['query'] ) ? $entry['query'] : array(); $key = "$ns:$index"; $searchkey = ""; - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { - $linkCache->addBadLinkObj( $title ); + $displayText = $entry['text']; + if ( $displayText === '' ) { + $displayText = null; + } + if ( !isset( $colours[$pdbk] ) ) { $colours[$pdbk] = 'new'; + } + $attribs = array(); + if ( $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); $output->addLink( $title, 0 ); - // FIXME: replace deprecated makeBrokenLinkObj() by link() - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $entry['text'], - $query ); + $type = array( 'broken' ); } else { - // FIXME: replace deprecated makeColouredLinkObj() by link() - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], - $entry['text'], - $query ); + if ( $colours[$pdbk] != '' ) { + $attribs['class'] = $colours[$pdbk]; + } + $type = array( 'known', 'noclasses' ); } + $replacePairs[$searchkey] = Linker::link( $title, $displayText, + $attribs, $query, $type ); } } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -268,10 +398,11 @@ class LinkHolderArray { wfProfileIn( __METHOD__ ); # Make interwiki link HTML - $sk = $this->parent->getOptions()->getSkin(); + $output = $this->parent->getOutput(); $replacePairs = array(); foreach( $this->interwikis as $key => $link ) { - $replacePairs[$key] = $sk->link( $link['title'], $link['text'] ); + $replacePairs[$key] = Linker::link( $link['title'], $link['text'] ); + $output->addInterwikiLink( $link['title'] ); } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -291,30 +422,50 @@ class LinkHolderArray { $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) $output = $this->parent->getOutput(); $linkCache = LinkCache::singleton(); - $sk = $this->parent->getOptions()->getSkin(); - $threshold = $this->getStubThreshold(); + $threshold = $this->parent->getOptions()->getStubThreshold(); + $titlesToBeConverted = ''; + $titlesAttrs = array(); - // Add variants of links to link batch + // Concatenate titles to a single string, thus we only need auto convert the + // single string to all variants. This would improve parser's performance + // significantly. foreach ( $this->internals as $ns => $entries ) { foreach ( $entries as $index => $entry ) { - $key = "$ns:$index"; $pdbk = $entry['pdbk']; - $title = $entry['title']; - $titleText = $title->getText(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); - - // if link was not found (in first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; - } + // we only deal with new links (in its first query) + if ( !isset( $colours[$pdbk] ) ) { + $title = $entry['title']; + $titleText = $title->getText(); + $titlesAttrs[] = array( + 'ns' => $ns, + 'key' => "$ns:$index", + 'titleText' => $titleText, + ); + // separate titles with \0 because it would never appears + // in a valid title + $titlesToBeConverted .= $titleText . "\0"; + } + } + } + + // Now do the conversion and explode string to text of titles + $titlesAllVariants = $wgContLang->autoConvertToAllVariants( $titlesToBeConverted ); + $allVariantsName = array_keys( $titlesAllVariants ); + foreach ( $titlesAllVariants as &$titlesVariant ) { + $titlesVariant = explode( "\0", $titlesVariant ); + } + $l = count( $titlesAttrs ); + // Then add variants of links to link batch + for ( $i = 0; $i < $l; $i ++ ) { + foreach ( $allVariantsName as $variantName ) { + $textVariant = $titlesAllVariants[$variantName][$i]; + if ( $textVariant != $titlesAttrs[$i]['titleText'] ) { + $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant ); + if( is_null( $variantTitle ) ) { + continue; } + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key']; } } } @@ -323,7 +474,7 @@ class LinkHolderArray { $categoryMap = array(); // maps $category_variant => $category (dbkeys) $varCategories = array(); // category replacements oldDBkey => newDBkey foreach( $output->getCategoryLinks() as $category ){ - $variants = $wgContLang->convertLinkToAllVariants($category); + $variants = $wgContLang->autoConvertToAllVariants( $category ); foreach($variants as $variant){ if($variant != $category){ $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); @@ -338,29 +489,30 @@ class LinkHolderArray { if(!$linkBatch->isEmpty()){ // construct query $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $titleClause = $linkBatch->constructSet('page', $dbr); - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $variantQuery .= " FROM $page WHERE $titleClause"; - $varRes = $dbr->query( $variantQuery, __METHOD__ ); + $varRes = $dbr->select( 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $linkBatch->constructSet( 'page', $dbr ), + __METHOD__ + ); + $linkcolour_ids = array(); // for each found variants, figure out link holders and replace - while ( $s = $dbr->fetchObject($varRes) ) { + foreach ( $varRes as $s ) { $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); $varPdbk = $variantTitle->getPrefixedDBkey(); $vardbk = $variantTitle->getDBkey(); $holderKeys = array(); - if(isset($variantMap[$varPdbk])){ + if( isset( $variantMap[$varPdbk] ) ) { $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); + $linkCache->addGoodLinkObjFromRow( $variantTitle, $s ); $output->addLink( $variantTitle, $s->page_id ); } // loop over link holders - foreach($holderKeys as $key){ + foreach( $holderKeys as $key ) { list( $ns, $index ) = explode( ':', $key, 2 ); $entry =& $this->internals[$ns][$index]; $pdbk = $entry['pdbk']; @@ -371,10 +523,10 @@ class LinkHolderArray { $entry['pdbk'] = $varPdbk; // set pdbk and colour - # FIXME: convoluted data flow + # @todo FIXME: Convoluted data flow # The redirect status and length is passed to getLinkColour via the LinkCache # Use formal parameters instead - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold ); $linkcolour_ids[$s->page_id] = $pdbk; } } @@ -406,8 +558,9 @@ class LinkHolderArray { /** * Replace link placeholders with plain text of links * (not HTML-formatted). - * @param string $text - * @return string + * + * @param $text String + * @return String */ function replaceText( $text ) { wfProfileIn( __METHOD__ ); @@ -422,7 +575,9 @@ class LinkHolderArray { } /** - * @param array $matches + * Callback for replaceText() + * + * @param $matches Array * @return string * @private */