From 940fb88ba44d670d5fc12fa2e58e684ff41c3fdc Mon Sep 17 00:00:00 2001 From: Kevin Israel Date: Tue, 15 Oct 2013 09:42:48 -0400 Subject: [PATCH] Parser::replaceInternalLinks: fix batching The parser unnecessarily made individual checks for existence of pages that were neither in LinkCache nor linked only with a fragment. A Title::isKnown() call in Parser::replaceInternalLinks2() (added in bca8b8ad7d2f) caused this. Title::isKnown() was used to avoid treating a link to a distinct page as a self-link even when the title happened to match one of the variants returned by Language::autoConvertToAllVariants(). This change fixes the bug by moving the problematic portion of the self-link check into LinkHolderArray::doVariants(). Change-Id: I586e11e8b47308980ea04087ebc4246c397a8f53 --- includes/parser/LinkHolderArray.php | 51 +++++++++++++++++++---------- includes/parser/Parser.php | 16 ++++----- tests/parser/parserTests.txt | 11 +++++++ 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 6fc457d741..4600402091 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -377,6 +377,10 @@ class LinkHolderArray { $key = "$ns:$index"; $searchkey = ""; $displayText = $entry['text']; + if ( isset( $entry['selflink'] ) ) { + $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query ); + continue; + } if ( $displayText === '' ) { $displayText = null; } @@ -455,20 +459,17 @@ class LinkHolderArray { // single string to all variants. This would improve parser's performance // significantly. foreach ( $this->internals as $ns => $entries ) { + if ( $ns == NS_SPECIAL ) { + continue; + } foreach ( $entries as $index => $entry ) { $pdbk = $entry['pdbk']; // we only deal with new links (in its first query) if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) { - $title = $entry['title']; - $titleText = $title->getText(); - $titlesAttrs[] = array( - 'ns' => $ns, - 'key' => "$ns:$index", - 'titleText' => $titleText, - ); + $titlesAttrs[] = array( $index, $entry['title'] ); // separate titles with \0 because it would never appears // in a valid title - $titlesToBeConverted .= $titleText . "\0"; + $titlesToBeConverted .= $entry['title']->getText() . "\0"; } } } @@ -479,19 +480,35 @@ class LinkHolderArray { foreach ( $titlesAllVariants as &$titlesVariant ) { $titlesVariant = explode( "\0", $titlesVariant ); } - $l = count( $titlesAttrs ); + // Then add variants of links to link batch - for ( $i = 0; $i < $l; $i ++ ) { + $parentTitle = $this->parent->getTitle(); + foreach ( $titlesAttrs as $i => $attrs ) { + list( $index, $title ) = $attrs; + $ns = $title->getNamespace(); + $text = $title->getText(); + foreach ( $allVariantsName as $variantName ) { $textVariant = $titlesAllVariants[$variantName][$i]; - if ( $textVariant != $titlesAttrs[$i]['titleText'] ) { - $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant ); - if ( is_null( $variantTitle ) ) { - continue; - } - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key']; + if ( $textVariant === $text ) { + continue; + } + + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if ( is_null( $variantTitle ) ) { + continue; + } + + // Self-link checking for mixed/different variant titles. At this point, we + // already know the exact title does not exist, so the link cannot be to a + // variant of the current title that exists as a separate page. + if ( $variantTitle->equals( $parentTitle ) && $title->getFragment() === '' ) { + $this->internals[$ns][$index]['selflink'] = true; + continue 2; } + + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index"; } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 7e939eeaad..5e8620968e 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -2113,16 +2113,12 @@ class Parser { } } - # Self-link checking - if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { - if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array( - $this->mTitle->getPrefixedText(), - $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ), - true - ) ) ) { - $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); - continue; - } + # Self-link checking. For some languages, variants of the title are checked in + # LinkHolderArray::doVariants() to allow batching the existence checks necessary + # for linking to a different variant. + if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && $nt->getFragment() === '' ) { + $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); + continue; } # NS_MEDIA is a pseudo-namespace for linking directly to a file diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 6ae5fb429b..3882185237 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -14438,6 +14438,17 @@ title=[[Duna]] language=sr

!! end +!! test +Link to a section of a variant of this title shouldn't be parsed as self-link +!! options +title=[[Duna]] language=sr +!! input +[[Dуна]] is a self-link while [[Dunа#Foo]] and [[Dуна#Foo]] are not self-links. +!! result +

Dуна is a self-link while Dunа#Foo and Dуна#Foo are not self-links. +

+!! end + !! test Link to pages in language variants !! options -- 2.20.1