Parser::replaceInternalLinks: fix batching
authorKevin Israel <pleasestand@live.com>
Tue, 15 Oct 2013 13:42:48 +0000 (09:42 -0400)
committerTim Starling <tstarling@wikimedia.org>
Wed, 23 Oct 2013 05:48:24 +0000 (05:48 +0000)
The parser unnecessarily made individual checks for existence of
pages that were neither in LinkCache nor linked only with a fragment.
A Title::isKnown() call in Parser::replaceInternalLinks2() (added in
bca8b8ad7d2f) caused this.

Title::isKnown() was used to avoid treating a link to a distinct page
as a self-link even when the title happened to match one of the variants
returned by Language::autoConvertToAllVariants(). This change fixes
the bug by moving the problematic portion of the self-link check into
LinkHolderArray::doVariants().

Change-Id: I586e11e8b47308980ea04087ebc4246c397a8f53

includes/parser/LinkHolderArray.php
includes/parser/Parser.php
tests/parser/parserTests.txt

index 6fc457d..4600402 100644 (file)
@@ -377,6 +377,10 @@ class LinkHolderArray {
                                $key = "$ns:$index";
                                $searchkey = "<!--LINK $key-->";
                                $displayText = $entry['text'];
+                               if ( isset( $entry['selflink'] ) ) {
+                                       $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
+                                       continue;
+                               }
                                if ( $displayText === '' ) {
                                        $displayText = null;
                                }
@@ -455,20 +459,17 @@ class LinkHolderArray {
                // single string to all variants. This would improve parser's performance
                // significantly.
                foreach ( $this->internals as $ns => $entries ) {
+                       if ( $ns == NS_SPECIAL ) {
+                               continue;
+                       }
                        foreach ( $entries as $index => $entry ) {
                                $pdbk = $entry['pdbk'];
                                // we only deal with new links (in its first query)
                                if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
-                                       $title = $entry['title'];
-                                       $titleText = $title->getText();
-                                       $titlesAttrs[] = array(
-                                               'ns' => $ns,
-                                               'key' => "$ns:$index",
-                                               'titleText' => $titleText,
-                                       );
+                                       $titlesAttrs[] = array( $index, $entry['title'] );
                                        // separate titles with \0 because it would never appears
                                        // in a valid title
-                                       $titlesToBeConverted .= $titleText . "\0";
+                                       $titlesToBeConverted .= $entry['title']->getText() . "\0";
                                }
                        }
                }
@@ -479,19 +480,35 @@ class LinkHolderArray {
                foreach ( $titlesAllVariants as &$titlesVariant ) {
                        $titlesVariant = explode( "\0", $titlesVariant );
                }
-               $l = count( $titlesAttrs );
+
                // Then add variants of links to link batch
-               for ( $i = 0; $i < $l; $i ++ ) {
+               $parentTitle = $this->parent->getTitle();
+               foreach ( $titlesAttrs as $i => $attrs ) {
+                       list( $index, $title ) = $attrs;
+                       $ns = $title->getNamespace();
+                       $text = $title->getText();
+
                        foreach ( $allVariantsName as $variantName ) {
                                $textVariant = $titlesAllVariants[$variantName][$i];
-                               if ( $textVariant != $titlesAttrs[$i]['titleText'] ) {
-                                       $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant );
-                                       if ( is_null( $variantTitle ) ) {
-                                               continue;
-                                       }
-                                       $linkBatch->addObj( $variantTitle );
-                                       $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key'];
+                               if ( $textVariant === $text ) {
+                                       continue;
+                               }
+
+                               $variantTitle = Title::makeTitle( $ns, $textVariant );
+                               if ( is_null( $variantTitle ) ) {
+                                       continue;
+                               }
+
+                               // Self-link checking for mixed/different variant titles. At this point, we
+                               // already know the exact title does not exist, so the link cannot be to a
+                               // variant of the current title that exists as a separate page.
+                               if ( $variantTitle->equals( $parentTitle ) && $title->getFragment() === '' ) {
+                                       $this->internals[$ns][$index]['selflink'] = true;
+                                       continue 2;
                                }
+
+                               $linkBatch->addObj( $variantTitle );
+                               $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
                        }
                }
 
index 7e939ee..5e86209 100644 (file)
@@ -2113,16 +2113,12 @@ class Parser {
                                }
                        }
 
-                       # Self-link checking
-                       if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
-                               if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array(
-                                       $this->mTitle->getPrefixedText(),
-                                       $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ),
-                                       true
-                               ) ) ) {
-                                       $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
-                                       continue;
-                               }
+                       # Self-link checking. For some languages, variants of the title are checked in
+                       # LinkHolderArray::doVariants() to allow batching the existence checks necessary
+                       # for linking to a different variant.
+                       if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && $nt->getFragment() === '' ) {
+                               $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
+                               continue;
                        }
 
                        # NS_MEDIA is a pseudo-namespace for linking directly to a file
index 6ae5fb4..3882185 100644 (file)
@@ -14438,6 +14438,17 @@ title=[[Duna]] language=sr
 </p>
 !! end
 
+!! test
+Link to a section of a variant of this title shouldn't be parsed as self-link
+!! options
+title=[[Duna]] language=sr
+!! input
+[[Dуна]] is a self-link while [[Dunа#Foo]] and [[Dуна#Foo]] are not self-links.
+!! result
+<p><strong class="selflink">Dуна</strong> is a self-link while <a href="/wiki/%D0%94%D1%83%D0%BD%D0%B0" title="Дуна">Dunа#Foo</a> and <a href="/wiki/%D0%94%D1%83%D0%BD%D0%B0" title="Дуна">Dуна#Foo</a> are not self-links.
+</p>
+!! end
+
 !! test
 Link to pages in language variants
 !! options