From 13cc94a6c0c1d15bfaa59e820e86365bb68e4bd3 Mon Sep 17 00:00:00 2001
From: Conrad Irwin <conrad@users.mediawiki.org>
Date: Sat, 6 Feb 2010 15:00:45 +0000
Subject: [PATCH] Allow pipe trick to work after PST.

Fixes bug 4099, bug 8785, partially bug 16714, bug 2700.
---
 RELEASE-NOTES               |   2 +
 includes/parser/Parser.php  | 146 +++++++++++++++++++++++++++---------
 maintenance/parserTests.txt |  59 +++++++++++++++
 3 files changed, 170 insertions(+), 37 deletions(-)
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index b2e221bbcd..c8021619c1 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -830,6 +830,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 20809) Expose EditFormPreloadText via the API
 * (bug 18427) Comment (edit summary) parser option for API
 * (bug 5210)  preload parser should parse <noinclude> (as well as <includeonly>)
+* (bug 8785)  Pipe trick should work with colon functions 
+* (bug 4099)  Pipe trick doesn't work when emptiness is only provided by empty template parameter 
 
 === Languages updated in 1.16 ===
 
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index cac07a1de6..fa8b8364c2 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1511,7 +1511,7 @@ class Parser
 		if ( !$tc ) {
 			$tc = Title::legalChars() . '#%';
 			# Match a link having the form [[namespace:link|alternate]]trail
-			$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
+			$e1 = "/^([{$tc}]*)(\\|.*?)?]](.*)\$/sD";
 			# Match cases where there is no "]]", which might still be images
 			$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
 		}
@@ -1591,7 +1591,15 @@ class Parser
 
 			wfProfileIn( __METHOD__."-e1" );
 			if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
-				$text = $m[2];
+
+				if( $m[2] === '' ) {
+					$text = '';
+				} elseif( $m[2] === '|' ) { 
+					$text = $this->getPipeTrickText( $m[1] );
+				} else {
+					$text = substr( $m[2], 1 );
+				}
+
 				# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
 				# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
 				# the real problem is with the $e1 regex
@@ -1608,18 +1616,20 @@ class Parser
 					$text .= ']'; # so that replaceExternalLinks($text) works later
 					$m[3] = substr( $m[3], 1 );
 				}
+
+				# Handle pipe-trick for [[|<blah>]]
+				$lnk = $m[1] === '' ? $this->getPipeTrickLink( $text ) : $m[1];
 				# fix up urlencoded title texts
-				if( strpos( $m[1], '%' ) !== false ) {
+				if( strpos( $lnk, '%' ) !== false ) {
 					# Should anchors '#' also be rejected?
-					$m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );
+					$lnk = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($lnk) );
 				}
+
 				$trail = $m[3];
 			} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
 				$might_be_img = true;
 				$text = $m[2];
-				if ( strpos( $m[1], '%' ) !== false ) {
-					$m[1] = urldecode($m[1]);
-				}
+				$lnk = strpos( $m[1], '%' ) === false ? $m[1] : urldecode( $m[1] );
 				$trail = "";
 			} else { # Invalid form; output directly
 				$s .= $prefix . '[[' . $line ;
@@ -1632,7 +1642,7 @@ class Parser
 			# Don't allow internal links to pages containing
 			# PROTO: where PROTO is a valid URL protocol; these
 			# should be external links.
-			if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {
+			if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $lnk ) ) {
 				$s .= $prefix . '[[' . $line ;
 				wfProfileOut( __METHOD__."-misc" );
 				continue;
@@ -1640,12 +1650,12 @@ class Parser
 
 			# Make subpage if necessary
 			if ( $useSubpages ) {
-				$link = $this->maybeDoSubpageLink( $m[1], $text );
+				$link = $this->maybeDoSubpageLink( $lnk, $text );
 			} else {
-				$link = $m[1];
+				$link = $lnk;
 			}
 
-			$noforce = (substr( $m[1], 0, 1 ) !== ':');
+			$noforce = (substr( $lnk, 0, 1 ) !== ':');
 			if (!$noforce) {
 				# Strip off leading ':'
 				$link = substr( $link, 1 );
@@ -1893,6 +1903,71 @@ class Parser
 		return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
 	}
 
+	/**
+	 * Returns valid title characters and namespace characters for pipe trick.
+	 *
+	 * FIXME: the namespace characters should not be specified like this...
+	 */
+	static function getPipeTrickCharacterClasses() {
+		global $wgLegalTitleChars;
+		return  array( "[$wgLegalTitleChars]", '[ _0-9A-Za-z\x80-\xff-]' );
+	}
+
+	/**
+	 * From the [[title|]] return link-text as though the used typed [[title|link-text]]
+	 *
+	 * For most links this be as though the user typed [[ns:title|title]]
+	 * However [[ns:title (context)]], [[ns:title, context]] and [[ns:title (context), context]]
+	 * all return the |title]] with no context or indicative punctuation.
+	 */
+	function getPipeTrickText( $link ) {
+		static $rexps = FALSE;
+		if( !$rexps ) {
+			list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
+			$rexps = array (
+				# try this first, to turn "[[A, B (C)|]]" into "A, B"
+			        "/^(:?$nc+:|:|)($tc+?)( \\($tc+\\)| ï¼$tc+ï¼)$/", # [[ns:page (context)|]]
+				"/^(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)$/"  # [[ns:page (context), context|]]
+			);  
+		}
+		$text = urldecode( $link );
+
+		for( $i = 0; $i < count( $rexps ); $i++) {
+			if( preg_match( $rexps[$i], $text, $m ) ) 
+				return $m[2];
+		}
+		return $text;
+	}
+
+	/**
+	 * From the [[|link-text]] return the title as though the user typed [[title|link-text]]
+	 *
+	 * On most pages this will return link-text or "" if the link-text is not a valid title
+	 * On pages like [[ns:title (context)]] and [[ns:title, context]] it will act like
+	 * [[ns:link-text (context)|link-text]] and [[ns:link-text, context|link-text]]
+	 */
+	function getPipeTrickLink( $text ) {
+		static $rexps = FALSE, $tc;
+		if( !$rexps ) {
+			list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
+			$rexps = array (
+				"/^($nc+:|)$tc+?( \\($tc+\\))$/", # [[ns:page (context)]]
+				"/^($nc+:|)$tc+?(, $tc+|)$/"      # [[ns:page, context]]
+			);
+		}
+
+		if( !preg_match( "/^$tc+$/", $text ) )
+			return '';
+
+		$t = $this->mTitle->getText();
+
+		for( $i = 0; $i < count( $rexps ); $i++) {
+			if( preg_match( $rexps[$i], $t, $m ) )
+				return "$m[1]$text$m[2]";
+		}
+		return $text;
+	}
+
 	/**#@+
 	 * Used by doBlockLevels()
 	 * @private
@@ -3986,32 +4061,10 @@ class Parser
 			'~~~' => $sigText
 		) );
 
-		# Context links: [[|name]] and [[name (context)|]]
-		#
-		global $wgLegalTitleChars;
-		$tc = "[$wgLegalTitleChars]";
-		$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
-
-		$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/";		# [[ns:page (context)|]]
-		$p4 = "/\[\[(:?$nc+:|:|)($tc+?)(ï¼$tc+ï¼)\\|]]/";		# [[ns:pageï¼contextï¼|]]
-		$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/";	# [[ns:page (context), context|]]
-		$p2 = "/\[\[\\|($tc+)]]/";					# [[|page]]
-
-		# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
-		$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
-		$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
-		$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
-
-		$t = $this->mTitle->getText();
-		$m = array();
-		if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
-			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
-		} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
-			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
-		} else {
-			# if there's no context, don't bother duplicating the title
-			$text = preg_replace( $p2, '[[\\1]]', $text );
-		}
+		# Links of the form [[|<blah>]] or [[<blah>|]] perform pipe tricks
+		list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
+		$pipeTrickRe = "/\[\[(?:(\\|)($tc+)|($tc+)\\|)\]\]/";
+		$text = preg_replace_callback( $pipeTrickRe, array( $this, 'pstPipeTrickCallback' ), $text);
 
 		# Trim trailing whitespace
 		$text = rtrim( $text );
@@ -4019,6 +4072,25 @@ class Parser
 		return $text;
 	}
 
+	/**
+	 * Called from pstPass2 to perform the pipe trick on links.
+	 * Original was either [[|text]] or [[link|]]
+	 *
+	 * @param Array ("|" or "", text, link) $m
+	 */
+	function pstPipeTrickCallback($m)
+	{
+		if( $m[1] ) { # [[|<blah>]]
+			$text = $m[2];
+			$link = $this->getPipeTrickLink( $text );
+		} else { # [[<blah>|]]
+			$link = $m[3];
+			$text = $this->getPipeTrickText( $link );
+		}
+
+		return $link === $text ? "[[$link]]" : "[[$link|$text]]";
+	}
+
 	/**
 	 * Fetch the user's signature text, if any, and normalize to
 	 * validated, ready-to-insert wikitext.
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index 21e9da3e68..12615e01eb 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -3112,6 +3112,65 @@ pst title=[[Ns:Somearticle (IGNORED), Context]]
 [[Ns:Article, Context|Article]]
 !! end
 
+!! test
+pre-save transform: context links ("pipe trick") with url escaped page names
+!! options
+pst
+!! input
+[[Hello wo%52ld|]]
+[[Hello wo%52ld (again)|]]
+!! result
+[[Hello wo%52ld|Hello woRld]]
+[[Hello wo%52ld (again)|Hello woRld]]
+!! end
+
+!! test
+pre-save transform: context links ("pipe trick") with variables are not pre-empted
+!! options
+pst title=[[Test (page)]]
+!! input
+[[{{{1|}}}|]]
+[[|{{{1|}}}]]
+[[{{subst:PAGENAME}}|]]
+!! result
+[[{{{1|}}}|]]
+[[|{{{1|}}}]]
+[[Test (page)|Test]]
+!! end
+
+!! article
+Template:pipetest
+!! text
+[[{{{1}}}|]]
+!! endarticle
+
+!! article
+Template:testpipe
+!! text
+[[|{{{1}}}]]
+!! endarticle
+
+!! test
+("pipe trick") should work outside PST
+!!options
+title=[[Help:hello (world)]]
+!! input 
+{{pipetest|hi (world)}}
+{{pipetest|hi (world), world}}
+{{pipetest|Help:hi (world), world}}
+{{pipetest|:Help:hi (world), world}}
+{{testpipe|hi}}
+[[{{PAGENAME}}|]]
+!! result
+<p><a href="/index.php?title=Hi_(world)&amp;action=edit&amp;redlink=1" class="new" title="Hi (world) (page does not exist)">hi</a>
+<a href="/index.php?title=Hi_(world),_world&amp;action=edit&amp;redlink=1" class="new" title="Hi (world), world (page does not exist)">hi</a>
+<a href="/index.php?title=Help:Hi_(world),_world&amp;action=edit&amp;redlink=1" class="new" title="Help:Hi (world), world (page does not exist)">hi</a>
+<a href="/index.php?title=Help:Hi_(world),_world&amp;action=edit&amp;redlink=1" class="new" title="Help:Hi (world), world (page does not exist)">hi</a>
+<a href="/index.php?title=Hi_(world)&amp;action=edit&amp;redlink=1" class="new" title="Hi (world) (page does not exist)">hi</a>
+<a href="/index.php?title=Hello_(world)&amp;action=edit&amp;redlink=1" class="new" title="Hello (world) (page does not exist)">Hello</a>
+</p>
+!! end
+
 
 ###
 ### Message transform tests
-- 
2.20.1