From 16047ca2b774ca2e025cc566b0b3c5d20a99f676 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 17 Mar 2006 01:02:14 +0000 Subject: [PATCH] * Don't URL-decode in the title attribute for URL links; it can produce false results that don't code back to their original values. --- RELEASE-NOTES | 6 ++++ includes/Linker.php | 13 ++++++++- includes/Parser.php | 22 +++++++++------ maintenance/parserTests.txt | 56 +++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 10 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 8f6a4e106b..0d9e3d60d9 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -695,6 +695,12 @@ fully support the editing toolbar, but was found to be too confusing. * (bug 5236) Load wikibits.js before site-customized javascript * (bug 4119) Workaround for following link in Walloon; remove capitals from linktrail, as they're not used anywhere else. +* (bug 4781) Output links with the percent-encoding they're supplied with; + save the normalization for internal link storage. The normalization is a bit + buggy and can make incorrect foldings in the query string and such, so isn't + reliable beyond the hostname where it's used for the spam bulk checker. +* Don't URL-decode in the title attribute for URL links; it can produce false + results that don't code back to their original values. === Caveats === diff --git a/includes/Linker.php b/includes/Linker.php index 32b7cb476b..30654572e1 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -30,6 +30,17 @@ class Linker { function getExternalLinkAttributes( $link, $text, $class='' ) { global $wgContLang; + $link = htmlspecialchars( $link ); + + $r = ($class != '') ? 
" class='$class'" : " class='external'"; + + $r .= " title=\"{$link}\""; + return $r; + } + + function getInterwikiLinkAttributes( $link, $text, $class='' ) { + global $wgContLang; + $same = ($link == $text); $link = urldecode( $link ); $link = $wgContLang->checkTitleEncoding( $link ); @@ -194,7 +205,7 @@ class Linker { $u = $nt->getFullURL(); $link = $nt->getPrefixedURL(); if ( '' == $text ) { $text = $nt->getPrefixedText(); } - $style = $this->getExternalLinkAttributes( $link, $text, 'extiw' ); + $style = $this->getInterwikiLinkAttributes( $link, $text, 'extiw' ); $inside = ''; if ( '' != $trail ) { diff --git a/includes/Parser.php b/includes/Parser.php index 3b5d119f40..195d280e26 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1132,9 +1132,6 @@ class Parser # Replace & from obsolete syntax with &. # All HTML entities will be escaped by makeExternalLink() $url = str_replace( '&', '&', $url ); - # Replace unnecessary URL escape codes with the referenced character - # This prevents spammers from hiding links from the filters - $url = Parser::replaceUnusualEscapes( $url ); # Process the trail (i.e. everything after this link up until start of the next link), # replacing any non-bracketed links @@ -1146,8 +1143,11 @@ class Parser # This was changed in August 2004 $s .= $sk->makeExternalLink( $url, $text, false, $linktype ) . $dtrail . $trail; - # Register link in the output object - $this->mOutput->addExternalLink( $url ); + # Register link in the output object. 
+ # Replace unnecessary URL escape codes with the referenced character + # This prevents spammers from hiding links from the filters + $pasteurized = Parser::replaceUnusualEscapes( $url ); + $this->mOutput->addExternalLink( $pasteurized ); } wfProfileOut( $fname ); @@ -1203,16 +1203,16 @@ class Parser # All HTML entities will be escaped by makeExternalLink() # or maybeMakeExternalImage() $url = str_replace( '&', '&', $url ); - # Replace unnecessary URL escape codes with their equivalent characters - $url = Parser::replaceUnusualEscapes( $url ); # Is this an external image? $text = $this->maybeMakeExternalImage( $url ); if ( $text === false ) { # Not an image, make a link $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free' ); - # Register it in the output object - $this->mOutput->addExternalLink( $url ); + # Register it in the output object... + # Replace unnecessary URL escape codes with their equivalent characters + $pasteurized = Parser::replaceUnusualEscapes( $url ); + $this->mOutput->addExternalLink( $pasteurized ); } $s .= $text . $trail; } else { @@ -1228,6 +1228,10 @@ class Parser * @param string * @return string * @static + * @fixme This can merge genuinely required bits in the path or query string, + * breaking legit URLs. A proper fix would treat the various parts of + * the URL differently; as a workaround, just use the output for + * statistical records, not for actual linking/output. */ function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index d3125613c7..c0d8f6f716 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -735,6 +735,62 @@ http://www.example.com/?title=Ben-Hur_%281959_film%29 !! end +!! test +Bug 4781: %26 in autonumber URL +!! input +[http://www.example.com/?title=AT%26T] +!! result +

[1] +

+!! end + +!! test +Bug 4781, 5267: %25 in autonumber URL +!! input +[http://www.example.com/?title=100%25_Bran] +!! result +

[1] +

+!! end + +!! test +Bug 4781, 5267: %28, %29 in autonumber URL +!! input +[http://www.example.com/?title=Ben-Hur_%281959_film%29] +!! result +

[1] +

+!! end + + +!! test +Bug 4781: %26 in bracketed URL +!! input +[http://www.example.com/?title=AT%26T link] +!! result +

link +

+!! end + +!! test +Bug 4781, 5267: %25 in bracketed URL +!! input +[http://www.example.com/?title=100%25_Bran link] +!! result +

link +

+!! end + +!! test +Bug 4781, 5267: %28, %29 in bracketed URL +!! input +[http://www.example.com/?title=Ben-Hur_%281959_film%29 link] +!! result +

link +

+!! end + + ### ### Quotes ### -- 2.20.1