From: Aryeh Gregor Date: Wed, 7 Jan 2009 00:41:03 +0000 (+0000) Subject: Reduce code duplication correctly this time X-Git-Tag: 1.31.0-rc.0~43539 X-Git-Url: https://git.cyclocoop.org/%7B%24admin_url%7Dmembres/modifier.php?a=commitdiff_plain;h=da5d1b714fce037a9911f72671e947db8a5c1e7b;p=lhc%2Fweb%2Fwiklou.git Reduce code duplication correctly this time This reverts r45470 and fixes the problems it identified. Things should now work as they always used to, but with less code duplication, and with $wgEnforceHtmlIds = false working correctly as well. --- diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 7fcfb90a35..1a4901c392 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -3448,7 +3448,7 @@ class Parser * @private */ function formatHeadings( $text, $isMain=true ) { - global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds; + global $wgMaxTocLevel, $wgContLang; $doNumberHeadings = $this->mOptions->getNumberHeadings(); $showEditLink = $this->mOptions->getEditSection(); @@ -3593,71 +3593,17 @@ class Parser } } - # The safe header is a version of the header text safe to use for links - # Avoid insertion of weird stuff like <math> by expanding the relevant sections - $safeHeadline = $this->mStripState->unstripBoth( $headline ); - - # Remove link placeholders by the link text.
- # <!--LINK number--> - # turns into - # link text with suffix - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); - - # Strip out HTML (other than plain <sup> and <sub>: bug 8393) - $tocline = preg_replace( - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), - array( '', '<$1>'), - $safeHeadline - ); - $tocline = trim( $tocline ); - - # For the anchor, strip out HTML-y stuff period - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); - $safeHeadline = trim( $safeHeadline ); - - # Save headline for section edit hint before it's escaped - $headlineHint = $safeHeadline; - - if ( $wgEnforceHtmlIds ) { - $legacyHeadline = false; - $safeHeadline = Sanitizer::escapeId( $safeHeadline, - 'noninitial' ); - } else { - # For reverse compatibility, provide an id that's - # HTML4-compatible, like we used to. - # - # It may be worth noting, academically, that it's possible for - # the legacy anchor to conflict with a non-legacy headline - # anchor on the page. In this case likely the "correct" thing - # would be to either drop the legacy anchors or make sure - # they're numbered first. However, this would require people - # to type in section names like "abc_.D7.93.D7.90.D7.A4" - # manually, so let's not bother worrying about it. - $legacyHeadline = Sanitizer::escapeId( $safeHeadline, - 'noninitial' ); - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); - - if ( $legacyHeadline == $safeHeadline ) { - # No reason to have both (in fact, we can't) - $legacyHeadline = false; - } elseif ( $legacyHeadline != Sanitizer::escapeId( - $legacyHeadline, 'xml' ) ) { - # The legacy id is invalid XML. We used to allow this, but - # there's no reason to do so anymore. Backward - # compatibility will fail slightly in this case, but it's - # no big deal. - $legacyHeadline = false; - } - } + list( $anchor, $legacyAnchor, $tocline, $headlineHint ) = + $this->processHeadingText( $headline ); # HTML names must be case-insensitively unique (bug 10721).
FIXME: # Does this apply to Unicode characters? Because we aren't # handling those here. - $arrayKey = strtolower( $safeHeadline ); - if ( $legacyHeadline === false ) { + $arrayKey = strtolower( $anchor ); + if ( $legacyAnchor === false ) { $legacyArrayKey = false; } else { - $legacyArrayKey = strtolower( $legacyHeadline ); + $legacyArrayKey = strtolower( $legacyAnchor ); } # count how many in assoc. array so we can track dupes in anchors @@ -3679,12 +3625,10 @@ class Parser } # Create the anchor for linking from the TOC to the section - $anchor = $safeHeadline; - $legacyAnchor = $legacyHeadline; if ( $refers[$arrayKey] > 1 ) { $anchor .= '_' . $refers[$arrayKey]; } - if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { + if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) { $legacyAnchor .= '_' . $refers[$legacyArrayKey]; } if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { @@ -3756,6 +3700,70 @@ class Parser } } + private function processHeadingText( $headline ) { + global $wgEnforceHtmlIds; + + # The safe header is a version of the header text safe to use for links + # Avoid insertion of weird stuff like <math> by expanding the relevant sections + $safeHeadline = $this->mStripState->unstripBoth( $headline ); + + # Remove link placeholders by the link text.
+ # <!--LINK number--> + # turns into + # link text with suffix + $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); + + # Strip out HTML (other than plain <sup> and <sub>: bug 8393) + $tocline = preg_replace( + array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), + array( '', '<$1>'), + $safeHeadline + ); + $tocline = trim( $tocline ); + + # For the anchor, strip out HTML-y stuff period + $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); + $safeHeadline = trim( $safeHeadline ); + + # Save headline for section edit hint before it's escaped + $headlineHint = $safeHeadline; + + if ( $wgEnforceHtmlIds ) { + $legacyHeadline = false; + $safeHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + } else { + # For reverse compatibility, provide an id that's + # HTML4-compatible, like we used to. + # + # It may be worth noting, academically, that it's possible for + # the legacy anchor to conflict with a non-legacy headline + # anchor on the page. In this case likely the "correct" thing + # would be to either drop the legacy anchors or make sure + # they're numbered first. However, this would require people + # to type in section names like "abc_.D7.93.D7.90.D7.A4" + # manually, so let's not bother worrying about it. + $legacyHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); + + if ( $legacyHeadline == $safeHeadline ) { + # No reason to have both (in fact, we can't) + $legacyHeadline = false; + } elseif ( $legacyHeadline != Sanitizer::escapeId( + $legacyHeadline, 'xml' ) ) { + # The legacy id is invalid XML. We used to allow this, but + # there's no reason to do so anymore. Backward + # compatibility will fail slightly in this case, but it's + # no big deal.
+ $legacyHeadline = false; + } + } + + return array( $safeHeadline, $legacyHeadline, $tocline, + $headlineHint ); + } + /** * Transform wiki markup when saving a page by doing \r\n -> \n * conversion, substitting signatures, {{subst:}} templates, etc. @@ -4736,21 +4744,9 @@ class Parser * "== Header ==". */ public function guessSectionNameFromWikiText( $text ) { - # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); - $headline = Sanitizer::decodeCharReferences( $text ); - # strip out HTML - $headline = StringUtils::delimiterReplace( '<', '>', '', $headline ); - $headline = trim( $headline ); - $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) ); - $replacearray = array( - '%3A' => ':', - '%' => '.' - ); - return str_replace( - array_keys( $replacearray ), - array_values( $replacearray ), - $sectionanchor ); + list( $text, /* unneeded here */ ) = $this->processHeadingText( $text ); + return "#$text"; } /**