From: Roan Kattouw Date: Wed, 22 Nov 2017 23:06:21 +0000 (-0500) Subject: Parser: Add guessSectionNameFromStrippedText() and refactor X-Git-Tag: 1.31.0-rc.0~1226^2 X-Git-Url: http://git.cyclocoop.org/%27.parametre_url%28%20%20%20generer_action_auteur%28%27charger_plugin%27%2C%20%27update_flux%27%29%2C%27update_flux%27%2C%20%27oui%27%29.%27?a=commitdiff_plain;h=6f07389ef2eb;p=lhc%2Fweb%2Fwiklou.git Parser: Add guessSectionNameFromStrippedText() and refactor Split up guessSectionNameFromWikiText() into pieces to reduce code duplication, and provide guessSectionNameFromStrippedText() which doesn't do link stripping. Really these should be named guessSection*ANCHOR*From... because they return an anchor (with encoding and a '#' prefix) instead of a section name, but I didn't want to rename the existing one. Also make normalizeSectionName static (it doesn't use $this) so that guessSectionNameFromStrippedText() can be static as well. Change-Id: I56b9dda805a51517549c5ed709f4bd747ca04577 --- diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 3548da9581..87d76fa949 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4207,7 +4207,7 @@ class Parser { # Decode HTML entities $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline ); - $safeHeadline = $this->normalizeSectionName( $safeHeadline ); + $safeHeadline = self::normalizeSectionName( $safeHeadline ); $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); @@ -5756,23 +5756,42 @@ class Parser { return $this->mDefaultSort; } + private static function getSectionNameFromStrippedText( $text ) { + $text = Sanitizer::normalizeSectionNameWhitespace( $text ); + $text = Sanitizer::decodeCharReferences( $text ); + $text = self::normalizeSectionName( $text ); + return $text; + } + + private static function makeAnchor( $sectionName ) { + return '#' . 
Sanitizer::escapeIdForLink( $sectionName ); + } + + private static function makeLegacyAnchor( $sectionName ) { + global $wgFragmentMode; + if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { + // ForAttribute() and ForLink() are the same for legacy encoding + $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK ); + } else { + $id = Sanitizer::escapeIdForLink( $sectionName ); + } + + return "#$id"; + } + /** * Try to guess the section anchor name based on a wikitext fragment * presumably extracted from a heading, for example "Header" from * "== Header ==". * * @param string $text - * - * @return string + * @return string Anchor (starting with '#') */ public function guessSectionNameFromWikiText( $text ) { # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); - $text = Sanitizer::normalizeSectionNameWhitespace( $text ); - $text = Sanitizer::decodeCharReferences( $text ); - $text = $this->normalizeSectionName( $text ); - - return '#' . Sanitizer::escapeIdForLink( $text ); + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeAnchor( $sectionName ); } /** @@ -5782,25 +5801,23 @@ class Parser { * than UTF-8, resulting in breakage. 
* * @param string $text The section name - * @return string An anchor + * @return string Anchor (starting with '#') */ public function guessLegacySectionNameFromWikiText( $text ) { - global $wgFragmentMode; - # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); - $text = Sanitizer::normalizeSectionNameWhitespace( $text ); - $text = Sanitizer::decodeCharReferences( $text ); - $text = $this->normalizeSectionName( $text ); - - if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { - // ForAttribute() and ForLink() are the same for legacy encoding - $id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK ); - } else { - $id = Sanitizer::escapeIdForLink( $text ); - } + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeLegacyAnchor( $sectionName ); + } - return "#$id"; + /** + * Like guessSectionNameFromWikiText(), but takes already-stripped text as input. + * @param string $text Section name (plain text) + * @return string Anchor (starting with '#') + */ + public static function guessSectionNameFromStrippedText( $text ) { + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeAnchor( $sectionName ); } /** @@ -5809,7 +5826,7 @@ class Parser { * @param string $text * @return string */ - private function normalizeSectionName( $text ) { + private static function normalizeSectionName( $text ) { # T90902: ensure the same normalization is applied for IDs as to links $titleParser = MediaWikiServices::getInstance()->getTitleParser(); try {