From 6f07389ef2eba048c17c7e343fb8fbb026a4dbd6 Mon Sep 17 00:00:00 2001
From: Roan Kattouw
Date: Wed, 22 Nov 2017 18:06:21 -0500
Subject: [PATCH] Parser: Add guessSectionNameFromStrippedText() and refactor

Split up guessSectionNameFromWikiText() into pieces to reduce
code duplication, and provide guessSectionNameFromStrippedText()
which doesn't do link stripping.

Really these should be named guessSection*ANCHOR*From... because
they return an anchor (with encoding and a '#' prefix) instead
of a section name, but I didn't want to rename the existing one.

Also make normalizeSectionName static (it doesn't use $this) so that
guessSectionNameFromStrippedText() can be static as well.

Change-Id: I56b9dda805a51517549c5ed709f4bd747ca04577
---
 includes/parser/Parser.php | 86 ++++++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 23 deletions(-)

diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 3548da9581..87d76fa949 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4207,7 +4207,7 @@ class Parser {
 			# Decode HTML entities
 			$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
 
-			$safeHeadline = $this->normalizeSectionName( $safeHeadline );
+			$safeHeadline = self::normalizeSectionName( $safeHeadline );
 
 			$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
 			$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
@@ -5756,23 +5756,65 @@ class Parser {
 		return $this->mDefaultSort;
 	}
 
+	/**
+	 * Normalize link-stripped heading text into a section name.
+	 *
+	 * Applies, in order, Sanitizer::normalizeSectionNameWhitespace(),
+	 * Sanitizer::decodeCharReferences() and self::normalizeSectionName().
+	 *
+	 * @param string $text Heading text with wikitext links already stripped
+	 * @return string Section name, not yet encoded as an anchor
+	 */
+	private static function getSectionNameFromStrippedText( $text ) {
+		$text = Sanitizer::normalizeSectionNameWhitespace( $text );
+		$text = Sanitizer::decodeCharReferences( $text );
+		$text = self::normalizeSectionName( $text );
+		return $text;
+	}
+
+	/**
+	 * Turn a section name into a link anchor.
+	 *
+	 * @param string $sectionName
+	 * @return string Anchor (starting with '#')
+	 */
+	private static function makeAnchor( $sectionName ) {
+		return '#' . Sanitizer::escapeIdForLink( $sectionName );
+	}
+
+	/**
+	 * Turn a section name into an anchor using the fallback ID encoding
+	 * when $wgFragmentMode[1] is 'legacy', and the regular link encoding
+	 * otherwise.
+	 *
+	 * @param string $sectionName
+	 * @return string Anchor (starting with '#')
+	 */
+	private static function makeLegacyAnchor( $sectionName ) {
+		global $wgFragmentMode;
+		if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
+			// ForAttribute() and ForLink() are the same for legacy encoding
+			$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
+		} else {
+			$id = Sanitizer::escapeIdForLink( $sectionName );
+		}
+
+		return "#$id";
+	}
+
 	/**
 	 * Try to guess the section anchor name based on a wikitext fragment
 	 * presumably extracted from a heading, for example "Header" from
 	 * "== Header ==".
 	 *
 	 * @param string $text
-	 *
-	 * @return string
+	 * @return string Anchor (starting with '#')
 	 */
 	public function guessSectionNameFromWikiText( $text ) {
 		# Strip out wikitext links(they break the anchor)
 		$text = $this->stripSectionName( $text );
-		$text = Sanitizer::normalizeSectionNameWhitespace( $text );
-		$text = Sanitizer::decodeCharReferences( $text );
-		$text = $this->normalizeSectionName( $text );
-
-		return '#' . Sanitizer::escapeIdForLink( $text );
+		$sectionName = self::getSectionNameFromStrippedText( $text );
+		return self::makeAnchor( $sectionName );
 	}
 
 	/**
@@ -5782,25 +5824,23 @@ class Parser {
 	 * than UTF-8, resulting in breakage.
 	 *
 	 * @param string $text The section name
-	 * @return string An anchor
+	 * @return string Anchor (starting with '#')
 	 */
 	public function guessLegacySectionNameFromWikiText( $text ) {
-		global $wgFragmentMode;
-
 		# Strip out wikitext links(they break the anchor)
 		$text = $this->stripSectionName( $text );
-		$text = Sanitizer::normalizeSectionNameWhitespace( $text );
-		$text = Sanitizer::decodeCharReferences( $text );
-		$text = $this->normalizeSectionName( $text );
-
-		if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
-			// ForAttribute() and ForLink() are the same for legacy encoding
-			$id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK );
-		} else {
-			$id = Sanitizer::escapeIdForLink( $text );
-		}
+		$sectionName = self::getSectionNameFromStrippedText( $text );
+		return self::makeLegacyAnchor( $sectionName );
+	}
 
-		return "#$id";
+	/**
+	 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
+	 * @param string $text Section name (plain text)
+	 * @return string Anchor (starting with '#')
+	 */
+	public static function guessSectionNameFromStrippedText( $text ) {
+		$sectionName = self::getSectionNameFromStrippedText( $text );
+		return self::makeAnchor( $sectionName );
 	}
 
 	/**
@@ -5809,7 +5849,7 @@ class Parser {
 	 * @param string $text
 	 * @return string
 	 */
-	private function normalizeSectionName( $text ) {
+	private static function normalizeSectionName( $text ) {
 		# T90902: ensure the same normalization is applied for IDs as to links
 		$titleParser = MediaWikiServices::getInstance()->getTitleParser();
 		try {
-- 
2.20.1