array. This allows dependency injection to be used for ResourceLoader modules.
* $wgExceptionHooks has been removed.
* (T45547) $wgUsePigLatinVariant added (off by default).
+* (T152540) MediaWiki now supports a section ID escaping style that allows
+ non-Latin characters to be displayed verbatim in many modern browsers. This is controlled by the
+ new configuration setting, $wgFragmentMode.
+* $wgExperimentalHtmlIds is now deprecated and will be removed in a future version,
+ use $wgFragmentMode to migrate off it to a modern alternative.
+* $wgExternalInterwikiFragmentMode was introduced to control how fragments in
+ interwikis going outside of the current wiki farm are encoded.
=== New features in 1.30 ===
* (T37247) Output from Parser::parse() will now be wrapped in a div with
MediaWikiServices instead. Access to the underlying BagOStuff is possible
through the new ParserCache::getCacheStorage() method.
* .mw-ui-constructive CSS class (deprecated in 1.27) was removed.
+* Sanitizer::escapeId() was deprecated, use escapeIdForAttribute(),
+ escapeIdForLink() or escapeIdForExternalInterwiki() instead.
+* Title::escapeFragmentForURL() was deprecated, use one of the aforementioned
+ Sanitizer functions or, if possible, Title::getFragmentForURL().
+* Second parameter to Sanitizer::escapeIdReferenceList() ($options) now does
+ nothing and is deprecated.
+* mw.util.escapeId() was deprecated, use escapeIdForAttribute() or
+ escapeIdForLink().
== Compatibility ==
MediaWiki 1.30 requires PHP 5.5.9 or later. There is experimental support for
'ResourceLoaderJqueryMsgModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderJqueryMsgModule.php',
'ResourceLoaderLanguageDataModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderLanguageDataModule.php',
'ResourceLoaderLanguageNamesModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderLanguageNamesModule.php',
+ 'ResourceLoaderMediaWikiUtilModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderMediaWikiUtilModule.php',
'ResourceLoaderModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderModule.php',
'ResourceLoaderOOUIFileModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderOOUIFileModule.php',
'ResourceLoaderOOUIImageModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderOOUIImageModule.php',
$wgDisableOutputCompression = false;
/**
- * Should we allow a broader set of characters in id attributes, per HTML5? If
- * not, use only HTML 4-compatible IDs. This option is for testing -- when the
- * functionality is ready, it will be on by default with no option.
+ * Abandoned experiment with HTML5-style ID escaping. Normalized IDs a bit
+ * too aggressively, breaking preexisting content (particularly Cite).
+ * See T29733, T29694, T29474.
*
- * Currently this appears to work fine in all browsers, but it's disabled by
- * default because it normalizes id's a bit too aggressively, breaking preexisting
- * content (particularly Cite). See T29733, T29694, T29474.
+ * @deprecated since 1.30, use $wgFragmentMode
*/
$wgExperimentalHtmlIds = false;
+/**
+ * How should section IDs be encoded?
+ * This array can contain 1 or 2 elements, each of them can be one of:
+ * - 'html5' is modern HTML5 style encoding with minimal escaping. Allows to
+ * display Unicode characters in many browsers' address bars.
+ * - 'legacy' is old MediaWiki-style encoding, e.g. 啤酒 turns into .E5.95.A4.E9.85.92
+ * - 'html5-legacy' corresponds to DEPRECATED $wgExperimentalHtmlIds mode. DO NOT use
+ * it for anything but migration off that mode (see below).
+ *
+ * The first element of this array specifies the primary mode of escaping IDs. This
+ * is what users will see when they e.g. follow an [[#internal link]] to a section of
+ * a page.
+ *
+ * The optional second element defines a fallback mode, useful for migrations.
+ * If present, it will direct MediaWiki to add an empty <div> before every section heading,
+ * with its id attribute set to the fallback-encoded title, so that links using the previous
+ * encoding still work.
+ *
+ * Example: you want to migrate your wiki from 'legacy' to 'html5'
+ *
+ * On the first step, set this variable to [ 'legacy', 'html5' ]. After a while, when
+ * all caches (parser, HTTP, etc.) contain only pages generated with this setting,
+ * flip the value to [ 'html5', 'legacy' ]. This will result in all internal links being
+ * generated in the new encoding while old links (both external and cached internal) will
+ * still work. After a long time, you might want to ditch backwards compatibility and
+ * set it to [ 'html5' ]. After all, pages get edited, breaking incoming links no matter which
+ * fragment mode is used.
+ *
+ * @since 1.30
+ */
+$wgFragmentMode = [ 'legacy' ];
+
+/**
+ * Which ID escaping mode should be used for external interwiki links? See documentation
+ * for $wgFragmentMode above for details of each mode. Because you can't control external sites,
+ * this setting should probably always be 'legacy', unless every wiki you link to has converted
+ * to 'html5'.
+ *
+ * @since 1.30
+ */
+$wgExternalInterwikiFragmentMode = 'legacy';
+
/**
* Abstract list of footer icons for skins in place of old copyrightico and poweredbyico code
* You can add new icons to the built in copyright or poweredby, or you can create
global $wgParser;
if ( $this->sectiontitle !== '' ) {
- $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $this->sectiontitle );
+ $sectionanchor = $this->guessSectionName( $this->sectiontitle );
// If no edit summary was specified, create one automatically from the section
// title and have it link to the new section. Otherwise, respect the summary as
// passed.
->rawParams( $cleanSectionTitle )->inContentLanguage()->text();
}
} elseif ( $this->summary !== '' ) {
- $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $this->summary );
+ $sectionanchor = $this->guessSectionName( $this->summary );
# This is a new section, so create a link to the new section
# in the revision summary.
$cleanSummary = $wgParser->stripSectionName( $this->summary );
* time.
*/
public function internalAttemptSave( &$result, $bot = false ) {
- global $wgUser, $wgRequest, $wgParser, $wgMaxArticleSize;
+ global $wgUser, $wgRequest, $wgMaxArticleSize;
global $wgContentHandlerUseDB;
$status = Status::newGood();
# We can't deal with anchors, includes, html etc in the header for now,
# headline would need to be parsed to improve this.
if ( $hasmatch && strlen( $matches[2] ) > 0 ) {
- $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $matches[2] );
+ $sectionanchor = $this->guessSectionName( $matches[2] );
}
}
$result['sectionanchor'] = $sectionanchor;
}
return $wikitext;
}
+
+ /**
+ * Converts section name wikitext into the anchor used for HTTP redirects.
+ *
+ * Requests whose User-Agent header matches MSIE or Edge receive the legacy anchor
+ * (if possible), because various versions of Microsoft browsers misinterpret the
+ * fragment encoding of Location: headers and show mojibake in the address bar.
+ * All other clients receive the regular anchor, which is HTML5 if the wiki is
+ * configured for it, to spread the new style links more efficiently.
+ *
+ * @param string $text
+ * @return string
+ */
+ private function guessSectionName( $text ) {
+ global $wgParser;
+
+ $agent = $this->context->getRequest()->getHeader( 'User-Agent' );
+ $isMicrosoftBrowser = $agent && preg_match( '/MSIE|Edge/', $agent );
+
+ // Microsoft browsers are sent to the legacy encoding; real browsers get real anchors
+ return $isMicrosoftBrowser
+ ? $wgParser->guessLegacySectionNameFromWikiText( $text )
+ : $wgParser->guessSectionNameFromWikiText( $text );
+ }
}
* a space and ending with '>'
* This *must* be at least '>' for no attribs
* @param string $anchor The anchor to give the headline (the bit after the #)
- * @param string $html Html for the text of the header
+ * @param string $html HTML for the text of the header
* @param string $link HTML to add for the section edit link
- * @param bool|string $legacyAnchor A second, optional anchor to give for
+ * @param string|bool $fallbackAnchor A second, optional anchor to give for
* backward compatibility (false to omit)
*
* @return string HTML headline
*/
public static function makeHeadline( $level, $attribs, $anchor, $html,
- $link, $legacyAnchor = false
+ $link, $fallbackAnchor = false
) {
+ $anchorEscaped = htmlspecialchars( $anchor ); // anchor is interpolated into an id="..." attribute below
$ret = "<h$level$attribs"
- . "<span class=\"mw-headline\" id=\"$anchor\">$html</span>"
+ . "<span class=\"mw-headline\" id=\"$anchorEscaped\">$html</span>"
. $link
. "</h$level>";
- if ( $legacyAnchor !== false ) {
- $ret = "<div id=\"$legacyAnchor\"></div>$ret";
+ if ( $fallbackAnchor !== false && $fallbackAnchor !== $anchor ) { // skip a fallback identical to the primary anchor
+ $fallbackAnchor = htmlspecialchars( $fallbackAnchor );
+ $ret = "<div id=\"$fallbackAnchor\"></div>$ret"; // empty element placed before the heading carries the fallback id
}
return $ret;
}
const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i';
const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/";
+ /**
+ * Tells escapeIdForAttribute() to encode the ID using the wiki's primary encoding.
+ *
+ * @since 1.30
+ */
+ const ID_PRIMARY = 0;
+
+ /**
+ * Tells escapeIdForAttribute() to encode the ID using the fallback encoding, or return false
+ * if no fallback is configured.
+ *
+ * @since 1.30
+ */
+ const ID_FALLBACK = 1;
+
/**
* List of all named character entities defined in HTML 4.01
* https://www.w3.org/TR/html4/sgml/entities.html
# Escape HTML id attributes
if ( $attribute === 'id' ) {
- $value = self::escapeId( $value, 'noninitial' );
+ $value = self::escapeIdForAttribute( $value, Sanitizer::ID_PRIMARY );
}
# Escape HTML id reference lists
* ambiguous if it's part of something that looks like a percent escape
* (which don't work reliably in fragments cross-browser).
*
+ * @deprecated since 1.30, use one of this class' escapeIdFor*() functions
+ *
* @see https://www.w3.org/TR/html401/types.html#type-name Valid characters
* in the id and name attributes
* @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
return $id;
}
+ /**
+ * Escapes a section name (or any other user-generated or otherwise unsafe string) so that
+ * it can be used as the value of an HTML id attribute.
+ *
+ * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+ * be sure to use proper escaping.
+ *
+ * @param string $id String to escape
+ * @param int $mode One of the ID_* constants, selecting the primary or the fallback entry
+ * of $wgFragmentMode.
+ * @return string|bool Escaped ID, or false if the fallback encoding is requested but it's not
+ * configured.
+ * @throws UnexpectedValueException If $wgFragmentMode has no primary mode
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForAttribute( $id, $mode = self::ID_PRIMARY ) {
+ global $wgFragmentMode;
+
+ if ( isset( $wgFragmentMode[$mode] ) ) {
+ return self::escapeIdInternal( $id, $wgFragmentMode[$mode] );
+ }
+
+ // Nothing configured for the requested slot: fatal for primary, "no ID" for fallback
+ if ( $mode === self::ID_PRIMARY ) {
+ throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+ }
+
+ return false;
+ }
+
+ /**
+ * Escapes a section name (or any other user-generated or otherwise unsafe string) so that
+ * it can be used as a URL fragment, using the wiki's primary fragment mode.
+ *
+ * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+ * be sure to use proper escaping.
+ *
+ * @param string $id String to escape
+ * @return string Escaped ID
+ * @throws UnexpectedValueException If $wgFragmentMode has no primary mode
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForLink( $id ) {
+ global $wgFragmentMode;
+
+ if ( !isset( $wgFragmentMode[self::ID_PRIMARY] ) ) {
+ throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+ }
+
+ $primaryMode = $wgFragmentMode[self::ID_PRIMARY];
+
+ // Same escaping as for the id attribute, followed by URL-escaping where the mode needs it
+ return self::urlEscapeId( self::escapeIdInternal( $id, $primaryMode ), $primaryMode );
+ }
+
+ /**
+ * Escapes a section name (or any other user-generated or otherwise unsafe string) so that
+ * it can be used as a URL fragment in links to external interwikis. The encoding is taken
+ * from $wgExternalInterwikiFragmentMode.
+ *
+ * @param string $id String to escape
+ * @return string Escaped ID
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForExternalInterwiki( $id ) {
+ global $wgExternalInterwikiFragmentMode;
+
+ $mode = $wgExternalInterwikiFragmentMode;
+
+ return self::urlEscapeId( self::escapeIdInternal( $id, $mode ), $mode );
+ }
+
+ /**
+ * Helper for escapeIdFor*() functions. URL-escapes the ID when the mode is 'html5';
+ * IDs in any other mode are returned unchanged.
+ *
+ * @param string $id String to escape
+ * @param string $mode One of modes from $wgFragmentMode
+ * @return string
+ */
+ private static function urlEscapeId( $id, $mode ) {
+ if ( $mode !== 'html5' ) {
+ return $id;
+ }
+
+ // Colons are kept literal after URL-encoding
+ return str_replace( '%3A', ':', urlencode( $id ) );
+ }
+
+ /**
+ * Helper for escapeIdFor*() functions. Performs most of the actual escaping.
+ *
+ * Character references in $id are decoded first, then the mode-specific rules apply:
+ * - 'html5': spaces are replaced with underscores.
+ * - 'legacy': spaces are replaced with underscores, the result is urlencode()d, then
+ * '%3A' is turned back into ':' and every remaining '%' into '.'.
+ * - 'html5-legacy': runs of whitespace, '_', quotes, '&', '#' and '%' collapse into one '_',
+ * underscores are trimmed from both ends, and an empty result becomes '_'.
+ *
+ * @param string $id String to escape
+ * @param string $mode One of modes from $wgFragmentMode
+ * @return string
+ * @throws InvalidArgumentException If $mode is not one of the modes listed above
+ */
+ private static function escapeIdInternal( $id, $mode ) {
+ $id = Sanitizer::decodeCharReferences( $id );
+
+ switch ( $mode ) {
+ case 'html5':
+ $id = str_replace( ' ', '_', $id );
+ break;
+ case 'legacy':
+ // This corresponds to 'noninitial' mode of the old escapeId()
+ static $replace = [
+ '%3A' => ':',
+ '%' => '.'
+ ];
+
+ $id = urlencode( str_replace( ' ', '_', $id ) );
+ $id = strtr( $id, $replace );
+ break;
+ case 'html5-legacy':
+ $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
+ $id = trim( $id, '_' );
+ if ( $id === '' ) {
+ // Must have been all whitespace to start with.
+ $id = '_';
+ }
+ break;
+ default:
+ // The message previously ended in a stray, unbalanced quote before the method name
+ throw new InvalidArgumentException( "Invalid mode '$mode' passed to " . __METHOD__ );
+ }
+
+ return $id;
+ }
+
/**
* Given a string containing a space delimited list of ids, escape each id
* to match ids escaped by the escapeId() function.
*
+ * @todo wfDeprecated() uses of $options in 1.31, remove completely in 1.32
+ *
* @since 1.27
*
* @param string $referenceString Space delimited list of ids
- * @param string|array $options String or array of strings (default is array()):
- * 'noninitial': This is a non-initial fragment of an id, not a full id,
- * so don't pay attention if the first character isn't valid at the
- * beginning of an id. Only matters if $wgExperimentalHtmlIds is
- * false.
- * 'legacy': Behave the way the old HTML 4-based ID escaping worked even
- * if $wgExperimentalHtmlIds is used, so we can generate extra
- * anchors and links won't break.
+ * @param string|array $options Deprecated and does nothing.
* @return string
*/
static function escapeIdReferenceList( $referenceString, $options = [] ) {
# Escape each token as an id
foreach ( $references as &$ref ) {
- $ref = self::escapeId( $ref, $options );
+ $ref = self::escapeIdForAttribute( $ref );
}
# Merge the array back to a space delimited list string
}
unset( $repo ); // no global pollution; destroy reference
+// Convert this deprecated setting to modern system
+if ( $wgExperimentalHtmlIds ) {
+ $wgFragmentMode = [ 'html5-legacy', 'legacy' ];
+}
+
$rcMaxAgeDays = $wgRCMaxAge / ( 3600 * 24 );
if ( $wgRCFilterByAge ) {
// Trim down $wgRCLinkDays so that it only lists links which are valid
/**
* Escape a text fragment, say from a link, for a URL
*
+ * @deprecated since 1.30, use Sanitizer::escapeIdForLink() or escapeIdForExternalInterwiki()
+ *
* @param string $fragment Containing a URL or link fragment (after the "#")
* @return string Escaped string
*/
/**
* Get the fragment in URL form, including the "#" character if there is one
+ *
+ * A title without a fragment yields an empty string. Titles that are external and
+ * have no trans-wiki ID are escaped with Sanitizer::escapeIdForExternalInterwiki();
+ * all other titles are escaped with Sanitizer::escapeIdForLink().
+ *
* @return string Fragment in URL form
*/
public function getFragmentForURL() {
if ( !$this->hasFragment() ) {
return '';
- } else {
- return '#' . self::escapeFragmentForURL( $this->getFragment() );
+ } elseif ( $this->isExternal() && !$this->getTransWikiID() ) {
+ return '#' . Sanitizer::escapeIdForExternalInterwiki( $this->getFragment() );
}
+ return '#' . Sanitizer::escapeIdForLink( $this->getFragment() );
}
/**
* @return string The HTML.
*/
protected function makeHeader( $header, $canonicalId ) {
- $spanAttribs = [ 'class' => 'mw-headline', 'id' => Sanitizer::escapeId( $header ) ];
- $h2Attribs = [ 'id' => Sanitizer::escapeId( $canonicalId ) ];
+ $spanAttribs = [ 'class' => 'mw-headline', 'id' => Sanitizer::escapeIdForAttribute( $header ) ]; // id derived from the header text
+ $h2Attribs = [ 'id' => Sanitizer::escapeIdForAttribute( $canonicalId ) ]; // id derived from the canonical identifier
return Html::rawElement( 'h2', $h2Attribs, Html::element( 'span', $spanAttribs, $header ) );
}
$header = $this->msg( 'api-help-datatypes-header' )->parse();
- // Add an additional span with sanitized ID
- if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) ) {
- $header = Html::element( 'span', [ 'id' => Sanitizer::escapeId( 'main/datatypes' ) ] ) .
- $header;
- }
- $help['datatypes'] .= Html::rawElement( 'h' . min( 6, $level ),
- [ 'id' => 'main/datatypes', 'class' => 'apihelp-header' ],
- $header
+ $id = Sanitizer::escapeIdForAttribute( 'main/datatypes', Sanitizer::ID_PRIMARY );
+ $idFallback = Sanitizer::escapeIdForAttribute( 'main/datatypes', Sanitizer::ID_FALLBACK );
+
+ $help['datatypes'] .= Linker::makeHeadline( min( 6, $level ),
+ ' class="apihelp-header"',
+ $id,
+ $header,
+ '',
+ $idFallback
);
$help['datatypes'] .= $this->msg( 'api-help-datatypes' )->parseAsBlock();
if ( !isset( $tocData['main/datatypes'] ) ) {
];
}
- // Add an additional span with sanitized ID
- if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) ) {
- $header = Html::element( 'span', [ 'id' => Sanitizer::escapeId( 'main/credits' ) ] ) .
- $header;
- }
$header = $this->msg( 'api-credits-header' )->parse();
- $help['credits'] .= Html::rawElement( 'h' . min( 6, $level ),
- [ 'id' => 'main/credits', 'class' => 'apihelp-header' ],
- $header
+ $id = Sanitizer::escapeIdForAttribute( 'main/credits', Sanitizer::ID_PRIMARY );
+ $idFallback = Sanitizer::escapeIdForAttribute( 'main/credits', Sanitizer::ID_FALLBACK );
+ $help['credits'] .= Linker::makeHeadline( min( 6, $level ),
+ ' class="apihelp-header"',
+ $id,
+ $header,
+ '',
+ $idFallback
);
$help['credits'] .= $this->msg( 'api-credits' )->useDatabase( false )->parseAsBlock();
if ( !isset( $tocData['main/credits'] ) ) {
$attributes = [];
if ( $fieldsetIDPrefix ) {
- $attributes['id'] = Sanitizer::escapeId( "$fieldsetIDPrefix$key" );
+ $attributes['id'] = Sanitizer::escapeIdForAttribute( "$fieldsetIDPrefix$key" );
}
$subsectionHtml .= $this->wrapFieldSetSection( $legend, $section, $attributes );
} else {
];
if ( $sectionName ) {
- $attribs['id'] = Sanitizer::escapeId( $sectionName );
+ $attribs['id'] = Sanitizer::escapeIdForAttribute( $sectionName );
}
if ( $displayFormat === 'table' ) {
$this->mDir = $params['dir'];
}
- $validName = Sanitizer::escapeId( $this->mName );
- $validName = str_replace( [ '.5B', '.5D' ], [ '[', ']' ], $validName );
+ $validName = urlencode( $this->mName );
+ $validName = str_replace( [ '%5B', '%5D' ], [ '[', ']' ], $validName );
if ( $this->mName != $validName && !isset( $params['nodata'] ) ) {
throw new MWException( "Invalid name '{$this->mName}' passed to " . __METHOD__ );
}
if ( isset( $params['id'] ) ) {
$id = $params['id'];
- $validId = Sanitizer::escapeId( $id );
+ $validId = urlencode( $id );
if ( $id != $validId ) {
throw new MWException( "Invalid id '$id' passed to " . __METHOD__ );
'items' => $fieldsHtml,
];
if ( $sectionName ) {
- $config['id'] = Sanitizer::escapeId( $sectionName );
+ $config['id'] = Sanitizer::escapeIdForAttribute( $sectionName );
}
if ( is_string( $this->mWrapperLegend ) ) {
$config['label'] = $this->mWrapperLegend;
$info['name'] = $name;
}
if ( isset( $info['id'] ) ) {
- $info['id'] = Sanitizer::escapeId( "{$this->mID}--$key--{$info['id']}" );
+ $info['id'] = Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--{$info['id']}" );
} else {
- $info['id'] = Sanitizer::escapeId( "{$this->mID}--$key--$fieldname" );
+ $info['id'] = Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--$fieldname" );
}
// Copy the hide-if rules to "child" fields, so that the JavaScript code handling them
// (resources/src/mediawiki/htmlform/hide-if.js) doesn't have to handle nested fields.
'type' => 'submit',
'formnovalidate' => true,
'name' => $name,
- 'id' => Sanitizer::escapeId( "{$this->mID}--$key--delete" ),
+ 'id' => Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--delete" ),
'cssclass' => 'mw-htmlform-cloner-delete-button',
'default' => $this->getMessage( $label )->text(),
], $this->mParent );
'type' => 'submit',
'formnovalidate' => true,
'name' => $name,
- 'id' => Sanitizer::escapeId( "{$this->mID}--create" ),
+ 'id' => Sanitizer::escapeIdForAttribute( "{$this->mID}--create" ),
'cssclass' => 'mw-htmlform-cloner-create-button',
'default' => $this->getMessage( $label )->text(),
], $this->mParent );
$html .= Html::rawElement( 'h1', [], $label ) . "\n";
$html .= $this->formatOptions( $info, $value );
} else {
- $id = Sanitizer::escapeId( $this->mID . "-$info" );
+ $id = Sanitizer::escapeIdForAttribute( $this->mID . "-$info" );
$classes = [ 'mw-htmlform-flatlist-item' ];
if ( $wgUseMediaWikiUIEverywhere || $this->mParent instanceof VFormHTMLForm ) {
$classes[] = 'mw-ui-radio';
$r .= "<table id=\"mw_metadata\" class=\"mw_metadata\">\n";
foreach ( $metadata as $type => $stuff ) {
foreach ( $stuff as $v ) {
- # @todo FIXME: Why is this using escapeId for a class?!
- $class = Sanitizer::escapeId( $v['id'] );
+ $class = str_replace( ' ', '_', $v['id'] );
if ( $type == 'collapsed' ) {
// Handled by mediawiki.action.view.metadata module.
$class .= ' collapsable';
}
- $r .= "<tr class=\"$class\">\n";
- $r .= "<th>{$v['name']}</th>\n";
- $r .= "<td>{$v['value']}</td>\n</tr>";
+ $r .= Html::rawElement( 'tr',
+ [ 'class' => $class ],
+ Html::rawElement( 'th', [], $v['name'] )
+ . Html::rawElement( 'td', [], $v['value'] )
+ );
}
}
$r .= "</table>\n</div>\n";
* @private
*/
public function formatHeadings( $text, $origText, $isMain = true ) {
- global $wgMaxTocLevel, $wgExperimentalHtmlIds;
+ global $wgMaxTocLevel;
# Inhibit editsection links if requested in the page
if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
# Save headline for section edit hint before it's escaped
$headlineHint = $safeHeadline;
- if ( $wgExperimentalHtmlIds ) {
- # For reverse compatibility, provide an id that's
- # HTML4-compatible, like we used to.
- # It may be worth noting, academically, that it's possible for
- # the legacy anchor to conflict with a non-legacy headline
- # anchor on the page. In this case likely the "correct" thing
- # would be to either drop the legacy anchors or make sure
- # they're numbered first. However, this would require people
- # to type in section names like "abc_.D7.93.D7.90.D7.A4"
- # manually, so let's not bother worrying about it.
- $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
- [ 'noninitial', 'legacy' ] );
- $safeHeadline = Sanitizer::escapeId( $safeHeadline );
-
- if ( $legacyHeadline == $safeHeadline ) {
- # No reason to have both (in fact, we can't)
- $legacyHeadline = false;
- }
- } else {
- $legacyHeadline = false;
- $safeHeadline = Sanitizer::escapeId( $safeHeadline,
- 'noninitial' );
+ $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
+ $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
+ $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
+ if ( $fallbackHeadline === $safeHeadline ) {
+ # No reason to have both (in fact, we can't)
+ $fallbackHeadline = false;
}
- # HTML names must be case-insensitively unique (T12721).
- # This does not apply to Unicode characters per
- # https://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
+ # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
# @todo FIXME: We may be changing them depending on the current locale.
$arrayKey = strtolower( $safeHeadline );
- if ( $legacyHeadline === false ) {
- $legacyArrayKey = false;
+ if ( $fallbackHeadline === false ) {
+ $fallbackArrayKey = false;
} else {
- $legacyArrayKey = strtolower( $legacyHeadline );
+ $fallbackArrayKey = strtolower( $fallbackHeadline );
}
# Create the anchor for linking from the TOC to the section
$anchor = $safeHeadline;
- $legacyAnchor = $legacyHeadline;
+ $fallbackAnchor = $fallbackHeadline;
if ( isset( $refers[$arrayKey] ) ) {
// @codingStandardsIgnoreStart
for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
// @codingStandardsIgnoreEnd
$anchor .= "_$i";
+ $linkAnchor .= "_$i";
$refers["${arrayKey}_$i"] = true;
} else {
$refers[$arrayKey] = true;
}
- if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
+ if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
// @codingStandardsIgnoreStart
- for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
+ for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
// @codingStandardsIgnoreEnd
- $legacyAnchor .= "_$i";
- $refers["${legacyArrayKey}_$i"] = true;
+ $fallbackAnchor .= "_$i";
+ $refers["${fallbackArrayKey}_$i"] = true;
} else {
- $refers[$legacyArrayKey] = true;
+ $refers[$fallbackArrayKey] = true;
}
# Don't number the heading if it is the only one (looks silly)
}
if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
- $toc .= Linker::tocLine( $anchor, $tocline,
+ $toc .= Linker::tocLine( $linkAnchor, $tocline,
$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
}
}
$head[$headlineCount] = Linker::makeHeadline( $level,
$matches['attrib'][$headlineCount], $anchor, $headline,
- $editlink, $legacyAnchor );
+ $editlink, $fallbackAnchor );
$headlineCount++;
}
# Strip out wikitext links(they break the anchor)
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
- return '#' . Sanitizer::escapeId( $text, 'noninitial' );
+ return '#' . Sanitizer::escapeIdForLink( $text );
}
/**
* Same as guessSectionNameFromWikiText(), but produces legacy anchors
- * instead. For use in redirects, since IE6 interprets Redirect: headers
- * as something other than UTF-8 (apparently?), resulting in breakage.
+ * instead, if possible. For use in redirects, since various versions
+ * of Microsoft browsers interpret Location: headers as something other
+ * than UTF-8, resulting in breakage.
+ *
+ * A legacy anchor is only produced when 'legacy' is the second (fallback)
+ * entry of $wgFragmentMode; in every other configuration the anchor is
+ * built with Sanitizer::escapeIdForLink(), i.e. the primary encoding.
*
* @param string $text The section name
* @return string An anchor
*/
public function guessLegacySectionNameFromWikiText( $text ) {
+ global $wgFragmentMode;
+
# Strip out wikitext links(they break the anchor)
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
- return '#' . Sanitizer::escapeId( $text, [ 'noninitial', 'legacy' ] );
+
+ if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
+ // ForAttribute() and ForLink() are the same for legacy encoding
+ $id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK );
+ } else {
+ $id = Sanitizer::escapeIdForLink( $text );
+ }
+
+ return "#$id";
}
/**
--- /dev/null
+<?php
+/**
+ * ResourceLoader mediawiki.util module
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * ResourceLoader module for mediawiki.util.
+ *
+ * Prepends a config-set script exporting 'wgFragmentMode' to the regular file module script.
+ *
+ * @since 1.30
+ */
+class ResourceLoaderMediaWikiUtilModule extends ResourceLoaderFileModule {
+ /**
+ * @inheritdoc
+ */
+ public function getScript( ResourceLoaderContext $context ) {
+ $fragmentConfig = [ 'wgFragmentMode' => $this->getConfig()->get( 'FragmentMode' ) ];
+ $configScript = ResourceLoader::makeConfigSetScript( $fragmentConfig );
+
+ // Config assignment goes first, separated from the file contents by a newline
+ return $configScript . "\n" . parent::getScript( $context );
+ }
+
+ /**
+ * @inheritdoc
+ */
+ public function enableModuleContentVersion() {
+ return true;
+ }
+}
}
foreach ( $validFooterIcons as $blockName => $footerIcons ) {
$html .= Html::openElement( 'div', [
- 'id' => 'f-' . Sanitizer::escapeId( $blockName ) . 'ico',
+ 'id' => Sanitizer::escapeIdForAttribute( "f-{$blockName}ico" ),
'class' => 'footer-icons'
] );
foreach ( $footerIcons as $icon ) {
foreach ( $validFooterLinks as $aLink ) {
$html .= Html::rawElement(
'li',
- [ 'id' => Sanitizer::escapeId( $aLink ) ],
+ [ 'id' => Sanitizer::escapeIdForAttribute( $aLink ) ],
$this->get( $aLink )
);
}
$out .= Html::rawElement(
'div',
[
- 'id' => Sanitizer::escapeId( "mw-indicator-$id" ),
+ 'id' => Sanitizer::escapeIdForAttribute( "mw-indicator-$id" ),
'class' => 'mw-indicator',
],
$content
$bar[$heading][] = array_merge( [
'text' => $text,
'href' => $href,
- 'id' => 'n-' . Sanitizer::escapeId( strtr( $line[1], ' ', '-' ), 'noninitial' ),
- 'active' => false
+ 'id' => Sanitizer::escapeIdForAttribute( 'n-' . strtr( $line[1], ' ', '-' ) ),
+ 'active' => false,
], $extraAttribs );
} else {
continue;
$grantCellHtml = '<ul><li>' . implode( "</li>\n<li>", $descs ) . '</li></ul>';
}
- $id = \Sanitizer::escapeId( $grant );
+ $id = Sanitizer::escapeIdForAttribute( $grant );
$out->addHTML( \Html::rawElement( 'tr', [ 'id' => $id ],
"<td>" .
$this->msg(
? $groupsRemoveFromSelf[$group]
: [];
- $id = $group == '*' ? false : Sanitizer::escapeId( $group );
+ $id = $group == '*' ? false : Sanitizer::escapeIdForAttribute( $group );
$out->addHTML( Html::rawElement( 'tr', [ 'id' => $id ], "
<td>$grouppage$grouplink</td>
<td>" .
// Finally! Create the table
$html = Html::openElement( 'tr', [
'class' => 'mw-version-ext',
- 'id' => Sanitizer::escapeId( 'mw-version-ext-' . $type . '-' . $extension['name'] )
+ 'id' => Sanitizer::escapeIdForAttribute( 'mw-version-ext-' . $type . '-' . $extension['name'] )
]
);
}
if ( !$isSecond ) {
- $arr['id'] = Sanitizer::escapeId( 'msg_' . $this->getLanguage()->lcfirst( $row->am_title ) );
+ $arr['id'] = Sanitizer::escapeIdForAttribute(
+ 'msg_' . $this->getLanguage()->lcfirst( $row->am_title )
+ );
}
return $arr;
]
],
'mediawiki.util' => [
+ 'class' => 'ResourceLoaderMediaWikiUtilModule',
'scripts' => 'resources/src/mediawiki/mediawiki.util.js',
'dependencies' => [
'jquery.accessKeyLabel',
newList.push(
$( '<div>' )
.addClass( 'mw-indicator' )
- .attr( 'id', mw.util.escapeId( 'mw-indicator-' + name ) )
+ .attr( 'id', mw.util.escapeIdForAttribute( 'mw-indicator-' + name ) )
.html( indicator )
.get( 0 ),
// Add a whitespace between the <div>s because
( function ( mw, $ ) {
'use strict';
+ var util;
+
+ /**
+ * Percent-encode a string like PHP's rawurlencode
+ *
+ * encodeURIComponent() leaves the characters ! ' ( ) * ~ untouched, so they
+ * are converted to their uppercase %XX form in a second pass.
+ * @ignore
+ *
+ * @param {string} str String to be encoded.
+ * @return {string} Encoded string
+ */
+ function rawurlencode( str ) {
+ return encodeURIComponent( String( str ) ).replace( /[!'()*~]/g, function ( ch ) {
+ return '%' + ch.charCodeAt( 0 ).toString( 16 ).toUpperCase();
+ } );
+ }
+
+ /**
+ * Private helper function used by util.escapeId*()
+ * @ignore
+ *
+ * Supported modes:
+ * - 'html5': spaces are replaced with underscores, nothing else changes.
+ * - 'html5-legacy': every run of space, tab, newline, carriage return, form
+ * feed, underscore or ' " & # % collapses to a single underscore, then the
+ * underscores at both ends are stripped; an empty result becomes '_'.
+ * - 'legacy': spaces become underscores, the result is passed through
+ * rawurlencode(), '%3A' is turned back into ':' and every remaining '%'
+ * becomes '.'.
+ *
+ * @param {string} str String to be encoded
+ * @param {string} mode Encoding mode, see documentation for $wgFragmentMode
+ * in DefaultSettings.php
+ * @return {string} Encoded string
+ * @throws {Error} If mode is not one of the modes listed above
+ */
+ function escapeIdInternal( str, mode ) {
+ str = String( str );
+
+ switch ( mode ) {
+ case 'html5':
+ return str.replace( / /g, '_' );
+ case 'html5-legacy':
+ // The trimming pattern must be global: without the g flag only the
+ // first match (the leading run) is removed and trailing underscores
+ // are left in place.
+ str = str.replace( /[ \t\n\r\f_'"&#%]+/g, '_' )
+ .replace( /^_+|_+$/g, '' );
+ if ( str === '' ) {
+ str = '_';
+ }
+ return str;
+ case 'legacy':
+ return rawurlencode( str.replace( / /g, '_' ) )
+ .replace( /%3A/g, ':' )
+ .replace( /%/g, '.' );
+ default:
+ throw new Error( 'Unrecognized ID escaping mode ' + mode );
+ }
+ }
+
/**
* Utility library
* @class mw.util
* @singleton
*/
- var util = {
+ util = {
/* Main body */
* @param {string} str String to be encoded.
* @return {string} Encoded string
*/
- rawurlencode: function ( str ) {
- str = String( str );
- return encodeURIComponent( str )
- .replace( /!/g, '%21' ).replace( /'/g, '%27' ).replace( /\(/g, '%28' )
- .replace( /\)/g, '%29' ).replace( /\*/g, '%2A' ).replace( /~/g, '%7E' );
- },
+ rawurlencode: rawurlencode,
/**
- * Encode the string like Sanitizer::escapeId in PHP
+ * Encode the string like Sanitizer::escapeId() in PHP
+ * @deprecated since 1.30 use escapeIdForAttribute() or escapeIdForLink()
*
* @param {string} str String to be encoded.
* @return {string} Encoded string
*/
escapeId: function ( str ) {
- str = String( str );
- return util.rawurlencode( str.replace( / /g, '_' ) )
- .replace( /%3A/g, ':' )
- .replace( /%/g, '.' );
+ return escapeIdInternal( str, 'legacy' );
+ },
+
+ /**
+ * Escape a string so that it can be used as the value of an HTML id attribute.
+ * The first entry of the wgFragmentMode configuration selects the encoding.
+ * Analog to PHP Sanitizer::escapeIdForAttribute()
+ *
+ * @since 1.30
+ *
+ * @param {string} str String to encode
+ * @return {string} Encoded string
+ */
+ escapeIdForAttribute: function ( str ) {
+ var fragmentModes = mw.config.get( 'wgFragmentMode' );
+
+ return escapeIdInternal( str, fragmentModes[ 0 ] );
+ },
+
+ /**
+ * Escape a string so that it can be used as the fragment part of a link.
+ * The first entry of the wgFragmentMode configuration selects the encoding;
+ * in 'html5' mode the result is additionally URL-encoded, keeping ':' readable.
+ * Analog to PHP Sanitizer::escapeIdForLink()
+ *
+ * @since 1.30
+ *
+ * @param {string} str String to encode
+ * @return {string} Encoded string
+ */
+ escapeIdForLink: function ( str ) {
+ var primaryMode = mw.config.get( 'wgFragmentMode' )[ 0 ],
+ escaped = escapeIdInternal( str, primaryMode );
+
+ // Only the html5 form needs URL-encoding on top of the id escaping
+ return primaryMode === 'html5' ?
+ encodeURIComponent( escaped ).replace( /%3A/g, ':' ) :
+ escaped;
},
/**
// Append the encoded fragment
if ( fragment.length ) {
- url += '#' + util.escapeId( fragment );
+ url += '#' + util.escapeIdForLink( fragment );
}
return url;
// wgEnableMagicLinks={"ISBN":false, "PMID":false, "RFC":false}
'wgEnableMagicLinks' => self::getOptionValue( 'wgEnableMagicLinks', $opts, [] )
+ [ 'ISBN' => true, 'PMID' => true, 'RFC' => true ],
+ // Test with legacy encoding by default until HTML5 is very stable and default
+ 'wgFragmentMode' => [ 'legacy' ],
];
if ( $config ) {
1&2&3&4&amp;5=Indicator
!! end
+
+!! test
+HTML5 ids: fallback to legacy
+!! config
+wgFragmentMode=[ 'html5', 'legacy' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey < # " > % : '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D0.A2.D0.B5.D1.81.D1.82"></div><h2><span class="mw-headline" id="Тест">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D0.A2.D0.B5.D1.81.D1.82_2"></div><h2><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D1.82.D0.B5.D1.81.D1.82"></div><h2><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Hey_.3C_.23_.22_.3E_.25_:_.27"></div><h2><span class="mw-headline" id="Hey_<_#_"_>_%_:_'">Hey < # " > % : '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=6" title="Edit section: Hey < # " > % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey < # " > % : '</a>
+</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
+</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+</p>
+!! end
+
+!! test
+HTML5 ids: legacy with a fallback to modern
+!! config
+wgFragmentMode=[ 'legacy', 'html5' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#.D0.A2.D0.B5.D1.81.D1.82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#.D0.A2.D0.B5.D1.81.D1.82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#.D1.82.D0.B5.D1.81.D1.82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_.3C_.23_.22_.3E_.25_:_.27"><span class="tocnumber">6</span> <span class="toctext">Hey < # " > % : '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Тест"></div><h2><span class="mw-headline" id=".D0.A2.D0.B5.D1.81.D1.82">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Тест_2"></div><h2><span class="mw-headline" id=".D0.A2.D0.B5.D1.81.D1.82_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="тест"></div><h2><span class="mw-headline" id=".D1.82.D0.B5.D1.81.D1.82">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Hey_<_#_"_>_%_:_'"></div><h2><span class="mw-headline" id="Hey_.3C_.23_.22_.3E_.25_:_.27">Hey < # " > % : '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=6" title="Edit section: Hey < # " > % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#.D0.A2.D0.B5.D1.81.D1.82">#Тест</a> <a href="#.D1.82.D0.B5.D1.81.D1.82">#тест</a> <a href="#Hey_.3C_.23_.22_.3E_.25_:_.27">#Hey < # " > % : '</a>
+</p><p>.F0.9F.92.A9 <span id=".F0.9F.92.A9"></span>
+</p><p><a href="#.E5.95.A4.E9.85.92">#啤酒</a> <a href="#.E5.95.A4.E9.85.92">#啤酒</a>
+</p>
+!! end
+
+!! test
+HTML5 ids: no legacy
+!! config
+wgFragmentMode=[ 'html5' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey < # " > % : '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Тест">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Hey_<_#_"_>_%_:_'">Hey < # " > % : '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=6" title="Edit section: Hey < # " > % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey < # " > % : '</a>
+</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
+</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+</p>
+!! end
/**
* @todo Tests covering decodeCharReferences can be refactored into a single
* method and dataprovider.
+ *
+ * @group Sanitizer
*/
class SanitizerTest extends MediaWikiTestCase {
}
/**
- * Test escapeIdReferenceList for consistency with escapeId
+ * Test escapeIdReferenceList for consistency with escapeIdForAttribute
*
* @dataProvider provideEscapeIdReferenceList
* @covers Sanitizer::escapeIdReferenceList
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
$this->assertEquals(
Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' ),
- Sanitizer::escapeId( $id1, 'noninitial' )
+ Sanitizer::escapeIdForAttribute( $id1 )
. ' '
- . Sanitizer::escapeId( $id2, 'noninitial' )
+ . Sanitizer::escapeIdForAttribute( $id2 )
);
}
[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
];
}
+
+ /**
+ * Runs one Sanitizer::escapeIdFor*() method under the given fragment mode
+ * settings and compares its result with the expected value.
+ *
+ * @dataProvider provideEscapeIdForStuff
+ *
+ * @covers Sanitizer::escapeIdForAttribute()
+ * @covers Sanitizer::escapeIdForLink()
+ * @covers Sanitizer::escapeIdForExternalInterwiki()
+ * @covers Sanitizer::escapeIdInternal()
+ * @covers Sanitizer::urlEscapeId()
+ *
+ * @param string $stuff Suffix of the method to call: Sanitizer::escapeIdFor{$stuff}
+ * @param string[] $config Last element becomes $wgExternalInterwikiFragmentMode,
+ * the remaining elements become $wgFragmentMode
+ * @param string $id Input handed to the method
+ * @param string|false $expected
+ * @param int|null $mode Handed to the method as its second argument
+ */
+ public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
+ $externalInterwikiMode = array_pop( $config );
+ $this->setMwGlobals( [
+ 'wgFragmentMode' => $config,
+ 'wgExternalInterwikiFragmentMode' => $externalInterwikiMode,
+ ] );
+
+ $actual = call_user_func( [ 'Sanitizer', "escapeIdFor{$stuff}" ], $id, $mode );
+ self::assertEquals( $expected, $actual );
+ }
+
+ /**
+ * Data provider for testEscapeIdForStuff().
+ *
+ * Each case is [ method suffix, settings, input, expected result, optional mode ].
+ *
+ * @return array[]
+ */
+ public function provideEscapeIdForStuff() {
+ // Test inputs and outputs
+ $text = 'foo тест_#%!\'()[]:<>';
+ $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E';
+ $html5Encoded = 'foo_тест_#%!\'()[]:<>';
+ $html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25%21%27%28%29%5B%5D:%3C%3E';
+ $html5Experimental = 'foo_тест_!_()[]:<>';
+
+ // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
+ $legacy = [ 'legacy', 'legacy' ];
+ $legacyNew = [ 'legacy', 'html5', 'legacy' ];
+ $newLegacy = [ 'html5', 'legacy', 'legacy' ];
+ $new = [ 'html5', 'legacy' ];
+ $allNew = [ 'html5', 'html5' ];
+ $experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
+ $newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];
+
+ return [
+ // Pure legacy: how MW worked before 2017
+ [ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $legacy, $text, $legacyEncoded ],
+ [ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],
+
+ // Transition to a new world: legacy links with HTML5 fallback
+ [ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $legacyNew, $text, $legacyEncoded ],
+ [ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],
+
+ // New world: HTML5 links, legacy fallbacks
+ [ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $newLegacy, $text, $html5Escaped ],
+ [ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],
+
+ // Distant future: no legacy fallbacks, but still linking to legacy wikis
+ [ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $new, $text, $html5Escaped ],
+ [ 'ExternalInterwiki', $new, $text, $legacyEncoded ],
+
+ // Just before the heat death of universe: external interwikis are also HTML5 \m/
+ [ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $allNew, $text, $html5Escaped ],
+ [ 'ExternalInterwiki', $allNew, $text, $html5Escaped ],
+
+ // Someone flipped $wgExperimentalHtmlIds on
+ [ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $experimentalLegacy, $text, $html5Experimental ],
+ [ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],
+
+ // Migration from $wgExperimentalHtmlIds to modern HTML5
+ [ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $newExperimental, $text, $html5Escaped ],
+ [ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
+ ];
+ }
+
+ /**
+ * An unrecognized entry in $wgFragmentMode must make escaping fail.
+ *
+ * @expectedException InvalidArgumentException
+ * @covers Sanitizer::escapeIdInternal()
+ */
+ public function testInvalidFragmentThrows() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 'boom!' ] ] );
+ Sanitizer::escapeIdForAttribute( 'This should throw' );
+ }
+
+ /**
+ * A $wgFragmentMode array without an element at index 0 must make
+ * escapeIdForAttribute() fail.
+ *
+ * @expectedException UnexpectedValueException
+ * @covers Sanitizer::escapeIdForAttribute()
+ */
+ public function testNoPrimaryFragmentModeThrows() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 666 => 'html5' ] ] );
+ Sanitizer::escapeIdForAttribute( 'This should throw' );
+ }
+
+ /**
+ * A $wgFragmentMode array without an element at index 0 must make
+ * escapeIdForLink() fail.
+ *
+ * @expectedException UnexpectedValueException
+ * @covers Sanitizer::escapeIdForLink()
+ */
+ public function testNoPrimaryFragmentModeThrows2() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 666 => 'html5' ] ] );
+ Sanitizer::escapeIdForLink( 'This should throw' );
+ }
}
} );
QUnit.test( 'escapeId', function ( assert ) {
- mw.config.set( 'wgExperimentalHtmlIds', false );
+ mw.config.set( 'wgFragmentMode', [ 'legacy' ] );
$.each( {
'+': '.2B',
'&': '.26',
} );
} );
+ QUnit.test( 'escapeIdForAttribute', function ( assert ) {
+ // Input, expected outputs and scenarios are kept in sync with SanitizerTest.php
+ var input = 'foo тест_#%!\'()[]:<>',
+ legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E',
+ html5Encoded = 'foo_тест_#%!\'()[]:<>',
+ html5Experimental = 'foo_тест_!_()[]:<>',
+ // Each scenario: [ value of wgFragmentMode, expected result ]
+ scenarios = [
+ // Pure legacy: how MW worked before 2017
+ [ [ 'legacy' ], legacyEncoded ],
+ // Transition to a new world: legacy links with HTML5 fallback
+ [ [ 'legacy', 'html5' ], legacyEncoded ],
+ // New world: HTML5 links, legacy fallbacks
+ [ [ 'html5', 'legacy' ], html5Encoded ],
+ // Distant future: no legacy fallbacks
+ [ [ 'html5' ], html5Encoded ],
+ // Someone flipped $wgExperimentalHtmlIds on
+ [ [ 'html5-legacy', 'legacy' ], html5Experimental ],
+ // Migration from $wgExperimentalHtmlIds to modern HTML5
+ [ [ 'html5', 'html5-legacy' ], html5Encoded ]
+ ];
+
+ scenarios.forEach( function ( scenario ) {
+ mw.config.set( 'wgFragmentMode', scenario[ 0 ] );
+
+ assert.equal( util.escapeIdForAttribute( input ), scenario[ 1 ] );
+ } );
+ } );
+
+ QUnit.test( 'escapeIdForLink', function ( assert ) {
+ // Input and scenarios are kept in sync with SanitizerTest.php
+ var input = 'foo тест_#%!\'()[]:<>',
+ legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E',
+ html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25!\'()%5B%5D:%3C%3E',
+ html5Experimental = 'foo_тест_!_()[]:<>',
+ // Each scenario: [ value of wgFragmentMode, expected result ]
+ scenarios = [
+ // Pure legacy: how MW worked before 2017
+ [ [ 'legacy' ], legacyEncoded ],
+ // Transition to a new world: legacy links with HTML5 fallback
+ [ [ 'legacy', 'html5' ], legacyEncoded ],
+ // New world: HTML5 links, legacy fallbacks
+ [ [ 'html5', 'legacy' ], html5Escaped ],
+ // Distant future: no legacy fallbacks
+ [ [ 'html5' ], html5Escaped ],
+ // Someone flipped wgExperimentalHtmlIds on
+ [ [ 'html5-legacy', 'legacy' ], html5Experimental ],
+ // Migration from wgExperimentalHtmlIds to modern HTML5
+ [ [ 'html5', 'html5-legacy' ], html5Escaped ]
+ ];
+
+ scenarios.forEach( function ( scenario ) {
+ mw.config.set( 'wgFragmentMode', scenario[ 0 ] );
+
+ assert.equal( util.escapeIdForLink( input ), scenario[ 1 ] );
+ } );
+ } );
+
QUnit.test( 'wikiUrlencode', function ( assert ) {
assert.equal( util.wikiUrlencode( 'Test:A & B/Here' ), 'Test:A_%26_B/Here' );
// See also wfUrlencodeTest.php#provideURLS