const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
+ # Regular expression for a non-newline space
+ const SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
+
# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;
* @var MagicWordArray
*/
public $mSubstWords;
- public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
+ # Initialised in constructor
+ public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols;
# Cleared with clearState():
/**
* @param bool|PPFrame $frame
* @return mixed|string
*/
- public function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) {
+ public function preprocess( $text, Title $title = null,
+ ParserOptions $options, $revid = null, $frame = false
+ ) {
wfProfileIn( __METHOD__ );
$magicScopeVariable = $this->lock();
$this->startParse( $title, $options, self::OT_PREPROCESS, true );
wfProfileIn( __METHOD__ );
$prots = wfUrlProtocolsWithoutProtRel();
$urlChar = self::EXT_LINK_URL_CLASS;
+ $space = self::SPACE_NOT_NL; # non-newline space
+ $spdash = "(?:-|$space)"; # a dash or a non-newline space
+ $spaces = "$space++"; # possessive match of 1 or more spaces
$text = preg_replace_callback(
'!(?: # Start cases
(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
- (\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . '
- \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number
- \bISBN\s+( # m[5]: ISBN, capture number
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ (\b(?i:$prots)$urlChar+) | # m[3]: Free external links
+ \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number
+ ([0-9]+)\b |
+ \bISBN $spaces ( # m[5]: ISBN, capture number
+ (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
+ (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
[0-9Xx] # check digit
- )\b
- )!xu', array( &$this, 'magicLinkCallback' ), $text );
+ )\b
+ )!xu", array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
}
} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
+ $space = self::SPACE_NOT_NL; # non-newline space
+ $isbn = preg_replace( "/$space/", ' ', $isbn );
$num = strtr( $isbn, array(
'-' => '',
' ' => '',
$sep .= ')';
}
- $numSepChars = strspn( strrev( $url ), $sep );
+ $urlRev = strrev( $url );
+ $numSepChars = strspn( $urlRev, $sep );
+ # Don't break a trailing HTML entity by moving the ; into $trail
+ # This is in hot code, so use substr_compare to avoid having to
+ # create a new string object for the comparison
+ if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0) {
+ # more optimization: instead of running preg_match with a $
+ # anchor, which can be slow, do the match on the reversed
+ # string starting at the desired offset.
+ # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
+ if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
+ $numSepChars--;
+ }
+ }
if ( $numSepChars ) {
$trail = substr( $url, -$numSepChars ) . $trail;
$url = substr( $url, 0, -$numSepChars );
# * <sup> and <sub> (bug 8393)
# * <i> (bug 26375)
# * <b> (r105284)
+ # * <bdi> (bug 72884)
# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
#
# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
# to allow setting directionality in toc items.
$tocline = preg_replace(
array(
- '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#',
- '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#'
+ '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?' . '>#',
+ '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?' . '>#'
),
array( '', '<$1>' ),
$safeHeadline
$pairs = array(
"\r\n" => "\n",
+ "\r" => "\n",
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
if ( $options->getPreSaveTransform() ) {