# \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
- const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
+ # Simplified expression to match an IPv4 or IPv6 address, or
+ # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
+ const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
+ # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
+ const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
# Regular expression for a non-newline space
$this->mConf = $conf;
$this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
- self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
+ self::EXT_LINK_ADDR .
+ self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
} elseif ( defined( 'HPHP_VERSION' ) ) {
public function doMagicLinks( $text ) {
$prots = wfUrlProtocolsWithoutProtRel();
$urlChar = self::EXT_LINK_URL_CLASS;
+ $addr = self::EXT_LINK_ADDR;
$space = self::SPACE_NOT_NL; # non-newline space
$spdash = "(?:-|$space)"; # a dash or a non-newline space
$spaces = "$space++"; # possessive match of 1 or more spaces
$text = preg_replace_callback(
- '!(?: # Start cases
- (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
- (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
- (\b(?i:$prots)$urlChar+) | # m[3]: Free external links
- \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number
+ '!(?: # Start cases
+ (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
+ (<.*?>) | # m[2]: Skip stuff inside
+ # HTML elements' . "
+ (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
+ # m[4]: Post-protocol path
+ \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
([0-9]+)\b |
- \bISBN $spaces ( # m[5]: ISBN, capture number
- (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
- (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
- [0-9Xx] # check digit
+ \bISBN $spaces ( # m[6]: ISBN, capture number
+ (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
+ (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
+ [0-9Xx] # check digit
)\b
)!xu", array( &$this, 'magicLinkCallback' ), $text );
return $text;
return $m[0];
} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
# Free external link
- return $this->makeFreeExternalLink( $m[0] );
- } elseif ( isset( $m[4] ) && $m[4] !== '' ) {
+ return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
+ } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# RFC or PMID
if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
$keyword = 'RFC';
$urlmsg = 'rfcurl';
$cssClass = 'mw-magiclink-rfc';
- $id = $m[4];
+ $id = $m[5];
} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
$keyword = 'PMID';
$urlmsg = 'pubmedurl';
$cssClass = 'mw-magiclink-pmid';
- $id = $m[4];
+ $id = $m[5];
} else {
throw new MWException( __METHOD__ . ': unrecognised match type "' .
substr( $m[0], 0, 20 ) . '"' );
}
$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
- } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
+ } elseif ( isset( $m[6] ) && $m[6] !== '' ) {
# ISBN
- $isbn = $m[5];
+ $isbn = $m[6];
$space = self::SPACE_NOT_NL; # non-newline space
$isbn = preg_replace( "/$space/", ' ', $isbn );
$num = strtr( $isbn, array(
* Make a free external link, given a user-supplied URL
*
* @param string $url
- *
+ * @param int $numPostProto
+ * The number of characters after the protocol.
* @return string HTML
* @private
*/
- public function makeFreeExternalLink( $url ) {
+ public function makeFreeExternalLink( $url, $numPostProto ) {
$trail = '';
$url = substr( $url, 0, -$numSepChars );
}
+ # Verify that we still have a real URL after trail removal, and
+ # not just lone protocol
+ if ( strlen( $trail ) >= $numPostProto ) {
+ return $url . $trail;
+ }
+
$url = Sanitizer::cleanUrl( $url );
# Is this an external image?
$firstspace = $i;
}
} elseif ( $x2 === ' ' ) {
- if ( $firstsingleletterword == -1 ) {
- $firstsingleletterword = $i;
- // if $firstsingleletterword is set, we don't
- // look at the other options, so we can bail early.
- break;
- }
+ $firstsingleletterword = $i;
+ // if $firstsingleletterword is set, we don't
+ // look at the other options, so we can bail early.
+ break;
} else {
if ( $firstmultiletterword == -1 ) {
$firstmultiletterword = $i;
case 'gallery-internal-link':
$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
$chars = self::EXT_LINK_URL_CLASS;
+ $addr = self::EXT_LINK_ADDR;
$prots = $this->mUrlProtocols;
//check to see if link matches an absolute url, if not then it must be a wiki link.
- if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
+ if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
$link = $linkValue;
} else {
$localLinkTitle = Title::newFromText( $linkValue );
break;
case 'link':
$chars = self::EXT_LINK_URL_CLASS;
+ $addr = self::EXT_LINK_ADDR;
$prots = $this->mUrlProtocols;
if ( $value === '' ) {
$paramName = 'no-link';
$value = true;
$validated = true;
} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
- if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
+ if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );
if ( $this->mOptions->getExternalLinkTarget() ) {
return $this;
}
}
+
+ /**
+ * Set's up the PHP implementation of OOUI for use in this request
+ * and instructs OutputPage to enable OOUI for itself.
+ *
+ * @since 1.26
+ */
+ public function enableOOUI() {
+ OutputPage::setupOOUI();
+ $this->mOutput->setEnableOOUI( true );
+ }
}