Merge "Tiny clean up of Parser::doQuotes()"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 21 Aug 2015 21:31:01 +0000 (21:31 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Fri, 21 Aug 2015 21:31:01 +0000 (21:31 +0000)
1  2 
includes/parser/Parser.php

@@@ -87,11 -87,7 +87,11 @@@ class Parser 
        # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
        # as well as U+3000 (IDEOGRAPHIC SPACE), for bug 19052
        const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
 -      const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
 +      # Simplified expression to match an IPv4 or IPv6 address, or
 +      # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
 +      const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
 +      # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 +      const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
                \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
  
        # Regular expression for a non-newline space
                $this->mConf = $conf;
                $this->mUrlProtocols = wfUrlProtocols();
                $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
 -                      self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
 +                      self::EXT_LINK_ADDR .
 +                      self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
                if ( isset( $conf['preprocessorClass'] ) ) {
                        $this->mPreprocessorClass = $conf['preprocessorClass'];
                } elseif ( defined( 'HPHP_VERSION' ) ) {
        public function doMagicLinks( $text ) {
                $prots = wfUrlProtocolsWithoutProtRel();
                $urlChar = self::EXT_LINK_URL_CLASS;
 +              $addr = self::EXT_LINK_ADDR;
                $space = self::SPACE_NOT_NL; #  non-newline space
                $spdash = "(?:-|$space)"; # a dash or a non-newline space
                $spaces = "$space++"; # possessive match of 1 or more spaces
                $text = preg_replace_callback(
 -                      '!(?:                           # Start cases
 -                              (<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
 -                              (<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
 -                              (\b(?i:$prots)$urlChar+) |  # m[3]: Free external links
 -                              \b(?:RFC|PMID) $spaces      # m[4]: RFC or PMID, capture number
 +                      '!(?:                            # Start cases
 +                              (<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
 +                              (<.*?>) |                    # m[2]: Skip stuff inside
 +                                                           #       HTML elements' . "
 +                              (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
 +                                                           # m[4]: Post-protocol path
 +                              \b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
                                        ([0-9]+)\b |
 -                              \bISBN $spaces (            # m[5]: ISBN, capture number
 -                                      (?: 97[89] $spdash? )?   # optional 13-digit ISBN prefix
 -                                      (?: [0-9]  $spdash? ){9} # 9 digits with opt. delimiters
 -                                      [0-9Xx]                 # check digit
 +                              \bISBN $spaces (             # m[6]: ISBN, capture number
 +                                      (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
 +                                      (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
 +                                      [0-9Xx]                  #  check digit
                                )\b
                        )!xu", array( &$this, 'magicLinkCallback' ), $text );
                return $text;
                        return $m[0];
                } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
                        # Free external link
 -                      return $this->makeFreeExternalLink( $m[0] );
 -              } elseif ( isset( $m[4] ) && $m[4] !== '' ) {
 +                      return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
 +              } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
                        # RFC or PMID
                        if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
                                $keyword = 'RFC';
                                $urlmsg = 'rfcurl';
                                $cssClass = 'mw-magiclink-rfc';
 -                              $id = $m[4];
 +                              $id = $m[5];
                        } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
                                $keyword = 'PMID';
                                $urlmsg = 'pubmedurl';
                                $cssClass = 'mw-magiclink-pmid';
 -                              $id = $m[4];
 +                              $id = $m[5];
                        } else {
                                throw new MWException( __METHOD__ . ': unrecognised match type "' .
                                        substr( $m[0], 0, 20 ) . '"' );
                        }
                        $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
                        return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
 -              } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
 +              } elseif ( isset( $m[6] ) && $m[6] !== '' ) {
                        # ISBN
 -                      $isbn = $m[5];
 +                      $isbn = $m[6];
                        $space = self::SPACE_NOT_NL; #  non-newline space
                        $isbn = preg_replace( "/$space/", ' ', $isbn );
                        $num = strtr( $isbn, array(
         * Make a free external link, given a user-supplied URL
         *
         * @param string $url
 -       *
 +       * @param int $numPostProto
 +       *   The number of characters after the protocol.
         * @return string HTML
         * @private
         */
 -      public function makeFreeExternalLink( $url ) {
 +      public function makeFreeExternalLink( $url, $numPostProto ) {
  
                $trail = '';
  
                        $url = substr( $url, 0, -$numSepChars );
                }
  
 +              # Verify that we still have a real URL after trail removal, and
 +              # not just a lone protocol
 +              if ( strlen( $trail ) >= $numPostProto ) {
 +                      return $url . $trail;
 +              }
 +
                $url = Sanitizer::cleanUrl( $url );
  
                # Is this an external image?
                                                        $firstspace = $i;
                                                }
                                        } elseif ( $x2 === ' ' ) {
-                                               if ( $firstsingleletterword == -1 ) {
-                                                       $firstsingleletterword = $i;
-                                                       // if $firstsingleletterword is set, we don't
-                                                       // look at the other options, so we can bail early.
-                                                       break;
-                                               }
+                                               $firstsingleletterword = $i;
+                                               // if $firstsingleletterword is set, we don't
+                                               // look at the other options, so we can bail early.
+                                               break;
                                        } else {
                                                if ( $firstmultiletterword == -1 ) {
                                                        $firstmultiletterword = $i;
                                                case 'gallery-internal-link':
                                                        $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
                                                        $chars = self::EXT_LINK_URL_CLASS;
 +                                                      $addr = self::EXT_LINK_ADDR;
                                                        $prots = $this->mUrlProtocols;
                                                        // Check to see if the link matches an absolute URL; if not, it must be a wiki link.
 -                                                      if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
 +                                                      if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
                                                                $link = $linkValue;
                                                        } else {
                                                                $localLinkTitle = Title::newFromText( $linkValue );
                                                        break;
                                                case 'link':
                                                        $chars = self::EXT_LINK_URL_CLASS;
 +                                                      $addr = self::EXT_LINK_ADDR;
                                                        $prots = $this->mUrlProtocols;
                                                        if ( $value === '' ) {
                                                                $paramName = 'no-link';
                                                                $value = true;
                                                                $validated = true;
                                                        } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
 -                                                              if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
 +                                                              if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
                                                                        $paramName = 'link-url';
                                                                        $this->mOutput->addExternalLink( $value );
                                                                        if ( $this->mOptions->getExternalLinkTarget() ) {
                        return $this;
                }
        }
 +
 +      /**
 +       * Sets up the PHP implementation of OOUI for use in this request
 +       * and instructs OutputPage to enable OOUI for itself.
 +       *
 +       * @since 1.26
 +       */
 +      public function enableOOUI() {
 +              OutputPage::setupOOUI();
 +              $this->mOutput->setEnableOOUI( true );
 +      }
  }