Merge "Clean up $wgSQLiteDataDir handling and removed standalone sqlite class"
[lhc/web/wiklou.git] / includes / title / MediaWikiTitleCodec.php
index 6ca0799..c05a87d 100644 (file)
@@ -31,6 +31,7 @@
  * via parseTitle() or from a (semi)trusted source, such as the database.
  *
  * @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue
+ * @since 1.23
  */
 class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
        /**
@@ -322,7 +323,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
                }
 
                # Reject illegal characters.
-               $rxTc = Title::getTitleInvalidRegex();
+               $rxTc = self::getTitleInvalidRegex();
                if ( preg_match( $rxTc, $dbkey ) ) {
                        throw new MalformedTitleException( 'Illegal characters found in title: ' . $text );
                }
@@ -397,4 +398,33 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 
                return $parts;
        }
+
+       /**
+        * Returns a simple regex that will match on characters and sequences invalid in titles.
+        * Note that this doesn't pick up many things that could be wrong with titles, but that
+        * replacing this regex with something valid will make many titles valid.
+        * Previously Title::getTitleInvalidRegex()
+        *
+        * @return string Regex string
+        * @since 1.25
+        */
+       public static function getTitleInvalidRegex() {
+               static $rxTc = false;
+               if ( !$rxTc ) {
+                       # Matching titles will be held as illegal.
+                       $rxTc = '/' .
+                               # Any character not allowed is forbidden...
+                               '[^' . Title::legalChars() . ']' .
+                               # URL percent encoding sequences interfere with the ability
+                               # to round-trip titles -- you can't link to them consistently.
+                               '|%[0-9A-Fa-f]{2}' .
+                               # XML/HTML character references produce similar issues.
+                               '|&[A-Za-z0-9\x80-\xff]+;' .
+                               '|&#[0-9]+;' .
+                               '|&#x[0-9A-Fa-f]+;' .
+                               '/S';
+               }
+
+               return $rxTc;
+       }
 }