* (bug 3097) Inconsistently usable titles containing HTML character entities
author Brion Vibber <brion@users.mediawiki.org>
Fri, 28 Dec 2007 21:34:49 +0000 (21:34 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Fri, 28 Dec 2007 21:34:49 +0000 (21:34 +0000)
  are now forbidden. A run of cleanupTitles.php will fix up existing pages.

RELEASE-NOTES
includes/Title.php

index 6a43665..5e80096 100644 (file)
@@ -270,6 +270,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 8066) Spaces can't be entered in special page aliases
 * Hide undo link if user can't edit article
 * (bug 12416) Fix password setting for createAndPromote.php
+* (bug 3097) Inconsistently usable titles containing HTML character entities
+  are now forbidden. A run of cleanupTitles.php will fix up existing pages.
 
 
 == Parser changes in 1.12 ==
index 2225aa3..c788707 100644 (file)
@@ -1894,8 +1894,18 @@ class Title {
                # Initialisation
                static $rxTc = false;
                if( !$rxTc ) {
-                       # % is needed as well
-                       $rxTc = '/[^' . Title::legalChars() . ']|%[0-9A-Fa-f]{2}/S';
+                       # Matching titles will be held as illegal.
+                       $rxTc = '/' .
+                               # Any character not allowed is forbidden...
+                               '[^' . Title::legalChars() . ']' .
+                               # URL percent encoding sequences interfere with the ability
+                               # to round-trip titles -- you can't link to them consistently.
+                               '|%[0-9A-Fa-f]{2}' .
+                               # XML/HTML character references produce similar issues.
+                               '|&[A-Za-z0-9\x80-\xff]+;' .
+                               '|&#[0-9]+;' .
+                               '|&#x[0-9A-Fa-f]+;' .
+                               '/S';
                }
 
                $this->mInterwiki = $this->mFragment = '';