Add experimental $wgActionPaths config option.
[lhc/web/wiklou.git] / includes / Title.php
index e9d723d..9ac3be5 100644 (file)
@@ -11,6 +11,13 @@ require_once( 'normal/UtfNormal.php' );
 $wgTitleInterwikiCache = array();
 define ( 'GAID_FOR_UPDATE', 1 );
 
+# Title::newFromText maintains a cache to avoid
+# expensive re-normalization of commonly used titles.
+# On a batch operation this can become a memory leak
+# if not bounded. After hitting this many titles,
+# reset the cache.
+define( 'MW_TITLECACHE_MAX', 1000 );
+
 /**
  * Title class
  * - Represents a title, which may contain an interwiki designation or namespace
@@ -52,10 +59,12 @@ class Title {
                $this->mInterwiki = $this->mUrlform =
                $this->mTextform = $this->mDbkeyform = '';
                $this->mArticleID = -1;
-               $this->mNamespace = 0;
+               $this->mNamespace = NS_MAIN;
                $this->mRestrictionsLoaded = false;
                $this->mRestrictions = array();
-               $this->mDefaultNamespace = 0;
+               # Don't change the following; NS_MAIN is hardcoded in several places.
+               # See bug #696
+               $this->mDefaultNamespace = NS_MAIN;
        }
 
        /**
@@ -89,10 +98,14 @@ class Title {
         * @static
         * @access public
         */
-       /* static */ function &newFromText( $text, $defaultNamespace = 0 ) {    
+       function &newFromText( $text, $defaultNamespace = NS_MAIN ) {   
                $fname = 'Title::newFromText';
                wfProfileIn( $fname );
                
+               if( is_object( $text ) ) {
+                       wfDebugDieBacktrace( 'Title::newFromText given an object' );
+               }
+               
                /**
                 * Wiki pages often contain multiple links to the same page.
                 * Title normalization and parsing can become expensive on
@@ -102,7 +115,7 @@ class Title {
                 * In theory these are value objects and won't get changed...
                 */
                static $titleCache = array();
-               if( $defaultNamespace == 0 && isset( $titleCache[$text] ) ) {
+               if( $defaultNamespace == NS_MAIN && isset( $titleCache[$text] ) ) {
                        wfProfileOut( $fname );
                        return $titleCache[$text];
                }
@@ -127,7 +140,11 @@ class Title {
                $t->mDefaultNamespace = $defaultNamespace;
 
                if( $t->secureAndSplit() ) {
-                       if( $defaultNamespace == 0 ) {
+                       if( $defaultNamespace == NS_MAIN ) {
+                               if( count( $titleCache ) >= MW_TITLECACHE_MAX ) {
+                                       # Avoid memory leaks on mass operations...
+                                       $titleCache = array();
+                               }
                                $titleCache[$text] =& $t;
                        }
                        wfProfileOut( $fname );
@@ -637,6 +654,17 @@ class Title {
                if ( $query == '' ) {
                        $url = str_replace( '$1', $dbkey, $wgArticlePath );
                } else {
+                       if( preg_match( '/^(.*&|)action=([^&]*)(&(.*)|)$/', $query, $matches ) ) {
+                               global $wgActionPaths;
+                               $action = urldecode( $matches[2] );
+                               if( isset( $wgActionPaths[$action] ) ) {
+                                       $query = $matches[1];
+                                       if( isset( $matches[4] ) ) $query .= $matches[4];
+                                       $url = str_replace( '$1', $dbkey, $wgActionPaths[$action] );
+                                       if( $query != '' ) $url .= '?' . $query;
+                                       return $url;
+                               }
+                       }
                        if ( $query == '-' ) {
                                $query = '';
                        }
@@ -857,6 +885,15 @@ class Title {
                }
                return false;
        }
+       
+       /**
+        * Tell whether this title sits in a talk namespace, as judged
+        * by Namespace::isTalk() for the title's own namespace.
+        * @return bool
+        * @access public
+        */
+       function isTalkPage() {
+               $ns = $this->getNamespace();
+               return Namespace::isTalk( $ns );
+       }
 
        /**
         * Is this a .css or .js subpage of a user page?
@@ -1060,7 +1097,7 @@ class Title {
 
                # Clean up whitespace
                #
-               $t = preg_replace( '/[\\s_]+/', '_', $this->mDbkeyform );
+               $t = preg_replace( '/[ _]+/', '_', $this->mDbkeyform );
                $t = trim( $t, '_' );
 
                if ( '' == $t ) {
@@ -1169,6 +1206,7 @@ class Title {
                # We shouldn't need to query the DB for the size.
                #$maxSize = $dbr->textFieldSize( 'page', 'page_title' );
                if ( strlen( $r ) > 255 ) {
+                       wfProfileOut( $fname );
                        return false;
                }
 
@@ -1186,6 +1224,26 @@ class Title {
                        $t = $r;
                }
                
+               /**
+                * Can't make a link to a namespace alone...
+                * "empty" local links can only be self-links
+                * with a fragment identifier.
+                */
+               if( $t == '' &&
+                       $this->mInterwiki == '' &&
+                       $this->mNamespace != NS_MAIN ) {
+                       wfProfileOut( $fname );
+                       return false;
+               }
+               
+               if( $wgUseLatin1 && $this->mInterwiki != '' ) {
+                       # On a Latin-1 wiki, numbered character entities may have
+                       # left us with a mix of 8-bit and UTF-8 characters, and
+                       # some of those might be Windows-1252 special chars.
+                       # Normalize interwikis to pure UTF-8.
+                       $t = Title::mergeLatin1Utf8( $t );
+               }
+
                # Fill fields
                $this->mDbkeyform = $t;
                $this->mUrlform = wfUrlencode( $t );
@@ -1318,6 +1376,7 @@ class Title {
         * @access public
         */
        function moveTo( &$nt, $auth = true ) {
+               global $wgUser;
                if( !$this or !$nt ) {
                        return 'badtitletext';
                }
@@ -1384,6 +1443,7 @@ class Title {
                $u = new SearchUpdate( $newid, $this->getPrefixedDBkey(), '' );
                $u->doUpdate();
 
+               wfRunHooks( 'TitleMoveComplete', $this, $nt, $wgUser, $oldid, $newid );
                return true;
        }
        
@@ -1398,10 +1458,9 @@ class Title {
        /* private */ function moveOverExistingRedirect( &$nt ) {
                global $wgUser, $wgLinkCache, $wgUseSquid, $wgMwRedir;
                $fname = 'Title::moveOverExistingRedirect';
-               $comment = wfMsg( '1movedto2', $this->getPrefixedText(), $nt->getPrefixedText() );
+               $comment = wfMsgForContent( '1movedto2', $this->getPrefixedText(), $nt->getPrefixedText() );
                
                $now = wfTimestampNow();
-               $won = wfInvertTimestamp( $now );
                $rand = wfRandom();
                $newid = $nt->getArticleID();
                $oldid = $this->getArticleID();
@@ -1433,8 +1492,8 @@ class Title {
                        'rev_comment' => $comment,
                        'rev_user' => $wgUser->getID(),
                        'rev_user_text' => $wgUser->getName(),
-                       'rev_timestamp' => $now,
-                       'inverse_timestamp' => $won ), $fname
+                       'rev_timestamp' => $now
+                       ), $fname
                );
                $revid = $dbw->insertId();
                $dbw->insert( 'text', array(
@@ -1455,9 +1514,15 @@ class Title {
                );
                $newid = $dbw->insertId();
                $wgLinkCache->clearLink( $this->getPrefixedDBkey() );
-               
-               RecentChange::notifyMoveOverRedirect( $now, $this, $nt, $wgUser, $comment );
 
+               # Record in RC
+               // Replaced by a log entry
+               // RecentChange::notifyMoveOverRedirect( $now, $this, $nt, $wgUser, $comment );
+
+               # Log the move
+               $log = new LogPage( 'move' );
+               $log->addEntry( 'move_redir', $this, '', array(1 => $nt->getText()) );
+               
                # Swap links
                
                # Load titles and IDs
@@ -1525,13 +1590,12 @@ class Title {
                global $wgUser, $wgLinkCache, $wgUseSquid;
                global $wgMwRedir;
                $fname = 'MovePageForm::moveToNewTitle';
-               $comment = wfMsg( '1movedto2', $this->getPrefixedText(), $nt->getPrefixedText() );
+               $comment = wfMsgForContent( '1movedto2', $this->getPrefixedText(), $nt->getPrefixedText() );
 
                $newid = $nt->getArticleID();
                $oldid = $this->getArticleID();
                $dbw =& wfGetDB( DB_MASTER );
                $now = $dbw->timestamp();
-               $won = wfInvertTimestamp( wfTimestamp(TS_MW,$now) );
                wfSeedRandom();
                $rand = wfRandom();
 
@@ -1555,8 +1619,8 @@ class Title {
                        'rev_comment' => $comment,
                        'rev_user' => $wgUser->getID(),
                        'rev_user_text' => $wgUser->getName(),
-                       'rev_timestamp' => $now,
-                       'inverse_timestamp' => $won ), $fname
+                       'rev_timestamp' => $now
+                       ), $fname
                );
                $revid = $dbw->insertId();
                $dbw->insert( 'text', array(
@@ -1578,8 +1642,16 @@ class Title {
                $newid = $dbw->insertId();
                $wgLinkCache->clearLink( $this->getPrefixedDBkey() );
 
+               // attach revision to the new page
+               $dbw->update( 'revision', array('rev_page' => $newid), array('rev_id' => $revid), $fname);
+
                # Record in RC
-               RecentChange::notifyMoveToNew( $now, $this, $nt, $wgUser, $comment );
+               // Replaced by a log entry
+               // RecentChange::notifyMoveToNew( $now, $this, $nt, $wgUser, $comment );
+
+               # Log the move
+               $log = new LogPage( 'move' );
+               $log->addEntry( 'move', $this, '', array(1 => $nt->getText()) );
 
                # Purge squid and linkscc as per article creation
                Article::onArticleCreate( $nt );
@@ -1673,7 +1745,6 @@ class Title {
                $fname = 'Title::createRedirect';
                $dbw =& wfGetDB( DB_MASTER );
                $now = wfTimestampNow();
-               $won = wfInvertTimestamp( $now );
                
                $seqVal = $dbw->nextSequenceValue( 'page_page_id_seq' );
                $dbw->insert( 'page', array(
@@ -1702,7 +1773,6 @@ class Title {
                        'rev_user' => $wgUser->getID(),
                        'rev_user_text' => $wgUser->getName(),
                        'rev_timestamp' => $now,
-                       'inverse_timestamp' => $won,
                ), $fname );
                
                $dbw->update( 'page',
@@ -1844,6 +1914,97 @@ class Title {
                        'rev_page=' . IntVal( $this->getArticleId() ) .
                        ' AND rev_id>' . IntVal( $revision ) . ' ORDER BY rev_id' );
        }
+       
+       /**
+        * Compare with another title.
+        *
+        * Two titles are equal when their interwiki prefix, namespace
+        * and DB key all match.
+        *
+        * @param Title $title
+        * @return bool
+        */
+       function equals( &$title ) {
+               # The string parts must be compared strictly: with ==, PHP
+               # treats numeric-looking strings as numbers, so the distinct
+               # titles '1e3' and '1000' (or '01' and '1') would compare equal.
+               # The casts keep the old behaviour if a non-string key slipped in.
+               return (string)$this->getInterwiki() === (string)$title->getInterwiki()
+                       && $this->getNamespace() == $title->getNamespace()
+                       && (string)$this->getDbkey() === (string)$title->getDbkey();
+       }
+
+       /**
+        * Translate a codepoint from the Windows-1252 extension range
+        * (0x80-0x9F) into the Unicode codepoint listed for it in the
+        * table below. Five slots (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to
+        * UNICODE_REPLACEMENT; any codepoint not in the table is handed
+        * back unchanged.
+        * @param int $codepoint
+        * @return int
+        * @access private
+        */
+       function cp1252toUnicode( $codepoint ) {
+               # Mappings from:
+               # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
+               static $winExtras = array(
+                       0x80 => 0x20AC, #EURO SIGN
+                       0x81 => UNICODE_REPLACEMENT,
+                       0x82 => 0x201A, #SINGLE LOW-9 QUOTATION MARK
+                       0x83 => 0x0192, #LATIN SMALL LETTER F WITH HOOK
+                       0x84 => 0x201E, #DOUBLE LOW-9 QUOTATION MARK
+                       0x85 => 0x2026, #HORIZONTAL ELLIPSIS
+                       0x86 => 0x2020, #DAGGER
+                       0x87 => 0x2021, #DOUBLE DAGGER
+                       0x88 => 0x02C6, #MODIFIER LETTER CIRCUMFLEX ACCENT
+                       0x89 => 0x2030, #PER MILLE SIGN
+                       0x8A => 0x0160, #LATIN CAPITAL LETTER S WITH CARON
+                       0x8B => 0x2039, #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+                       0x8C => 0x0152, #LATIN CAPITAL LIGATURE OE
+                       0x8D => UNICODE_REPLACEMENT,
+                       0x8E => 0x017D, #LATIN CAPITAL LETTER Z WITH CARON
+                       0x8F => UNICODE_REPLACEMENT,
+                       0x90 => UNICODE_REPLACEMENT,
+                       0x91 => 0x2018, #LEFT SINGLE QUOTATION MARK
+                       0x92 => 0x2019, #RIGHT SINGLE QUOTATION MARK
+                       0x93 => 0x201C, #LEFT DOUBLE QUOTATION MARK
+                       0x94 => 0x201D, #RIGHT DOUBLE QUOTATION MARK
+                       0x95 => 0x2022, #BULLET
+                       0x96 => 0x2013, #EN DASH
+                       0x97 => 0x2014, #EM DASH
+                       0x98 => 0x02DC, #SMALL TILDE
+                       0x99 => 0x2122, #TRADE MARK SIGN
+                       0x9A => 0x0161, #LATIN SMALL LETTER S WITH CARON
+                       0x9B => 0x203A, #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+                       0x9C => 0x0153, #LATIN SMALL LIGATURE OE
+                       0x9D => UNICODE_REPLACEMENT,
+                       0x9E => 0x017E, #LATIN SMALL LETTER Z WITH CARON
+                       0x9F => 0x0178, #LATIN CAPITAL LETTER Y WITH DIAERESIS
+                       );
+               if( isset( $winExtras[$codepoint] ) ) {
+                       return $winExtras[$codepoint];
+               }
+               # Not one of the CP1252 extras: pass it through untouched.
+               return $codepoint;
+       }
+       
+       /**
+        * HACKHACKHACK
+        * Take a string containing a mix of CP1252 characters and UTF-8 and try
+        * to convert it completely to UTF-8.
+        *
+        * Works in two passes:
+        *  1. utf8_encode() the whole input, then utf8_decode() back every run
+        *     that looks like double-encoded UTF-8, so sequences that were
+        *     already UTF-8 keep their original bytes.
+        *  2. Remap every remaining U+0080..U+009F character through
+        *     Title::cp1252toUnicode().
+        *
+        * The callbacks are named methods rather than create_function()
+        * lambdas: each create_function() call defines a new global function
+        * that is never released, so calling it per title leaks memory.
+        *
+        * @param string $string
+        * @return string
+        * @access private
+        */
+       function mergeLatin1Utf8( $string ) {
+               # Up-convert everything from 8-bit to UTF-8, then
+               # filter the valid-looking UTF-8 back from the
+               # double-converted form.
+               $merged = preg_replace_callback(
+                               '/((?:[\xc0-\xdf][\x80-\xbf]
+                                        |[\xe0-\xef][\x80-\xbf]{2}
+                                        |[\xf0-\xf7][\x80-\xbf]{3})+)/ux',
+                       array( 'Title', 'undoDoubleUtf8' ),
+                       utf8_encode( $string ) );
+
+               # Windows CP1252 extends ISO-8859-1 by putting extra characters
+               # into the high control chars area. We have to convert these
+               # to their proper Unicode counterparts.
+               return preg_replace_callback(
+                       '/([\x80-\x9f])/u',
+                       array( 'Title', 'cp1252CharToUtf8' ),
+                       $merged );
+       }
+
+       /**
+        * preg_replace_callback() helper for mergeLatin1Utf8(): turn one
+        * matched double-encoded run back into its single-encoded form.
+        * @param array $matches
+        * @return string
+        * @access private
+        */
+       function undoDoubleUtf8( $matches ) {
+               return utf8_decode( $matches[1] );
+       }
+
+       /**
+        * preg_replace_callback() helper for mergeLatin1Utf8(): re-encode one
+        * matched U+0080..U+009F character as the UTF-8 form of the codepoint
+        * Title::cp1252toUnicode() returns for it.
+        * @param array $matches
+        * @return string
+        * @access private
+        */
+       function cp1252CharToUtf8( $matches ) {
+               return codepointToUtf8(
+                       Title::cp1252toUnicode(
+                               utf8ToCodepoint( $matches[1] ) ) );
+       }
 
 }
 ?>