* @access public
*/
function getFullURL( $query = '' ) {
- global $wgContLang, $wgArticlePath, $wgServer, $wgScript;
+ global $wgContLang, $wgServer, $wgScript;
if ( '' == $this->mInterwiki ) {
- $p = $wgArticlePath;
return $wgServer . $this->getLocalUrl( $query );
} else {
$baseUrl = $this->getInterwikiLink( $this->mInterwiki );
}
}
+ /**
+  * Get a relative directory for putting an HTML version of this article into.
+  *
+  * The directory has two levels, one for each of the first two bytes of the
+  * prefixed DB key. Keys shorter than two bytes are left-padded with spaces.
+  * A byte is used literally (lower-cased) when it is printable ASCII and safe
+  * in a path component; any other byte is written as two upper-case hex
+  * digits of its byte value.
+  *
+  * @return string relative directory of the form "x/y", no trailing slash
+  */
+ function getHashedDirectory() {
+     $dbkey = $this->getPrefixedDBkey();
+     if ( strlen( $dbkey ) < 2 ) {
+         # Left-pad with spaces so that both offsets read below exist.
+         $dbkey = sprintf( "%2s", $dbkey );
+     }
+     # Printable characters that must not appear literally in a directory
+     # name; this is the same set getHashedFilename() replaces in file names.
+     $unsafe = '/\\:*?"<>|';
+     $dir = '';
+     for ( $i = 0; $i <= 1; $i++ ) {
+         if ( $i ) {
+             $dir .= '/';
+         }
+         # Square-bracket offsets: the curly-brace form is rejected by PHP 8.
+         $char = $dbkey[$i];
+         $code = ord( $char );
+         if ( $code > 32 && $code < 128 && false === strpos( $unsafe, $char ) ) {
+             $dir .= strtolower( $char );
+         } else {
+             # Whitespace, control bytes, high bytes and path-hostile characters
+             $dir .= sprintf( "%02X", $code );
+         }
+     }
+     return $dir;
+ }
+
+ /**
+  * Get the relative path of the HTML file for this article: the hashed
+  * directory, a slash, the prefixed DB key with the characters
+  * / \ : * ? " < > | each replaced by an underscore, and ".html".
+  *
+  * @return string relative file path ending in ".html"
+  */
+ function getHashedFilename() {
+     $unsafeChars = array( '/', '\\', ':', '*', '?', '"', '<', '>', '|' );
+     $safeName = str_replace( $unsafeChars, '_', $this->getPrefixedDBkey() );
+     return $this->getHashedDirectory() . '/' . $safeName . '.html';
+ }
+
/**
* Get a URL with no fragment or server name
* @param string $query an optional query string; if not specified,
* @access public
*/
function getLocalURL( $query = '' ) {
- global $wgLang, $wgArticlePath, $wgScript;
+ global $wgLang, $wgArticlePath, $wgScript, $wgMakeDumpLinks;
if ( $this->isExternal() ) {
return $this->getFullURL();
}
-
+
$dbkey = wfUrlencode( $this->getPrefixedDBkey() );
- if ( $query == '' ) {
+ if ( $wgMakeDumpLinks ) {
+ $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename() ), $wgArticlePath );
+ } elseif ( $query == '' ) {
$url = str_replace( '$1', $dbkey, $wgArticlePath );
} else {
if( preg_match( '/^(.*&|)action=([^&]*)(&(.*)|)$/', $query, $matches ) ) {
return false;
}
- global $wgUseLatin1;
- if( !$wgUseLatin1 && false !== strpos( $t, UTF8_REPLACEMENT ) ) {
+ if( false !== strpos( $t, UTF8_REPLACEMENT ) ) {
# Contained illegal UTF-8 sequences or forbidden Unicode chars.
wfProfileOut( $fname );
return false;
return false;
}
- if( $wgUseLatin1 && $this->mInterwiki != '' ) {
- # On a Latin-1 wiki, numbered character entities may have
- # left us with a mix of 8-bit and UTF-8 characters, and
- # some of those might be Windows-1252 special chars.
- # Normalize interwikis to pure UTF-8.
- $t = Title::mergeLatin1Utf8( $t );
- }
-
# Fill fields
$this->mDbkeyform = $t;
$this->mUrlform = wfUrlencode( $t );
$sql .= "($id, $oldid)";
}
- $dbw->query( $sql, DB_MASTER, $fname );
+ $dbw->query( $sql, $fname );
}
# Now, we record the link from the redirect to the new title.
&& $this->getDbkey() == $title->getDbkey();
}
- /**
- * Convert Windows-1252 extended codepoints to their real Unicode points.
- * @param int $codepoint
- * @return int
- * @access private
- */
- function cp1252toUnicode( $codepoint ) {
- # Mappings from:
- # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
- static $cp1252 = array(
- 0x80 => 0x20AC, #EURO SIGN
- 0x81 => UNICODE_REPLACEMENT,
- 0x82 => 0x201A, #SINGLE LOW-9 QUOTATION MARK
- 0x83 => 0x0192, #LATIN SMALL LETTER F WITH HOOK
- 0x84 => 0x201E, #DOUBLE LOW-9 QUOTATION MARK
- 0x85 => 0x2026, #HORIZONTAL ELLIPSIS
- 0x86 => 0x2020, #DAGGER
- 0x87 => 0x2021, #DOUBLE DAGGER
- 0x88 => 0x02C6, #MODIFIER LETTER CIRCUMFLEX ACCENT
- 0x89 => 0x2030, #PER MILLE SIGN
- 0x8A => 0x0160, #LATIN CAPITAL LETTER S WITH CARON
- 0x8B => 0x2039, #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 0x8C => 0x0152, #LATIN CAPITAL LIGATURE OE
- 0x8D => UNICODE_REPLACEMENT,
- 0x8E => 0x017D, #LATIN CAPITAL LETTER Z WITH CARON
- 0x8F => UNICODE_REPLACEMENT,
- 0x90 => UNICODE_REPLACEMENT,
- 0x91 => 0x2018, #LEFT SINGLE QUOTATION MARK
- 0x92 => 0x2019, #RIGHT SINGLE QUOTATION MARK
- 0x93 => 0x201C, #LEFT DOUBLE QUOTATION MARK
- 0x94 => 0x201D, #RIGHT DOUBLE QUOTATION MARK
- 0x95 => 0x2022, #BULLET
- 0x96 => 0x2013, #EN DASH
- 0x97 => 0x2014, #EM DASH
- 0x98 => 0x02DC, #SMALL TILDE
- 0x99 => 0x2122, #TRADE MARK SIGN
- 0x9A => 0x0161, #LATIN SMALL LETTER S WITH CARON
- 0x9B => 0x203A, #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- 0x9C => 0x0153, #LATIN SMALL LIGATURE OE
- 0x9D => UNICODE_REPLACEMENT,
- 0x9E => 0x017E, #LATIN SMALL LETTER Z WITH CARON
- 0x9F => 0x0178, #LATIN CAPITAL LETTER Y WITH DIAERESIS
- );
- return isset( $cp1252[$codepoint] )
- ? $cp1252[$codepoint]
- : $codepoint;
- }
-
- /**
- * HACKHACKHACK
- * Take a string containing a mix of CP1252 characters and UTF-8 and try
- * to convert it completely to UTF-8.
- *
- * @param string $string
- * @return string
- * @access private
- */
- function mergeLatin1Utf8( $string ) {
- return preg_replace_callback(
- # Windows CP1252 extends ISO-8859-1 by putting extra characters
- # into the high control chars area. We have to convert these
- # to their proper Unicode counterparts.
- '/([\x80-\x9f])/u',
- create_function( '$matches',
- 'return codepointToUtf8(
- Title::cp1252toUnicode(
- utf8ToCodepoint( $matches[1] ) ) );' ),
- preg_replace_callback(
- # Up-convert everything from 8-bit to UTF-8, then
- # filter the valid-looking UTF-8 back from the
- # double-converted form.
- '/((?:[\xc0-\xdf][\x80-\xbf]
- |[\xe0-\xef][\x80-\xbf]{2}
- |[\xf0-\xf7][\x80-\xbf]{3})+)/ux',
- create_function( '$matches',
- 'return utf8_decode( $matches[1] );' ),
- utf8_encode( $string ) ) );
- }
-
}
?>