$textField = $prefix . 'text';
$flagsField = $prefix . 'flags';
- if ( isset( $row->$flagsField ) ) {
+ if( isset( $row->$flagsField ) ) {
$flags = explode( ',', $row->$flagsField );
} else {
$flags = array();
}
- if ( isset( $row->$textField ) ) {
+ if( isset( $row->$textField ) ) {
$text = $row->$textField;
} else {
return false;
}
- if ( in_array( 'link', $flags ) ) {
- # Handle link type
- $text = Article::followLink( $text );
- } elseif ( in_array( 'gzip', $flags ) ) {
+ if( in_array( 'gzip', $flags ) ) {
# Deal with optional compression of archived pages.
# This can be done periodically via maintenance/compressOld.php, and
# as pages are saved if $wgCompressRevisions is set.
- return gzinflate( $text );
+ $text = gzinflate( $text );
+ }
+
+ global $wgLegacyEncoding;
+ if( $wgLegacyEncoding && !in_array( 'utf-8', $flags ) ) {
+ # Old revisions kept around in a legacy encoding?
+ # Upconvert on demand.
+ global $wgInputEncoding, $wgContLang;
+ $text = $wgContLang->iconv( $wgLegacyEncoding, $wgInputEncoding, $text );
+ }
+
+ if( in_array( 'link', $flags ) ) {
+ # Handle link type
+ $text = Article::followLink( $text );
}
return $text;
}
/**
- * If $wgCompressRevisions is enabled, we will compress datas
+ * If $wgCompressRevisions is enabled, we will compress data.
+ * The input string is modified in place.
+ * Return value is the flags field: contains 'gzip' if the
+ * data is compressed, and 'utf-8' if we're saving in UTF-8
+ * mode.
+ *
* @static
* @param mixed $text reference to a text
- * @return string 'gzip' if it get compressed, '' overwise
+ * @return string comma-separated flags; empty string when no flag applies
*/
function compressRevisionText( &$text ) {
- global $wgCompressRevisions;
- if( !$wgCompressRevisions ) {
- return '';
- }
- if( !function_exists( 'gzdeflate' ) ) {
- wfDebug( "Article::compressRevisionText() -- no zlib support, not compressing\n" );
- return '';
+ global $wgCompressRevisions, $wgUseLatin1;
+ $flags = array();
+ if( !$wgUseLatin1 ) {
+ # Revisions not marked this way will be converted
+ # on load if $wgLegacyEncoding is set in the future.
+ $flags[] = 'utf-8';
+ }
+ if( $wgCompressRevisions ) {
+ if( function_exists( 'gzdeflate' ) ) {
+ $text = gzdeflate( $text );
+ $flags[] = 'gzip';
+ } else {
+ wfDebug( "Article::compressRevisionText() -- no zlib support, not compressing\n" );
+ }
}
- $text = gzdeflate( $text );
- return 'gzip';
+ return implode( ',', $flags );
}
/**
$wgOutputEncoding = 'ISO-8859-1'; # unless you set the next option to true:
$wgUseLatin1 = false; # Enable ISO-8859-1 compatibility mode
$wgEditEncoding = '';
+
+# Set this to e.g. 'ISO-8859-1' to perform character set
+# conversion when loading old revisions not marked with
+# the "utf-8" flag. Use this when converting a wiki to UTF-8
+# without the burdensome mass conversion of old text data.
+#
+# NOTE! This DOES NOT touch any fields other than old_text.
+# Titles, comments, user names, etc. still must be converted
+# en masse in the database before continuing as a UTF-8 wiki.
+$wgLegacyEncoding = false;
+
$wgMimeType = 'text/html';
$wgDocType = '-//W3C//DTD XHTML 1.0 Transitional//EN';
$wgDTD = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd';