}
function getDiff( $otext, $ntext, $otitle, $ntitle ) { # builds and returns the HTML diff table for the two texts
- global $wgUseExternalDiffEngine;
+ global $wgUseExternalDiffEngine, $wgContLang; # $wgContLang supplies the segmentForDiff()/unsegmentForDiff() hooks used below
$out = "
<table border='0' width='98%' cellpadding='0' cellspacing='4' class='diff'>
<tr>
<td colspan='2' width='50%' align='center' class='diff-ntitle'>{$ntitle}</td>
</tr>
";
-
+ $otext = $wgContLang->segmentForDiff($otext); # language hook applied to the old text before diffing
+ $ntext = $wgContLang->segmentForDiff($ntext); # same hook applied to the new text
+ $difftext=''; # holds the diff rows so they can be unsegmented before being appended to $out
if ( $wgUseExternalDiffEngine ) {
# For historical reasons, external diff engine expects
# input text to be HTML-escaped already
if( !function_exists( 'wikidiff_do_diff' ) ) {
dl('php_wikidiff.so');
}
- $out .= wikidiff_do_diff( $otext, $ntext, 2 );
+ $difftext = wikidiff_do_diff( $otext, $ntext, 2 );
} else {
$ota = explode( "\n", str_replace( "\r\n", "\n", $otext ) );
$nta = explode( "\n", str_replace( "\r\n", "\n", $ntext ) );
$diffs =& new Diff( $ota, $nta );
$formatter =& new TableDiffFormatter();
- $out .= $formatter->format( $diffs );
+ $difftext = $formatter->format( $diffs );
}
- $out .= "</table>\n";
+ $difftext = $wgContLang->unsegmentForDiff($difftext); # undo the segmentation on the diff output from either engine
+ $out .= $difftext."</table>\n"; # append the diff rows and close the table opened above
return $out;
}
return $word;
}
-
+ # Pre-diff hook: languages such as Chinese need their text split into
+ # segments before a diff of it is of any use. This implementation does
+ # no segmentation and hands $text back unchanged.
+ function segmentForDiff( $text ) {
+ 	return $text;
+ }
+ # Post-diff hook: reverses segmentForDiff() so the result can be shown.
+ # This implementation returns $text unchanged.
+ function unsegmentForDiff( $text ) {
+ 	return $text;
+ }
+
# convert text to different variants of a language. the automatic
# conversion is done in autoConvert(). here we parse the text
# marked with -{}-, which specifies special conversions of the
return $this->lang->getPreferredVariant();
}
+ # Pre-diff segmentation hook; returns $text as given, with no
+ # segmentation and no re-encoding.
+ function segmentForDiff( $text ) {
+ 	return $text;
+ }
+
+ # Post-diff counterpart of segmentForDiff(); returns $text as given.
+ function unsegmentForDiff( $text ) {
+ 	return $text;
+ }
+
function convert( $text, $isTitle=false ) {
	# Re-encode the input with utf8_encode() before handing it to the
	# wrapped language object, then utf8_decode() what it returns.
	$utf8Text = utf8_encode( $text );
	$converted = $this->lang->convert( $utf8Text, $isTitle );
	return utf8_decode( $converted );
}
return $this->mZhLanguageCode;
}
-
+ # Insert a space in front of every multi-byte UTF-8 sequence (a lead
+ # byte 0xC0-0xFF plus its continuation bytes 0x80-0xBF), so each such
+ # character is set off by a space and the diff gives much better info.
+ # ASCII text is left untouched.
+ #
+ # A plain replacement string is used rather than the /e (eval)
+ # modifier: /e is deprecated since PHP 5.5 and removed in PHP 7, and
+ # the evaluated expression only ever prepended a space to the match.
+ #
+ # @param string $text UTF-8 text to segment
+ # @return string the text with a space before each multi-byte character
+ function segmentForDiff( $text ) {
+ 	return preg_replace(
+ 		'/([\xc0-\xff][\x80-\xbf]*)/',
+ 		' $1', $text );
+ }
+
+ # Reverse segmentForDiff(): drop the single space that precedes each
+ # multi-byte UTF-8 sequence (lead byte 0xC0-0xFF plus continuation
+ # bytes 0x80-0xBF). Spaces before ASCII characters are kept.
+ #
+ # A plain replacement string is used rather than the /e (eval)
+ # modifier: /e is deprecated since PHP 5.5 and removed in PHP 7, and
+ # the evaluated expression only ever returned the captured character.
+ #
+ # @param string $text segmented text (for example, rendered diff output)
+ # @return string the text with those separator spaces removed
+ function unsegmentForDiff( $text ) {
+ 	return preg_replace(
+ 		'/ ([\xc0-\xff][\x80-\xbf]*)/',
+ 		'$1', $text );
+ }
+
+
function autoConvert($text, $toVariant=false) {
if(!$toVariant)