<?php
+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
+# http://www.mediawiki.org/
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# http://www.gnu.org/copyleft/gpl.html
+
+/**
+ * Additional tests for UtfNormal::cleanUp() function, including
+ * regression checks for known problems.
+ *
+ * Requires PHPUnit.
+ *
+ * @package UtfNormal
+ * @access private
+ */
+
+# Command-line only: under any other SAPI, print a hint and stop.
+php_sapi_name() == 'cli'
+ or die( "Run me from the command line please.\n" );
+
/** */
if( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'] ) ) {
dl( 'php_utfnormal.so' );
bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );
}
+
+ /**
+  * Regression check: a Hangul syllable immediately followed by another
+  * final jamo must come back from UtfNormal::cleanUp() byte-for-byte
+  * unchanged.
+  */
+ function testHangulRegression() {
+     $syllable = "\xed\x9c\xaf"; # Hangul char (U+D72F)
+     $extraJamo = "\xe1\x87\x81"; # another final jamo (U+11C1) right after it
+     $input = $syllable . $extraJamo;
+
+     # The input should *not* change, so its own hex dump is the expectation.
+     # It is captured before cleanUp() runs.
+     $expectedHex = bin2hex( $input );
+     $actualHex = bin2hex( UtfNormal::cleanUp( $input ) );
+
+     $this->assertEquals( $expectedHex, $actualHex );
+ }
}
$result = PHPUnit::run( $suite );
echo $result->toString();
+
+# Hand the outcome to the calling process: 0 when every test passed,
+# -1 otherwise.
+exit( $result->wasSuccessful() ? 0 : -1 );
?>
\ No newline at end of file
$len = strlen( $string );
$out = '';
$lastClass = -1;
+ $lastHangul = 0;
$startChar = '';
$combining = '';
$x1 = ord(substr(UTF8_HANGUL_VBASE,0,1));
$combining .= $c;
}
$lastClass = $class;
+ $lastHangul = 0;
continue;
}
}
if( $lastClass == 0 ) {
if( isset( $utfCanonicalComp[$pair] ) ) {
$startChar = $utfCanonicalComp[$pair];
+ $lastHangul = 0;
continue;
}
if( $n >= $x1 && $n <= $x2 ) {
$startChar = chr( $hangulPoint >> 12 & 0x0f | 0xe0 ) .
chr( $hangulPoint >> 6 & 0x3f | 0x80 ) .
chr( $hangulPoint & 0x3f | 0x80 );
+ $lastHangul = 0;
continue;
} elseif( $c >= UTF8_HANGUL_TBASE &&
$c <= UTF8_HANGUL_TEND &&
$startChar >= UTF8_HANGUL_FIRST &&
- $startChar <= UTF8_HANGUL_LAST ) {
+ $startChar <= UTF8_HANGUL_LAST &&
+ !$lastHangul ) {
# $tIndex = utf8ToCodepoint( $c ) - UNICODE_HANGUL_TBASE;
$tIndex = ord( $c{2} ) - 0xa7;
if( $tIndex < 0 ) $tIndex = ord( $c{2} ) - 0x80 + (0x11c0 - 0x11a7);
$startChar{1} = chr( $mid );
}
$startChar{2} = chr( $tail );
+
+ # If there's another jamo char after this, *don't* try to merge it.
+ $lastHangul = 1;
continue;
}
}
$startChar = $c;
$combining = '';
$lastClass = 0;
+ $lastHangul = 0;
}
$out .= $startChar . $combining;
return $out;