3 * Unicode normalization routines
5 * Copyright © 2004 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
28 * @defgroup UtfNormal UtfNormal
31 use UtfNormal\Validator
;
34 * Unicode normalization routines for working with UTF-8 strings.
35 * Currently assumes that input strings are valid UTF-8!
37 * Not as fast as I'd like, but should be usable for most purposes.
38 * UtfNormal::toNFC() will bail early if given ASCII text or text
39 * it can quickly determine is already normalized.
41 * All functions can be called statically.
43 * See description of forms at https://www.unicode.org/reports/tr15/
45 * @deprecated since 1.25, use UtfNormal\Validator directly
/**
 * The ultimate convenience function! Clean up invalid UTF-8 sequences,
 * and convert to normal form C, canonical composition.
 *
 * Fast return for pure ASCII strings; some lesser optimizations for
 * strings containing only known-good characters. Not as fast as toNFC().
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::cleanUp() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::cleanUp() directly
 * @param string $string a UTF-8 string
 * @return string a clean, shiny, normalized UTF-8 string
 */
public static function cleanUp( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::cleanUp( $string );
}
/**
 * Convert a UTF-8 string to normal form C, canonical composition.
 * Fast return for pure ASCII strings; some lesser optimizations for
 * strings containing only known-good characters.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::toNFC() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::toNFC() directly
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form C
 */
public static function toNFC( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::toNFC( $string );
}
/**
 * Convert a UTF-8 string to normal form D, canonical decomposition.
 * Fast return for pure ASCII strings.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::toNFD() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::toNFD() directly
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form D
 */
public static function toNFD( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::toNFD( $string );
}
/**
 * Convert a UTF-8 string to normal form KC, compatibility composition.
 * This may cause irreversible information loss, use judiciously.
 * Fast return for pure ASCII strings.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::toNFKC() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::toNFKC() directly
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KC
 */
public static function toNFKC( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::toNFKC( $string );
}
/**
 * Convert a UTF-8 string to normal form KD, compatibility decomposition.
 * This may cause irreversible information loss, use judiciously.
 * Fast return for pure ASCII strings.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::toNFKD() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::toNFKD() directly
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KD
 */
public static function toNFKD( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::toNFKD( $string );
}
/**
 * Returns true if the string is _definitely_ in NFC.
 * Returns false if not or uncertain.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::quickIsNFC() returns.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::quickIsNFC() directly
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return bool true only when the string is known to be in NFC
 */
public static function quickIsNFC( $string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::quickIsNFC( $string );
}
/**
 * Returns true if the string is _definitely_ in NFC.
 * Returns false if not or uncertain.
 *
 * This method is a deprecated pass-through: it reports the call via
 * wfDeprecated() and then returns whatever Validator::quickIsNFCVerify()
 * returns. The argument is taken by reference and handed to the delegate
 * as-is, so any change the delegate makes is visible to the caller.
 *
 * @deprecated since 1.25, use UtfNormal\Validator::quickIsNFCVerify() directly
 * @param string &$string a UTF-8 string, altered on output to be valid UTF-8 safe for XML.
 * @return bool true only when the string is known to be in NFC
 */
public static function quickIsNFCVerify( &$string ) {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__, '1.25' );

	return Validator::quickIsNFCVerify( $string );
}