[SPIP] ~maj v3.2.9-->v3.2.11
[lhc/web/www.git] / www / ecrire / inc / charsets.php
index 7dbd178..98036d6 100644 (file)
@@ -404,20 +404,26 @@ function charset2unicode($texte, $charset = 'AUTO' /* $forcer: obsolete*/) {
                default:
                        // mbstring presente ?
                        if (init_mb_string()) {
-                               if ($order = mb_detect_order() # mb_string connait-il $charset?
-                                       and mb_detect_order($charset)
-                               ) {
-                                       $s = mb_convert_encoding($texte, 'utf-8', $charset);
-                                       if ($s && $s != $texte) {
-                                               return utf_8_to_unicode($s);
+                               $order = mb_detect_order();
+                               try {
+                                       # mb_string connait-il $charset?
+                                       if ($order and mb_detect_order($charset)) {
+                                               $s = mb_convert_encoding($texte, 'utf-8', $charset);
+                                               if ($s && $s != $texte) {
+                                                       return utf_8_to_unicode($s);
+                                               }
                                        }
-                               }
+                                       
+                               } catch (\Exception $e) {
+                                       // Le charset n'existe probablement pas
+                               } 
                                mb_detect_order($order); # remettre comme precedemment
                        }
 
                        // Sinon, peut-etre connaissons-nous ce charset ?
                        if (!isset($trans[$charset])) {
-                               if ($cset = load_charset($charset)
+                               if (
+                                       $cset = load_charset($charset)
                                        and is_array($GLOBALS['CHARSET'][$cset])
                                ) {
                                        foreach ($GLOBALS['CHARSET'][$cset] as $key => $val) {
@@ -425,7 +431,7 @@ function charset2unicode($texte, $charset = 'AUTO' /* $forcer: obsolete*/) {
                                        }
                                }
                        }
-                       if (count($trans[$charset])) {
+                       if (isset($trans[$charset]) and count($trans[$charset])) {
                                return str_replace(array_keys($trans[$charset]), array_values($trans[$charset]), $texte);
                        }
 
@@ -831,7 +837,7 @@ function javascript_to_binary($texte) {
  * @return string
  */
 function translitteration_rapide($texte, $charset = 'AUTO', $complexe = '') {
-       static $trans;
+       static $trans = [];
        if ($charset == 'AUTO') {
                $charset = $GLOBALS['meta']['charset'];
        }
@@ -842,7 +848,8 @@ function translitteration_rapide($texte, $charset = 'AUTO', $complexe = '') {
        $table_translit = 'translit' . $complexe;
 
        // 2. Translitterer grace a la table predefinie
-       if (!$trans[$complexe]) {
+       if (!isset($trans[$complexe])) {
+               $trans[$complexe] = [];
                load_charset($table_translit);
                foreach ($GLOBALS['CHARSET'][$table_translit] as $key => $val) {
                        $trans[$complexe][caractere_utf_8($key)] = $val;
@@ -897,8 +904,11 @@ function translitteration_complexe($texte, $chiffres = false) {
        $texte = translitteration($texte, 'AUTO', 'complexe');
 
        if ($chiffres) {
-               $texte = preg_replace("/[aeiuoyd]['`?~.^+(-]{1,2}/eS",
-                       "translitteration_chiffree('\\0')", $texte);
+               $texte = preg_replace_callback(
+                       "/[aeiuoyd]['`?~.^+(-]{1,2}/S",
+                       function($m) { return translitteration_chiffree($m[0]); },
+                       $texte
+               );
        }
 
        return $texte;
@@ -994,34 +1004,29 @@ function transcoder_page($texte, $headers = '') {
                return $texte;
        }
 
-       // Reconnaitre le BOM utf-8 (0xEFBBBF)
        if (bom_utf8($texte)) {
+               // Reconnaitre le BOM utf-8 (0xEFBBBF)
                $charset = 'utf-8';
                $texte = substr($texte, 3);
-       } // charset precise par le contenu (xml)
-       else {
-               if (preg_match(
-                       ',<[?]xml[^>]*encoding[^>]*=[^>]*([-_a-z0-9]+?),UimsS', $texte, $regs)) {
-                       $charset = trim(strtolower($regs[1]));
-               } // charset precise par le contenu (html)
-               else {
-                       if (preg_match(
-                                       ',<(meta|html|body)[^>]*charset[^>]*=[^>]*([-_a-z0-9]+?),UimsS',
-                                       $texte, $regs)
-                               # eviter #CHARSET des squelettes
-                               and (($tmp = trim(strtolower($regs[2]))) != 'charset')
-                       ) {
-                               $charset = $tmp;
-                       } // charset de la reponse http
-                       else {
-                               if (preg_match(',charset=([-_a-z0-9]+),i', $headers, $regs)) {
-                                       $charset = trim(strtolower($regs[1]));
-                               } else {
-                                       $charset = '';
-                               }
-                       }
-               }
+       } elseif (preg_match(',<[?]xml[^>]*encoding[^>]*=[^>]*([-_a-z0-9]+?),UimsS', $texte, $regs)) {
+               // charset precise par le contenu (xml)
+               $charset = trim(strtolower($regs[1]));
+       } elseif (
+               // charset precise par le contenu (html)
+               preg_match(',<(meta|html|body)[^>]*charset[^>]*=[^>]*([#-_a-z0-9]+?),UimsS', $texte, $regs)
+               # eviter toute balise SPIP tel que #CHARSET ou #CONFIG d'un squelette
+               and false === strpos($regs[2], '#')
+               and $tmp = trim(strtolower($regs[2]))
+       ) {
+               $charset = $tmp;
+       } elseif (preg_match(',charset=([-_a-z0-9]+),i', $headers, $regs)) {
+               // charset de la reponse http
+               $charset = trim(strtolower($regs[1]));
+       } else {
+               $charset = '';
        }
+
+
        // normaliser les noms du shif-jis japonais
        if (preg_match(',^(x|shift)[_-]s?jis$,i', $charset)) {
                $charset = 'shift-jis';