[SPIP] ~maj v3.2.9-->v3.2.11

[lhc/web/www.git] / www / ecrire / inc / charsets.php
diff --git a/www/ecrire/inc/charsets.php b/www/ecrire/inc/charsets.php

index 7dbd178..98036d6 100644 (file)
--- a/www/ecrire/inc/charsets.php
+++ b/www/ecrire/inc/charsets.php
@@ -404,20 +404,26 @@ function charset2unicode($texte, $charset = 'AUTO' /* $forcer: obsolete*/) {
                 default:
                         // mbstring presente ?
                         if (init_mb_string()) {
-                               if ($order = mb_detect_order() # mb_string connait-il $charset?
-                                       and mb_detect_order($charset)
-                               ) {
-                                       $s = mb_convert_encoding($texte, 'utf-8', $charset);
-                                       if ($s && $s != $texte) {
-                                               return utf_8_to_unicode($s);
+                               $order = mb_detect_order();
+                               try {
+                                       # mb_string connait-il $charset?
+                                       if ($order and mb_detect_order($charset)) {
+                                               $s = mb_convert_encoding($texte, 'utf-8', $charset);
+                                               if ($s && $s != $texte) {
+                                                       return utf_8_to_unicode($s);
+                                               }
                                         }
-                               }
+                                       
+                               } catch (\Exception $e) {
+                                       // Le charset n'existe probablement pas
+                               } 
                                 mb_detect_order($order); # remettre comme precedemment
                         }
  
                         // Sinon, peut-etre connaissons-nous ce charset ?
                         if (!isset($trans[$charset])) {
-                               if ($cset = load_charset($charset)
+                               if (
+                                       $cset = load_charset($charset)
                                         and is_array($GLOBALS['CHARSET'][$cset])
                                 ) {
                                         foreach ($GLOBALS['CHARSET'][$cset] as $key => $val) {
@@ -425,7 +431,7 @@ function charset2unicode($texte, $charset = 'AUTO' /* $forcer: obsolete*/) {
                                         }
                                 }
                         }
-                       if (count($trans[$charset])) {
+                       if (isset($trans[$charset]) and count($trans[$charset])) {
                                 return str_replace(array_keys($trans[$charset]), array_values($trans[$charset]), $texte);
                         }
  
@@ -831,7 +837,7 @@ function javascript_to_binary($texte) {
   * @return string
   */
  function translitteration_rapide($texte, $charset = 'AUTO', $complexe = '') {
-       static $trans;
+       static $trans = [];
         if ($charset == 'AUTO') {
                 $charset = $GLOBALS['meta']['charset'];
         }
@@ -842,7 +848,8 @@ function translitteration_rapide($texte, $charset = 'AUTO', $complexe = '') {
         $table_translit = 'translit' . $complexe;
  
         // 2. Translitterer grace a la table predefinie
-       if (!$trans[$complexe]) {
+       if (!isset($trans[$complexe])) {
+               $trans[$complexe] = [];
                 load_charset($table_translit);
                 foreach ($GLOBALS['CHARSET'][$table_translit] as $key => $val) {
                         $trans[$complexe][caractere_utf_8($key)] = $val;
@@ -897,8 +904,11 @@ function translitteration_complexe($texte, $chiffres = false) {
         $texte = translitteration($texte, 'AUTO', 'complexe');
  
         if ($chiffres) {
-               $texte = preg_replace("/[aeiuoyd]['`?~.^+(-]{1,2}/eS",
-                       "translitteration_chiffree('\\0')", $texte);
+               $texte = preg_replace_callback(
+                       "/[aeiuoyd]['`?~.^+(-]{1,2}/S",
+                       function($m) { return translitteration_chiffree($m[0]); },
+                       $texte
+               );
         }
  
         return $texte;
@@ -994,34 +1004,29 @@ function transcoder_page($texte, $headers = '') {
                 return $texte;
         }
  
-       // Reconnaitre le BOM utf-8 (0xEFBBBF)
         if (bom_utf8($texte)) {
+               // Reconnaitre le BOM utf-8 (0xEFBBBF)
                 $charset = 'utf-8';
                 $texte = substr($texte, 3);
-       } // charset precise par le contenu (xml)
-       else {
-               if (preg_match(
-                       ',<[?]xml[^>]*encoding[^>]*=[^>]*([-_a-z0-9]+?),UimsS', $texte, $regs)) {
-                       $charset = trim(strtolower($regs[1]));
-               } // charset precise par le contenu (html)
-               else {
-                       if (preg_match(
-                                       ',<(meta|html|body)[^>]*charset[^>]*=[^>]*([-_a-z0-9]+?),UimsS',
-                                       $texte, $regs)
-                               # eviter #CHARSET des squelettes
-                               and (($tmp = trim(strtolower($regs[2]))) != 'charset')
-                       ) {
-                               $charset = $tmp;
-                       } // charset de la reponse http
-                       else {
-                               if (preg_match(',charset=([-_a-z0-9]+),i', $headers, $regs)) {
-                                       $charset = trim(strtolower($regs[1]));
-                               } else {
-                                       $charset = '';
-                               }
-                       }
-               }
+       } elseif (preg_match(',<[?]xml[^>]*encoding[^>]*=[^>]*([-_a-z0-9]+?),UimsS', $texte, $regs)) {
+               // charset precise par le contenu (xml)
+               $charset = trim(strtolower($regs[1]));
+       } elseif (
+               // charset precise par le contenu (html)
+               preg_match(',<(meta|html|body)[^>]*charset[^>]*=[^>]*([#-_a-z0-9]+?),UimsS', $texte, $regs)
+               # eviter toute balise SPIP tel que #CHARSET ou #CONFIG d'un squelette
+               and false === strpos($regs[2], '#')
+               and $tmp = trim(strtolower($regs[2]))
+       ) {
+               $charset = $tmp;
+       } elseif (preg_match(',charset=([-_a-z0-9]+),i', $headers, $regs)) {
+               // charset de la reponse http
+               $charset = trim(strtolower($regs[1]));
+       } else {
+               $charset = '';
         }
+
+
         // normaliser les noms du shif-jis japonais
         if (preg_match(',^(x|shift)[_-]s?jis$,i', $charset)) {
                 $charset = 'shift-jis';