www/ecrire/inc/xml.php

   1 <?php
   2
   3 /***************************************************************************\
   4  *  SPIP, Systeme de publication pour l'internet                           *
   5  *                                                                         *
   6  *  Copyright (c) 2001-2019                                                *
   7  *  Arnaud Martin, Antoine Pitrou, Philippe Riviere, Emmanuel Saint-James  *
   8  *                                                                         *
   9  *  Ce programme est un logiciel libre distribue sous licence GNU/GPL.     *
  10  *  Pour plus de details voir le fichier COPYING.txt ou l'aide en ligne.   *
  11 \***************************************************************************/
  12
  13 /**
  14  * Outils pour lecture de XML
  15  *
  16  * @package SPIP\Core\XML
  17  **/
  18
  19 if (!defined('_ECRIRE_INC_VERSION')) {
  20         return;
  21 }
  22
  23
  24 /**
  25  * Lit un fichier xml donné et renvoie son arbre.
  26  *
  27  * @example
  28  *     ```
  29  *     include_spip('inc/xml');
  30  *     $arbre = spip_xml_load(_DIR_PLUGINS . "$plug/plugin.xml");
  31  *     ```
  32  *
  33  * @uses spip_xml_parse()
  34  *
  35  * @param string $fichier
  36  *     Chemin local ou URL distante du fichier XML
  37  * @param bool $strict
  38  *     true pour râler si une balise n'est pas correctement fermée, false sinon.
  39  * @param bool $clean ?
  40  * @param int $taille_max
  41  *     Taille maximale si fichier distant
  42  * @param string|array $datas
  43  *     Données à envoyer pour récupérer le fichier distant
  44  * @param int $profondeur ?
  45  * @return array|bool
  46  *     - array : l'arbre XML,
  47  *     - false si l'arbre xml ne peut être créé ou est vide
  48  **/
  49 function spip_xml_load($fichier, $strict = true, $clean = true, $taille_max = 1048576, $datas = '', $profondeur = -1) {
  50         $contenu = "";
  51         if (tester_url_absolue($fichier)) {
  52                 include_spip('inc/distant');
  53                 $contenu = recuperer_page($fichier, false, false, $taille_max, $datas);
  54         } else {
  55                 lire_fichier($fichier, $contenu);
  56         }
  57         $arbre = array();
  58         if ($contenu) {
  59                 $arbre = spip_xml_parse($contenu, $strict, $clean, $profondeur);
  60         }
  61
  62         return count($arbre) ? $arbre : false;
  63 }
  64
  65 if (!defined('_SPIP_XML_TAG_SPLIT')) {
  66         define('_SPIP_XML_TAG_SPLIT', "{<([^:>][^>]*?)>}sS");
  67 }
  68
  69 /**
  70  * Parse une chaine XML donnée et retourne un tableau.
  71  *
  72  * @see spip_xml_aplatit() pour l'inverse
  73  *
  74  * @param string $texte
  75  *     Texte XML
  76  * @param bool $strict
  77  *     true pour râler si une balise n'est pas correctement fermée, false sinon.
  78  * @param bool $clean ?
  79  * @param int $profondeur ?
  80  * @return array|bool
  81  *     - array : l'arbre XML,
  82  *     - false si l'arbre xml ne peut être créé ou est vide
  83  **/
  84 function spip_xml_parse(&$texte, $strict = true, $clean = true, $profondeur = -1) {
  85         $out = array();
  86         // enlever les commentaires
  87         $charset = 'AUTO';
  88         if ($clean === true) {
  89                 if (preg_match(",<\?xml\s(.*?)encoding=['\"]?(.*?)['\"]?(\s(.*))?\?>,im", $texte, $regs)) {
  90                         $charset = $regs[2];
  91                 }
  92                 $texte = preg_replace(',<!--(.*?)-->,is', '', $texte);
  93                 $texte = preg_replace(',<\?(.*?)\?>,is', '', $texte);
  94                 include_spip('inc/charsets');
  95                 $clean = $charset;
  96                 //$texte = importer_charset($texte,$charset);
  97         }
  98         if (is_string($clean)) {
  99                 $charset = $clean;
 100         }
 101         $txt = $texte;
 102
 103         // tant qu'il y a des tags
 104         $chars = preg_split(_SPIP_XML_TAG_SPLIT, $txt, 2, PREG_SPLIT_DELIM_CAPTURE);
 105         while (count($chars) >= 2) {
 106                 // tag ouvrant
 107                 //$chars = preg_split("{<([^>]*?)>}s",$txt,2,PREG_SPLIT_DELIM_CAPTURE);
 108
 109                 // $before doit etre vide ou des espaces uniquements!
 110                 $before = trim($chars[0]);
 111
 112                 if (strlen($before) > 0) {
 113                         return importer_charset($texte, $charset);
 114                 }//$texte; // before non vide, donc on est dans du texte
 115
 116                 $tag = rtrim($chars[1]);
 117                 $txt = $chars[2];
 118
 119                 if (strncmp($tag, '![CDATA[', 8) == 0) {
 120                         return importer_charset($texte, $charset);
 121                 }//$texte;
 122                 if (substr($tag, -1) == '/') { // self closing tag
 123                         $tag = rtrim(substr($tag, 0, strlen($tag) - 1));
 124                         $out[$tag][] = "";
 125                 } else {
 126                         $closing_tag = preg_split(",\s|\t|\n|\r,", trim($tag));
 127                         $closing_tag = reset($closing_tag);
 128                         // tag fermant
 129                         $ncclos = strlen("</$closing_tag>");
 130                         $p = strpos($txt, "</$closing_tag>");
 131                         if ($p !== false and (strpos($txt, "<") < $p)) {
 132                                 $nclose = 0;
 133                                 $nopen = 0;
 134                                 $d = 0;
 135                                 while (
 136                                         $p !== false
 137                                         and ($morceau = substr($txt, $d, $p - $d))
 138                                         and (($nopen += preg_match_all("{<" . preg_quote($closing_tag) . "(\s*>|\s[^>]*[^/>]>)}is", $morceau,
 139                                                         $matches, PREG_SET_ORDER)) > $nclose)
 140                                 ) {
 141                                         $nclose++;
 142                                         $d = $p + $ncclos;
 143                                         $p = strpos($txt, "</$closing_tag>", $d);
 144                                 }
 145                         }
 146                         if ($p === false) {
 147                                 if ($strict) {
 148                                         $out[$tag][] = "erreur : tag fermant $tag manquant::$txt";
 149
 150                                         return $out;
 151                                 } else {
 152                                         return importer_charset($texte, $charset);
 153                                 }//$texte // un tag qui constitue du texte a reporter dans $before
 154                         }
 155                         $content = substr($txt, 0, $p);
 156                         $txt = substr($txt, $p + $ncclos);
 157                         if ($profondeur == 0 or strpos($content, "<") === false) // eviter une recursion si pas utile
 158                         {
 159                                 $out[$tag][] = importer_charset($content, $charset);
 160                         }//$content;
 161                         else {
 162                                 $out[$tag][] = spip_xml_parse($content, $strict, $clean, $profondeur - 1);
 163                         }
 164                 }
 165                 $chars = preg_split(_SPIP_XML_TAG_SPLIT, $txt, 2, PREG_SPLIT_DELIM_CAPTURE);
 166         }
 167         if (count($out) && (strlen(trim($txt)) == 0)) {
 168                 return $out;
 169         } else {
 170                 return importer_charset($texte, $charset);
 171         }//$texte;
 172 }
 173
 174 // http://code.spip.net/@spip_xml_aplatit
 175 function spip_xml_aplatit($arbre, $separateur = " ") {
 176         $s = "";
 177         if (is_array($arbre)) {
 178                 foreach ($arbre as $tag => $feuille) {
 179                         if (is_array($feuille)) {
 180                                 if ($tag !== intval($tag)) {
 181                                         $f = spip_xml_aplatit($feuille, $separateur);
 182                                         if (strlen($f)) {
 183                                                 $tagf = explode(" ", $tag);
 184                                                 $tagf = $tagf[0];
 185                                                 $s .= "<$tag>$f</$tagf>";
 186                                         } else {
 187                                                 $s .= "<$tag />";
 188                                         }
 189                                 } else {
 190                                         $s .= spip_xml_aplatit($feuille);
 191                                 }
 192                                 $s .= $separateur;
 193                         } else {
 194                                 $s .= "$feuille$separateur";
 195                         }
 196                 }
 197         }
 198
 199         return strlen($separateur) ? substr($s, 0, -strlen($separateur)) : $s;
 200 }
 201
 202 // http://code.spip.net/@spip_xml_tagname
 203 function spip_xml_tagname($tag) {
 204         if (preg_match(',^([a-z][\w:]*),i', $tag, $reg)) {
 205                 return $reg[1];
 206         }
 207
 208         return "";
 209 }
 210
 211 // http://code.spip.net/@spip_xml_decompose_tag
 212 function spip_xml_decompose_tag($tag) {
 213         $tagname = spip_xml_tagname($tag);
 214         $liste = array();
 215         $tag = ltrim(strpbrk($tag, " \n\t"));
 216         $p = strpos($tag, '=');
 217         while ($p !== false) {
 218                 $attr = trim(substr($tag, 0, $p));
 219                 $tag = ltrim(substr($tag, $p + 1));
 220                 $quote = $tag[0];
 221                 $p = strpos($tag, $quote, 1);
 222                 $cont = substr($tag, 1, $p - 1);
 223                 $liste[$attr] = $cont;
 224                 $tag = substr($tag, $p + 1);
 225                 $p = strpos($tag, '=');
 226         }
 227
 228         return array($tagname, $liste);
 229 }
 230
 231 /**
 232  * Recherche dans un arbre XML généré par `spip_xml_parse()` (ou une branche de cet arbre)
 233  * les clés de l'arbre qui valident la regexp donnée.
 234  *
 235  * Les branches qui valident la regexp sont retournées dans le tableau `$matches`.
 236  *
 237  * @see spip_xml_parse()
 238  * @see spip_xml_decompose_tag()
 239  *
 240  * @param string $regexp
 241  *     Expression régulière
 242  * @param array $arbre
 243  *     Arbre XML
 244  * @param array $matches
 245  *     Branches de l'arbre validant la rexgep
 246  * @param bool $init ?
 247  * @return bool
 248  *     false si aucun élément ne valide l'expression régulière, true sinon.
 249  **/
 250 function spip_xml_match_nodes($regexp, &$arbre, &$matches, $init = true) {
 251         if ($init) {
 252                 $matches = array();
 253         }
 254         if (is_array($arbre) && count($arbre)) {
 255                 foreach (array_keys($arbre) as $tag) {
 256                         if (preg_match($regexp, $tag)) {
 257                                 $matches[$tag] = &$arbre[$tag];
 258                         }
 259                         if (is_array($arbre[$tag])) {
 260                                 foreach (array_keys($arbre[$tag]) as $occurences) {
 261                                         spip_xml_match_nodes($regexp, $arbre[$tag][$occurences], $matches, false);
 262                                 }
 263                         }
 264                 }
 265         }
 266
 267         return (count($matches));
 268 }