6 * let's reinvent the wheel one last time
8 * This library of code is meant to be a fast and universal replacement
9 * for any and all text-processing systems written in PHP
11 * It is dual-licensed for any use under the GNU/GPL2 and MIT licenses,
14 * (c) 2009 Fil - fil@rezo.net
15 * Documentation & http://zzz.rezo.net/-TextWheel-
17 * Usage: $wheel = new TextWheel(); echo $wheel->text($text);
21 if (!defined('_ECRIRE_INC_VERSION')) {
25 require_once dirname(__FILE__
) . "/textwheelruleset.php";
29 protected static $subwheel = array();
31 protected $compiled = array();
36 * @param TextWheelRuleSet $ruleset
38 public function __construct($ruleset = null) {
// Delegate to setRuleSet(), which wraps any non-object value (including the null default) in a TextWheelRuleSet.
39 $this->setRuleSet($ruleset);
45 * @param TextWheelRuleSet $ruleset
47 public function setRuleSet($ruleset) {
// Anything that is not already an object is handed to the TextWheelRuleSet constructor.
48 if (!is_object($ruleset)) {
49 $ruleset = new TextWheelRuleSet($ruleset);
// Store the (possibly freshly built) ruleset on the instance.
51 $this->ruleset
= $ruleset;
55 * Apply all rules of RuleSet to a text
60 public function text($t) {
// Fetch the rule list by reference from the ruleset.
61 $rules = &$this->ruleset
->getRules();
62 ## apply each in order
63 foreach ($rules as $name => $rule) #php4+php5
// Pass $rules[$name] rather than the loop copy $rule: apply() takes its rule argument by reference.
65 $this->apply($rules[$name], $t);
67 #foreach ($this->rules as &$rule) #smarter &reference, but php5 only
68 # $this->apply($rule, $t);
// Render $x as PHP source with var_export(), then backslash-escape newline, carriage-return and tab characters.
72 private function export($x) {
73 return addcslashes(var_export($x, true), "\n\r\t");
76 public function compile($b = null) {
// Builds PHP source text: a function named $b that takes $t, runs the code generated for each rule, and returns $t.
77 $rules = &$this->ruleset
->getRules();
79 ## apply each in order
83 foreach ($rules as $name => $rule) {
// Initialise the rule first so that func_replace and $this->compiled are populated.
85 $this->initRule($rule);
// If initRule() stored compiled source under this rule's replace value, queue that source in $pre.
86 if (is_string($rule->replace
)
87 and isset($this->compiled
[$rule->replace
])
88 and $fun = $this->compiled
[$rule->replace
]
90 $pre[] = "\n###\n## $name\n###\n" . $fun;
// Extract the function name from the compiled source.
91 preg_match(',function (\w+), ', $fun, $r);
92 $rule->compilereplace
= $r[1]; # do not modify ->replace, otherwise execution breaks...
// Start the generated snippet for this rule with a comment carrying its name.
95 $r = "\t/* $name */\n";
98 $r .= "\t" . 'require_once ' . TextWheel
::export($rule->require) . ';' . "\n";
// Emit the rule's preconditions (if_str / if_stri / if_match) as generated if-statements.
101 $r .= "\t" . 'if (strpos($t, ' . TextWheel
::export($rule->if_str
) . ') === false)' . "\n";
103 if ($rule->if_stri
) {
104 $r .= "\t" . 'if (stripos($t, ' . TextWheel
::export($rule->if_stri
) . ') === false)' . "\n";
106 if ($rule->if_match
) {
107 $r .= "\t" . 'if (preg_match(' . TextWheel
::export($rule->if_match
) . ', $t))' . "\n";
// Rules that fell back to replace_identity produce no replacement call.
110 if ($rule->func_replace
!== 'replace_identity') {
111 $fun = 'TextWheel::' . $rule->func_replace
;
// Substitute the native PHP function for the known TextWheel::replace_* handlers.
113 case 'TextWheel::replace_all_cb':
114 $fun = $rule->replace
; # trim()...
116 case 'TextWheel::replace_preg':
117 $fun = 'preg_replace';
119 case 'TextWheel::replace_str':
120 $fun = 'str_replace';
122 case 'TextWheel::replace_preg_cb':
123 $fun = 'preg_replace_callback';
// Generated statement: $t = <fun>(<match>, <replace>, $t);
128 $r .= "\t" . '$t = ' . $fun . '(' . TextWheel
::export($rule->match
) . ', ' . TextWheel
::export($rule->replace
) . ', $t);' . "\n";
// Assemble the per-rule snippets into the function body, then prepend the queued sub-function sources.
133 $code = join("\n", $comp);
134 $code = 'function ' . $b . '($t) {' . "\n" . $code . "\n\treturn \$t;\n}\n\n";
135 $code = join("\n", $pre) . $code;
142 * Get an internal global subwheel
143 * read access for anonymous function only
148 public static function &getSubWheel($n) {
// Returns, by reference, the entry stored at index $n of the static $subwheel registry.
149 return TextWheel
::$subwheel[$n];
153 * Create SubWheel (can be overridden in debug class)
155 * @param TextWheelRuleset $rules
158 protected function &createSubWheel(&$rules) {
// Wrap the given rules in a new TextWheel; assigning to a variable first gives the by-reference return something to refer to.
159 $tw = new TextWheel($rules);
165 * Initialize a rule at first call
166 * including file, creating function or wheel
169 * @param TextWheelRule $rule
171 protected function initRule(&$rule) {
// Load the file the rule depends on, if any.
173 if ($rule->require) {
174 require_once $rule->require;
177 # optimization: strpos or stripos?
178 if (isset($rule->if_str
)) {
// When the lower-cased and upper-cased forms differ, the string is moved to if_stri and if_str is cleared.
179 if (strtolower($rule->if_str
) !== strtoupper($rule->if_str
)) {
180 $rule->if_stri
= $rule->if_str
;
181 $rule->if_str
= null;
// create_replace: the replace value is PHP code that gets turned into a callback taking $m.
// NOTE(review): create_function() was deprecated in PHP 7.2 and removed in PHP 8.0.
185 if ($rule->create_replace
) {
186 $compile = $rule->replace
. '($t)';
187 $rule->replace
= create_function('$m', $rule->replace
);
// Remember the compilable form, keyed by the name of the generated callback.
188 $this->compiled
[$rule->replace
] = $compile;
189 $rule->create_replace
= false;
190 $rule->is_callback
= true;
191 } elseif ($rule->is_wheel
) {
// is_wheel: build a sub-wheel from the replace value and register it at index $n of the static registry.
192 $n = count(TextWheel
::$subwheel);
193 TextWheel
::$subwheel[] = $this->createSubWheel($rule->replace
);
194 $var = '$m[' . intval($rule->pick_match
) . ']';
195 if ($rule->type
== 'all' or $rule->type
== 'str' or $rule->type
== 'split' or !isset($rule->match
)) {
// The generated callback runs the registered sub-wheel's text() on $var.
198 $code = 'return TextWheel::getSubWheel(' . $n . ')->text(' . $var . ');';
199 $rule->replace
= create_function('$m', $code);
// Compile the sub-wheel under a name derived from the rule name ('-' replaced by '_').
200 $cname = 'compiled_' . str_replace('-', '_', $rule->name
);
201 $compile = TextWheel
::getSubWheel($n)->compile($cname);
202 $this->compiled
[$rule->replace
] = $compile;
203 $rule->is_wheel
= false;
204 $rule->is_callback
= true;
// Choose the name of the static replace_* handler from the rule type.
208 $rule->func_replace
= '';
209 if (isset($rule->replace
)) {
210 switch ($rule->type
) {
212 $rule->func_replace
= 'replace_all';
215 $rule->func_replace
= 'replace_str';
216 // test if quicker strtr usable
217 if (!$rule->is_callback
218 and is_array($rule->match
) and is_array($rule->replace
)
219 and $c = array_map('strlen', $rule->match
)
220 and $c = array_unique($c)
223 and $c = array_map('strlen', $rule->replace
)
224 and $c = array_unique($c)
// Collapse both arrays into plain strings, the form replace_strtr() passes to strtr().
228 $rule->match
= implode('', $rule->match
);
229 $rule->replace
= implode('', $rule->replace
);
230 $rule->func_replace
= 'replace_strtr';
234 $rule->func_replace
= 'replace_split';
// match becomes a pair: [delimiter, glue], with the glue defaulting to the delimiter when glue is null.
235 $rule->match
= array($rule->match
, is_null($rule->glue
) ?
$rule->match
: $rule->glue
);
239 $rule->func_replace
= 'replace_preg';
// Callback rules use the '_cb' variant of the handler.
242 if ($rule->is_callback
) {
243 $rule->func_replace
.= '_cb';
// No such handler on TextWheel: disable the rule and fall back to replace_identity.
246 if (!method_exists("TextWheel", $rule->func_replace
)) {
247 $rule->disabled
= true;
248 $rule->func_replace
= 'replace_identity';
254 * Apply a rule to a text
256 * @param TextWheelRule $rule
260 protected function apply(&$rule, &$t, &$count = null) {
// Precondition tests on the rule come first: disabled flag, if_chars, if_match.
262 if ($rule->disabled
) {
266 if (isset($rule->if_chars
) and (strpbrk($t, $rule->if_chars
) === false)) {
270 if (isset($rule->if_match
) and !preg_match($rule->if_match
, $t)) {
274 // init rule before testing if_str / if_stri as they are optimized by initRule
// func_replace is only set by initRule(), so its absence marks a rule that has not been initialised yet.
275 if (!isset($rule->func_replace
)) {
276 $this->initRule($rule);
279 if (isset($rule->if_str
) and strpos($t, $rule->if_str
) === false) {
283 if (isset($rule->if_stri
) and stripos($t, $rule->if_stri
) === false) {
// Dispatch to the static replace_* handler named by func_replace; the handlers take $t and $count by reference.
287 $func = $rule->func_replace
;
288 TextWheel
::$func($rule->match
, $rule->replace
, $t, $count);
292 * No Replacement function
293 * fall back in case of unknown method for replacing
294 * should be called max once per rule
296 * @param mixed $match
297 * @param mixed $replace
301 protected static function replace_identity(&$match, &$replace, &$t, &$count) {
305 * Static replacement of All text
307 * @param mixed $match
308 * @param mixed $replace
312 protected static function replace_all(&$match, &$replace, &$t, &$count) {
313 # special case: replace $0 with $t
314 # replace: "A$0B" will surround the string with A..B
315 # replace: "$0$0" will repeat the string
316 if (strpos($replace, '$0') !== false) {
// Every '$0' in $replace is substituted with the current text, and the result becomes the new $t.
317 $t = str_replace('$0', $t, $replace);
324 * Call back replacement of All text
326 * @param mixed $match
327 * @param mixed $replace
331 protected static function replace_all_cb(&$match, &$replace, &$t, &$count) {
336 * Static string replacement
338 * @param mixed $match
339 * @param mixed $replace
343 protected static function replace_str(&$match, &$replace, &$t, &$count) {
// A non-string $match (e.g. an array) skips the strpos() pre-check; a string $match is only replaced when it occurs in $t.
344 if (!is_string($match) or strpos($t, $match) !== false) {
// $count receives the number of replacements made by str_replace().
345 $t = str_replace($match, $replace, $t, $count);
350 * Fast Static string replacement one char to one char
352 * @param mixed $match
353 * @param mixed $replace
357 protected static function replace_strtr(&$match, &$replace, &$t, &$count) {
// Three-argument strtr(): each byte found in $match is mapped to the byte at the same offset in $replace. $count is not updated here.
358 $t = strtr($t, $match, $replace);
362 * Callback string replacement
364 * @param mixed $match
365 * @param mixed $replace
369 protected static function replace_str_cb(&$match, &$replace, &$t, &$count) {
// Cheap presence test before splitting.
370 if (strpos($t, $match) !== false) {
371 if (count($b = explode($match, $t)) > 1) {
// The callback is invoked once with $match; its result is used as the glue between all pieces.
372 $t = join($replace($match), $b);
378 * Static Preg replacement
380 * @param mixed $match
381 * @param mixed $replace
386 protected static function replace_preg(&$match, &$replace, &$t, &$count) {
// No limit (-1); $count receives the number of replacements performed.
387 $t = preg_replace($match, $replace, $t, -1, $count);
// NOTE(review): presumably raised when preg_replace() fails (null result) - confirm against the guarding condition.
389 throw new Exception('Memory error, increase pcre.backtrack_limit in php.ini');
394 * Callback Preg replacement
396 * @param mixed $match
397 * @param mixed $replace
402 protected static function replace_preg_cb(&$match, &$replace, &$t, &$count) {
// $replace is the callback handed to preg_replace_callback(); no limit (-1); $count receives the number of replacements.
403 $t = preg_replace_callback($match, $replace, $t, -1, $count);
// NOTE(review): presumably raised when preg_replace_callback() fails (null result) - confirm against the guarding condition.
405 throw new Exception('Memory error, increase pcre.backtrack_limit in php.ini');
411 * Static split replacement : invalid
413 * @param mixed $match
414 * @param mixed $replace
418 protected static function replace_split(&$match, &$replace, &$t, &$count) {
// A split rule has no static form: this handler only reports the misconfiguration.
419 throw new InvalidArgumentException('split rule always needs a callback function as replace');
423 * Callback split replacement
425 * @param array $match
426 * @param mixed $replace
430 protected static function replace_split_cb(&$match, &$replace, &$t, &$count) {
// $match[0] is the delimiter used to split $t.
431 $a = explode($match[0], $t);
// Each piece goes through the $replace callback, then the pieces are rejoined with the glue in $match[1].
432 $t = join($match[1], array_map($replace, $a));
436 class TextWheelDebug
extends TextWheel
{
437 protected static $t; # array of timings
438 protected static $tu; # array of timings (rules used)
439 protected static $tnu; # array of timings (rules not used)
440 protected static $u; # counter of useful rules
441 protected static $w; # counter of applied rules
442 public static $total;
445 * Timer for profiling
447 * @staticvar int $time
452 protected function timer($t = 'rien', $raw = false) {
456 // microtime peut contenir les microsecondes et le temps
457 $b = explode(' ', $b);
458 if (count($b) == 2) {
462 if (!isset($time[$t])) {
465 $p = ($a +
$b - $time[$t]) * 1000;
473 $s = sprintf("%d ", $x = floor($p / 1000));
477 return $s . sprintf("%.3f ms", $p);
482 * Apply all rules of RuleSet to a text
487 public function text($t) {
488 $rules = &$this->ruleset
->getRules();
489 ## apply each in order
490 foreach ($rules as $name => $rule) #php4+php5
493 $name .= ' ' . $rule->match
;
497 $this->apply($rule, $t);
498 TextWheelDebug
::$w[$name]++
; # nombre de fois appliquee
499 $v = $this->timer($name, true); # timer
500 TextWheelDebug
::$t[$name] +
= $v;
502 TextWheelDebug
::$u[$name]++
; # nombre de fois utile
503 TextWheelDebug
::$tu[$name] +
= $v;
505 TextWheelDebug
::$tnu[$name] +
= $v;
509 #foreach ($this->rules as &$rule) #smarter &reference, but php5 only
510 # $this->apply($rule, $t);
515 * Outputs data stored for profiling/debugging purposes
517 public static function outputDebug() {
518 if (isset(TextWheelDebug
::$t)) {
519 $time = array_flip(array_map('strval', TextWheelDebug
::$t));
522 <div class='textwheeldebug'>
523 <style type='text/css'>
524 .textwheeldebug table { margin:1em 0; }
525 .textwheeldebug th,.textwheeldebug td { padding-left: 15px }
526 .textwheeldebug .prof-0 .number { padding-right: 60px }
527 .textwheeldebug .prof-1 .number { padding-right: 30px }
528 .textwheeldebug .prof-1 .name { padding-left: 30px }
529 .textwheeldebug .prof-2 .name { padding-left: 60px }
530 .textwheeldebug .zero { color:orange; }
531 .textwheeldebug .number { text-align:right; }
532 .textwheeldebug .strong { font-weight:bold; }
534 <table class='sortable'>
535 <caption>Temps par rule</caption>
536 <thead><tr><th>temps (ms)</th><th>rule</th><th>application</th><th>t/u (ms)</th><th>t/n-u (ms)</th></tr></thead>\n";
538 foreach ($time as $t => $r) {
539 $applications = intval(TextWheelDebug
::$u[$r]);
541 if (intval($t * 10)) {
543 <td class='number strong'>" . number_format(round($t * 10) / 10, 1) . "</td><td> " . spip_htmlspecialchars($r) . "</td>
545 . (!$applications ?
" class='zero'" : "")
546 . ">" . $applications . "/" . intval(TextWheelDebug
::$w[$r]) . "</td>
547 <td class='number'>" . ($applications ?
number_format(round(TextWheelDebug
::$tu[$r] / $applications * 100) / 100,
549 <td class='number'>" . (($nu = intval(TextWheelDebug
::$w[$r]) - $applications) ?
number_format(round(TextWheelDebug
::$tnu[$r] / $nu * 100) / 100,
558 <caption>Temps total par rule</caption>
559 <thead><tr><th>temps</th><th>rule</th></tr></thead>\n";
560 ksort($GLOBALS['totaux']);
561 TextWheelDebug
::outputTotal($GLOBALS['totaux']);
563 # somme des temps des rules, ne tient pas compte des subwheels
564 echo "<p>temps total rules: " . round($total) . " ms</p>\n";
569 public static function outputTotal($liste, $profondeur = 0) {
571 foreach ($liste as $cause => $duree) {
572 if (is_array($duree)) {
573 TextWheelDebug
::outputTotal($duree, $profondeur +
1);
575 echo "<tr class='prof-$profondeur'>
576 <td class='number'><b>" . intval($duree) . "</b> ms</td>
577 <td class='name'>" . spip_htmlspecialchars($cause) . "</td>
584 * Create SubWheel (can be overridden in debug class)
586 * @param TextWheelRuleset $rules
589 protected function &createSubWheel(&$rules) {
// Sub-wheels created by a debug wheel are TextWheelDebug instances as well.
// NOTE(review): this returns a `new` expression from a by-reference function, whereas TextWheel::createSubWheel() assigns to a variable first; PHP raises a notice when a non-variable is returned by reference - confirm and align with the parent.
590 return new TextWheelDebug($rules);
// Fallback definition, used only when the runtime has no native stripos().
599 if (!function_exists('stripos')) {
600 function stripos($haystack, $needle) {
// stristr() yields the tail of $haystack starting at the first case-insensitive match; locating that tail with strpos() gives the match offset.
// NOTE(review): when nothing matches, stristr() returns false and that value is passed to strpos() as the needle - confirm this yields false on the PHP versions this fallback targets.
601 return strpos($haystack, stristr($haystack, $needle));
606 * approximation of strpbrk for php4
607 * return false if no char of $char_list is in $haystack
609 if (!function_exists('strpbrk')) {
610 function strpbrk($haystack, $char_list) {
611 $result = strcspn($haystack, $char_list);
612 if ($result != strlen($haystack)) {