<?php
/**
 * This set of functions allows you to filter html in order to remove
 * any malicious tags from it. Useful in cases when you need to filter
 * user input for any cross-site-scripting attempts.
 *
 * Copyright (C) 2002-2004 by Duke University
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 * @Author Konstantin Riabitsev <icon@linux.duke.edu>
 * @Author Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
 * @Version 1.1 ($Date$)
 */
/**
 * This function returns the final tag out of the tag name, an array
 * of attributes, and the type of the tag. This function is called by
 * tln_sanitize internally.
 *
 * Tag types:
 *   1 - opening tag, e.g. <a href="blah">
 *   2 - closing tag, e.g. </a> (attributes are ignored)
 *   3 - XHTML-style content-less tag, e.g. <img src="blah" />
 *
 * Attribute values are emitted verbatim, so they must already carry
 * their own quotes.
 *
 * @param string $tagname the name of the tag.
 * @param array $attary the array of attributes and their values
 *                      (anything that is not a non-empty array is ignored)
 * @param integer $tagtype The type of the tag (see above).
 * @return string A string with the final tag representation.
 */
function tln_tagprint($tagname, $attary, $tagtype)
{
    if ($tagtype == 2) {
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && count($attary) > 0) {
        $atts = array();
        // foreach instead of each(): each() was removed in PHP 8.
        foreach ($attary as $attname => $attvalue) {
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . implode(' ', $atts);
    }
    if ($tagtype == 3) {
        $fulltag .= ' /';
    }
    $fulltag .= '>';

    return $fulltag;
}
/**
 * A small helper function to use with array_walk. Modifies a by-ref
 * value and makes it lowercase.
 *
 * @param string $val a value passed by-ref.
 * @return void since it modifies a by-ref value.
 */
function tln_casenormalize(&$val)
{
    $val = strtolower($val);
}
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * @param string $body the string
 * @param integer $offset the offset within the string where we should start
 *                        looking for the next non-whitespace character.
 * @return integer the location within the $body where the next
 *                 non-whitespace char is located ($offset itself when the
 *                 character at $offset is not whitespace or $offset lies
 *                 past the end of $body).
 */
function tln_skipspace($body, $offset)
{
    $matches = array();
    // The cast covers PHP versions where substr() returns false for an
    // out-of-range offset.
    preg_match('/^(\s*)/s', (string) substr($body, $offset), $matches);
    // Measure the matched run with strlen(); calling sizeof() on the
    // string is a TypeError on PHP 8.
    if (isset($matches[1]) && $matches[1] !== '') {
        $offset += strlen($matches[1]);
    }

    return $offset;
}
/**
 * This function looks for the next character within a string. It's
 * really just a glorified "strpos", except it catches the failures
 * and reports them as "end of string".
 *
 * @param string $body The string to look for needle in.
 * @param integer $offset Start looking from this position.
 * @param string $needle The character/string to look for.
 * @return integer location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found.
 */
function tln_findnxstr($body, $offset, $needle)
{
    $length = strlen($body);
    // strpos() throws a ValueError on PHP 8 when the offset lies past the
    // end of the haystack; nothing can be found there anyway.
    if ($offset > $length) {
        return $length;
    }
    $pos = strpos($body, $needle, $offset);
    if ($pos === false) {
        $pos = $length;
    }

    return $pos;
}
/**
 * This function takes a PCRE-style regexp and tries to match it
 * within the string, starting at the given offset.
 *
 * The expression is embedded in a pattern delimited by "%", so $reg
 * must not contain an unescaped "%".
 *
 * @param string $body The string to look for needle in.
 * @param integer $offset Start looking from here.
 * @param string $reg A PCRE-style regex to match.
 * @return array|boolean Returns a false if no matches found, or an array
 *                       with the following members:
 *                       - integer with the location of the match within $body
 *                       - string with whatever content between offset and the match
 *                       - string with whatever it is we matched
 */
function tln_findnxreg($body, $offset, $reg)
{
    $matches = array();
    $preg_rule = '%^(.*?)(' . $reg . ')%s';
    $found = preg_match($preg_rule, (string) substr($body, $offset), $matches);
    // Compare against the empty string explicitly: a plain truthiness test
    // would report a legitimate match of "0" as "not found".
    if (!$found || !isset($matches[0]) || $matches[0] === '') {
        return false;
    }

    return array(
        $offset + strlen($matches[1]),
        $matches[1],
        $matches[2]
    );
}
/**
 * This function looks for the next tag.
 *
 * Tag types used in the result:
 *   1 - opening tag, e.g. <a href="blah">
 *   2 - closing tag, e.g. </a>
 *   3 - XHTML-style content-less tag, e.g. <img src="blah"/>
 *
 * Attribute values in the returned array always include their quotes.
 * Unquoted values are wrapped in double quotes (with embedded double
 * quotes encoded as &quot;), and value-less attributes become "yes".
 *
 * @param string $body String where to look for the next tag.
 * @param integer $offset Start looking from here.
 * @return array|boolean false if no more tags exist in the body, or
 *                       an array with the following members:
 *                       - string with the name of the tag
 *                       - array with attributes and their values
 *                       - integer with tag type (1, 2, or 3)
 *                       - integer where the tag starts (starting "<")
 *                       - integer where the tag ends (ending ">")
 *                       first three members will be false, if the tag is invalid.
 */
function tln_getnxtag($body, $offset)
{
    $bodylen = strlen($body);
    if ($offset > $bodylen) {
        return false;
    }
    $lt = tln_findnxstr($body, $offset, '<');
    if ($lt == $bodylen) {
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = tln_skipspace($body, $lt + 1);
    if ($pos >= $bodylen) {
        return array(false, false, false, $lt, $bodylen);
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah"/>
     */
    switch (substr($body, $pos, 1)) {
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            /**
             * A comment or an SGML declaration.
             */
            if (substr($body, $pos + 1, 2) == '--') {
                $gt = strpos($body, '-->', $pos);
                if ($gt === false) {
                    $gt = $bodylen;
                } else {
                    // Point at the ">" that ends the "-->" marker.
                    $gt += 2;
                }
                return array(false, false, false, $lt, $gt);
            }
            $gt = tln_findnxstr($body, $pos, '>');
            return array(false, false, false, $lt, $gt);
        default:
            /**
             * Assume tagtype 1 for now. If it's type 3, we'll switch values
             * later.
             */
            $tagtype = 1;
            break;
    }

    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
    if ($regary === false) {
        return array(false, false, false, $lt, $bodylen);
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match) {
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah"/>. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == '/>') {
                $pos++;
                $tagtype = 3;
            } else {
                $gt = tln_findnxstr($body, $pos, '>');
                return array(false, false, false, $lt, $gt);
            }
            //intentional fall-through
        case '>':
            return array($tagname, false, $tagtype, $lt, $pos);
        default:
            /**
             * Check if it's whitespace
             */
            if (!preg_match('/\s/', $match)) {
                /**
                 * This is an invalid tag! Look for the next closing ">".
                 */
                $gt = tln_findnxstr($body, $lt, '>');
                return array(false, false, false, $lt, $gt);
            }
            break;
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attary = array();

    while ($pos <= $bodylen) {
        $pos = tln_skipspace($body, $pos);
        if ($pos == $bodylen) {
            /**
             * Non-closed tag.
             */
            return array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = array();
        if (preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == '/>') {
                $tagtype = 3;
                $pos++;
            }
            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot;" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if ($regary === false) {
            /**
             * Looks like body ended before the end of tag.
             */
            return array(false, false, false, $lt, $bodylen);
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch ($match) {
            case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah"/>. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == '/>') {
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = tln_findnxstr($body, $pos, '>');
                    return array(false, false, false, $lt, $gt);
                }
                //intentional fall-through
            case '>':
                $attary[$attname] = '"yes"';
                return array($tagname, $attary, $tagtype, $lt, $pos);
            default:
                /**
                 * Skip whitespace and see what we arrive at.
                 */
                $pos = tln_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                /**
                 * Two things are valid here:
                 * '=' means this is attribute type 1 2 or 3.
                 * \w means this was attribute type 4.
                 * anything else we ignore and re-loop. End of tag and
                 * invalid stuff will be caught by our checks at the beginning
                 * of the loop.
                 */
                if ($char == '=') {
                    $pos++;
                    $pos = tln_skipspace($body, $pos);
                    /**
                     * Here are 3 possibilities:
                     * "'"  attribute type 1
                     * '"'  attribute type 2
                     * everything else is the content of tag type 3
                     */
                    $quot = substr($body, $pos, 1);
                    if ($quot == '\'') {
                        $regary = tln_findnxreg($body, $pos + 1, '\'');
                        if ($regary === false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval, $match) = $regary;
                        // Step over the closing quote.
                        $pos++;
                        $attary[$attname] = '\'' . $attval . '\'';
                    } elseif ($quot == '"') {
                        $regary = tln_findnxreg($body, $pos + 1, '\"');
                        if ($regary === false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval, $match) = $regary;
                        // Step over the closing quote.
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        /**
                         * These are hateful. Look for \s, or >.
                         */
                        $regary = tln_findnxreg($body, $pos, '[\s>]');
                        if ($regary === false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval, $match) = $regary;
                        /**
                         * If it's ">" it will be caught at the top.
                         * Encode embedded double quotes so that the value
                         * cannot break out of the quotes we wrap it in.
                         */
                        $attval = str_replace('"', '&quot;', $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } elseif (preg_match('|[\w/>]|', $char)) {
                    /**
                     * That was attribute type 4.
                     */
                    $attary[$attname] = '"yes"';
                } else {
                    /**
                     * An illegal character. Find next '>' and return.
                     */
                    $gt = tln_findnxstr($body, $pos, '>');
                    return array(false, false, false, $lt, $gt);
                }
                break;
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return array(false, false, false, $lt, $bodylen);
}
/**
 * Translates entities into literal values so they can be checked.
 *
 * Every match of $regex is replaced by the single byte whose code is
 * given by the first capture group (taken modulo 256).
 *
 * @param string $attvalue the by-ref value to check.
 * @param string $regex the regular expression to check against; its first
 *                      capture group must hold the numeric character code.
 * @param boolean $hex whether the entites are hexadecimal.
 * @return boolean True or False depending on whether there were matches.
 */
function tln_deent(&$attvalue, $regex, $hex = false)
{
    $matches = array();
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || empty($matches[0])) {
        return false;
    }

    $repl = array();
    foreach ($matches[0] as $i => $entity) {
        $numval = $matches[1][$i];
        $code = $hex ? hexdec($numval) : (float) $numval;
        // Reduce to a byte value explicitly, so that very long digit runs
        // (which overflow into floats) never reach chr() as non-integers.
        $repl[$entity] = chr((int) fmod($code, 256));
    }
    $attvalue = strtr($attvalue, $repl);

    return true;
}
/**
 * This function checks attribute values for entity-encoded values
 * and returns them translated into 8-bit strings so we can run
 * checks on them.
 *
 * Decimal entities (&#106;), hexadecimal entities (&#x6a;) and
 * backslash-number escapes are decoded repeatedly until nothing is left
 * to decode, then remaining backslashes are stripped.
 *
 * @param string $attvalue A string to run entity check against (by-ref,
 *                         modified in place).
 * @return void
 */
function tln_defang(&$attvalue)
{
    /**
     * Skip this if there aren't ampersands or backslashes.
     */
    if (strpos($attvalue, '&') === false
        && strpos($attvalue, '\\') === false
    ) {
        return;
    }
    // Keep decoding until a full pass finds nothing, so nested encodings
    // (an entity that decodes to the start of another entity) are undone too.
    do {
        $m = false;
        $m = $m || tln_deent($attvalue, '/\&#0*(\d+);*/s');
        $m = $m || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true);
        $m = $m || tln_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($m == true);
    $attvalue = stripslashes($attvalue);
}
/**
 * Kill any tabs, newlines, or carriage returns. Our friends the
 * makers of the browser with 95% market value decided that it'd
 * be funny to make "java[tab]script" be just as good as "javascript".
 *
 * NUL bytes and plain spaces are removed as well.
 *
 * @param string $attvalue The attribute value before extraneous spaces removed
 *                         (by-ref, modified in place).
 * @return void
 */
function tln_unspace(&$attvalue)
{
    // Only rewrite the value when it actually contains one of the characters.
    if (strcspn($attvalue, "\t\r\n\0 ") != strlen($attvalue)) {
        $attvalue = str_replace(
            array("\t", "\r", "\n", "\0", " "),
            array('', '', '', '', ''),
            $attvalue
        );
    }
}
/**
 * This function runs various checks against the attributes.
 *
 * For every attribute it: drops it when its name matches $rm_attnames,
 * decodes entities and strips whitespace from the value, applies the
 * $bad_attvals regex replacements, and (for "style") rewrites url(...)
 * values through tln_fixurl. Finally the attributes from
 * $add_attr_to_tag are merged in for matching tags.
 *
 * @param string $tagname String with the name of the tag.
 * @param array $attary Array with all tag attributes.
 * @param array $rm_attnames See description for tln_sanitize
 * @param array $bad_attvals See description for tln_sanitize
 * @param array $add_attr_to_tag See description for tln_sanitize
 * @param string $trans_image_path
 * @param boolean $block_external_images
 * @return array with modified attributes.
 */
function tln_fixatts(
    $tagname,
    $attary,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images
) {
    // foreach iterates over a snapshot, so changing $attary inside the loop
    // is safe (each() is gone in PHP 8).
    foreach ($attary as $attname => $attvalue) {
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr) {
                    if (preg_match($matchattr, $attname)) {
                        unset($attary[$attname]);
                        // Move on to the next attribute. Continuing only the
                        // innermost loop would let the value checks below
                        // re-add the attribute that was just removed.
                        continue 3;
                    }
                }
            }
        }
        /**
         * Remove any backslashes, entities, or extraneous whitespace.
         */
        $oldattvalue = $attvalue;
        tln_defang($attvalue);
        if ($attname == 'style' && $attvalue !== $oldattvalue) {
            // An encoded style value is treated as an obfuscation attempt.
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        tln_unspace($attvalue);

        /**
         * Now let's run checks on the attvalues.
         * I don't expect anyone to comprehend this. If you do,
         * get in touch with me so I can drive to where you live and
         * shake your hand personally. :)
         */
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr => $valary) {
                    if (preg_match($matchattr, $attname)) {
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue) {
                            $attary[$attname] = $newvalue;
                            $attvalue = $newvalue;
                        }
                    }
                }
            }
        }
        if ($attname == 'style') {
            if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
                $attary[$attname] = '"disallowed character"';
            }
            $aMatch = array();
            preg_match_all("/url\s*\((.+)\)/si", $attvalue, $aMatch);
            if (!empty($aMatch[1])) {
                foreach ($aMatch[1] as $sMatch) {
                    $urlvalue = $sMatch;
                    tln_fixurl($attname, $urlvalue, $trans_image_path, $block_external_images);
                    $attary[$attname] = str_replace($sMatch, $urlvalue, $attvalue);
                }
            }
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }

    return $attary;
}
/**
 * Sanitizes a single URL value in place.
 *
 * Surrounding quotes are stripped before inspection and the quote
 * character is reused whenever the value is rewritten. The rules are:
 * - empty value: replaced by $trans_image_path
 * - control or 8-bit characters: href gets a fixed "invalid" URL, anything
 *   else gets $trans_image_path
 * - mailto/http/https/ftp: kept for href (re-quoted); for other attributes
 *   replaced by $trans_image_path when external images are blocked or the
 *   URL has no path, otherwise left as is (without quotes)
 * - outbind/cid: kept (re-quoted)
 * - any other scheme: replaced by $trans_image_path
 * - no scheme: replaced by $trans_image_path unless it already is that path
 *
 * @param string $attname Name of the attribute the URL belongs to.
 * @param string $attvalue The URL value, by-ref; modified in place.
 * @param string $trans_image_path Path of the placeholder image.
 * @param boolean $block_external_images Whether external images are replaced.
 * @return void
 */
function tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
{
    $sQuote = '"';
    $attvalue = trim($attvalue);
    if ($attvalue && ($attvalue[0] == '"' || $attvalue[0] == "'")) {
        // remove the quotes, remembering which kind was used
        $sQuote = $attvalue[0];
        $attvalue = trim(substr($attvalue, 1, -1));
    }

    $sBlank = $sQuote . $trans_image_path . $sQuote;

    /**
     * Replace empty src tags with the blank image. src is only used
     * for frames, images, and image inputs. Doing a replace should
     * not affect them working as should be, however it will stop
     * IE from being kicked off when src for img tags are not set
     */
    if ($attvalue == '') {
        $attvalue = $sBlank;
        return;
    }

    // first, disallow 8 bit characters and control characters
    if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
        if ($attname == 'href') {
            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
        } else {
            $attvalue = $sBlank;
        }
        return;
    }

    $aUrl = parse_url($attvalue);
    if (!is_array($aUrl)) {
        // parse_url() returns false on seriously malformed URLs.
        $aUrl = array();
    }

    if (!isset($aUrl['scheme'])) {
        if (!isset($aUrl['path']) || $aUrl['path'] != $trans_image_path) {
            // Assign to $attvalue itself. The former "$$attvalue" wrote to a
            // variable *named by* the URL, so the value was never replaced
            // and a URL of "this" caused a fatal error.
            $attvalue = $sBlank;
        }
        return;
    }

    switch (strtolower($aUrl['scheme'])) {
        case 'mailto':
        case 'http':
        case 'https':
        case 'ftp':
            if ($attname != 'href') {
                if ($block_external_images == true || !isset($aUrl['path'])) {
                    $attvalue = $sBlank;
                }
            } else {
                $attvalue = $sQuote . $attvalue . $sQuote;
            }
            break;
        case 'outbind':
        case 'cid':
            $attvalue = $sQuote . $attvalue . $sQuote;
            break;
        default:
            $attvalue = $sBlank;
            break;
    }
}
/**
 * Extracts and sanitizes the content of a <style> block.
 *
 * Scans $body from $pos up to the closing </style> tag (HTML comments are
 * skipped, so a "</style>" inside a comment does not end the block), then
 * cleans the collected CSS: "body" selectors become ".bodyclass", @import
 * lines are removed, url(...) values go through tln_fixurl, and when the
 * de-obfuscated text contains dangerous constructs (expression, behavior,
 * binding, script, position, comments, ...) the scrubbed version is used.
 *
 * @param string $body The complete HTML body.
 * @param integer $pos Offset just past the opening <style> tag.
 * @param string $trans_image_path Path of the placeholder image.
 * @param boolean $block_external_images Whether external images are replaced.
 * @return array Two members: the sanitized style content (or false when no
 *               closing </style> was found) and the offset in $body where
 *               parsing should continue (strlen($body) on failure).
 */
function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
{
    // workaround for </style> in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    $newpos = strlen($body);
    for ($i = $pos, $iCount = strlen($body); $i < $iCount; ++$i) {
        $char = $body[$i];
        switch ($char) {
            case '<':
                $sToken = $char;
                break;
            case '/':
                if ($sToken == '<') {
                    // "</" - start collecting a possible end tag
                    $sToken .= $char;
                    $bEndTag = true;
                } else {
                    $content .= $char;
                }
                break;
            case '>':
                if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/\<\/\s*style\s*\>/i', $sToken, $aMatch)) {
                        $newpos = $i + 1;
                        $bSucces = true;
                        break 2;
                    } else {
                        $content .= $sToken;
                    }
                    $bEndTag = false;
                } else {
                    $content .= $char;
                }
                break;
            case '!':
                if ($sToken == '<') {
                    // possible comment
                    if (isset($body[$i + 2]) && substr($body, $i, 3) == '!--') {
                        $i = strpos($body, '-->', $i + 3);
                        if ($i === false) { // no end comment
                            $i = strlen($body);
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }
    if ($bSucces == false) {
        return array(false, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/', $content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
        return array($content, $newpos);
    }

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi", "\n<!-- @import rules forbidden -->\n", $content);

    /**
     * Fix url('blah') declarations. Backslash-obfuscated spellings of
     * "url" are normalized first.
     */
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
    $aMatch = array();
    preg_match_all("/url\s*\((.+)\)/si", $content, $aMatch);
    if (!empty($aMatch[1])) {
        $aValue = $aReplace = array();
        foreach ($aMatch[1] as $sMatch) {
            // url value
            $urlvalue = $sMatch;
            tln_fixurl('style', $urlvalue, $trans_image_path, $block_external_images);
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue, $aReplace, $content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     * The checks run on this de-obfuscated copy; the original content is
     * only replaced when something dangerous was found in it.
     */
    $contentTemp = $content;
    tln_defang($contentTemp);
    tln_unspace($contentTemp);

    $match = array(
        '/\/\*.*\*\//',
        '/expression/i',
        '/behaviou*r/i',
        '/binding/i',
        '/include-source/i',
        '/javascript/i',
        '/script/i',
        '/position/i'
    );
    $replace = array('', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        $content = $contentNew;
    }

    return array($content, $newpos);
}
/**
 * Converts the attributes of a <body> tag into attributes for the <div>
 * that replaces it.
 *
 * The result always carries class='bodyclass'. The body attributes
 * "background", "bgcolor" and "text" are translated into an inline style;
 * a background image is always replaced by $trans_image_path.
 *
 * @param array $attary Attributes of the body tag; values include their
 *                      quotes, which are stripped here.
 * @param string $trans_image_path Path of the placeholder image.
 * @return array Attributes for the replacement div (values quoted).
 */
function tln_body2div($attary, $trans_image_path)
{
    $divattary = array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && count($attary) > 0) {
        foreach ($attary as $attname => $attvalue) {
            // The first character is the quote; drop every occurrence of it.
            $quotchar = (string) substr($attvalue, 0, 1);
            if ($quotchar !== '') {
                $attvalue = str_replace($quotchar, "", $attvalue);
            }
            switch ($attname) {
                case 'background':
                    $styledef .= "background-image: url('$trans_image_path'); ";
                    break;
                case 'bgcolor':
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0) {
            $divattary["style"] = "\"$styledef\"";
        }
    }

    return $divattary;
}
/**
 * Sanitizes an HTML string according to the given rules.
 *
 * @param string $body The HTML you wish to filter
 * @param array $tag_list List of tag names. Its first element is a boolean
 *                        flag: false means "remove these tags", true means
 *                        "allow only these tags".
 * @param array $rm_tags_with_content Tags that are removed together with
 *                                    everything up to their closing tag.
 * @param array $self_closing_tags Opening tags that are printed as
 *                                 XHTML-style self-closing tags.
 * @param boolean $force_tag_closing Whether closing tags are appended for
 *                                   tags still open at the end of the body.
 * @param array $rm_attnames Map of tag-name regex => list of attribute-name
 *                           regexes to remove.
 * @param array $bad_attvals Map of tag-name regex => map of attribute-name
 *                           regex => array(list of patterns, list of
 *                           replacements) applied to attribute values.
 * @param array $add_attr_to_tag Map of tag-name regex => attributes that are
 *                               merged into matching tags.
 * @param string $trans_image_path
 * @param boolean $block_external_images
 *
 * @return string Sanitized html safe to show on your pages.
 */
function tln_sanitize(
    $body,
    $tag_list,
    $rm_tags_with_content,
    $self_closing_tags,
    $force_tag_closing,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images
) {
    /**
     * Normalize rm_tags and rm_tags_with_content.
     * The first member of tag_list is the allow/remove flag:
     * false  means remove these tags
     * true   means allow these tags
     */
    $rm_tags = array_shift($tag_list);
    $tag_list = is_array($tag_list) ? array_map('strtolower', $tag_list) : array();
    $rm_tags_with_content = is_array($rm_tags_with_content)
        ? array_map('strtolower', $rm_tags_with_content)
        : array();
    $self_closing_tags = is_array($self_closing_tags)
        ? array_map('strtolower', $self_closing_tags)
        : array();

    $curpos = 0;
    $open_tags = array();
    $trusted = "<!-- begin tln_sanitized html -->\n";
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     * by encoding the leading ampersand.
     */
    $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
    while (($curtag = tln_getnxtag($body, $curpos)) !== false) {
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = substr($body, $curpos, $lt - $curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 1) {
            list($free_content, $curpos) =
                tln_fixstyle($body, $gt + 1, $trans_image_path, $block_external_images);
            if ($free_content != false) {
                if (!empty($attary)) {
                    $attary = tln_fixatts(
                        $tagname,
                        $attary,
                        $rm_attnames,
                        $bad_attvals,
                        $add_attr_to_tag,
                        $trans_image_path,
                        $block_external_images
                    );
                }
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
                $trusted .= $free_content;
                $trusted .= tln_tagprint($tagname, null, 2);
            }
            continue;
        }
        if ($skip_content == false) {
            $trusted .= $free_content;
        }
        if ($tagname != false) {
            if ($tagtype == 2) {
                if ($skip_content == $tagname) {
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false) {
                        if ($tagname == "body") {
                            $tagname = "div";
                        }
                        // Only print closing tags that match a tag we opened.
                        if (isset($open_tags[$tagname]) &&
                            $open_tags[$tagname] > 0
                        ) {
                            $open_tags[$tagname]--;
                        } else {
                            $tagname = false;
                        }
                    }
                }
            } else {
                /**
                 * $rm_tags_with_content
                 */
                if ($skip_content == false) {
                    /**
                     * See if this is a self-closing type and change
                     * tagtype appropriately.
                     */
                    if ($tagtype == 1
                        && in_array($tagname, $self_closing_tags, true)
                    ) {
                        $tagtype = 3;
                    }
                    /**
                     * See if we should skip this tag and any content
                     * inside it.
                     */
                    if ($tagtype == 1
                        && in_array($tagname, $rm_tags_with_content, true)
                    ) {
                        $skip_content = $tagname;
                    } else {
                        if (($rm_tags == false
                                && in_array($tagname, $tag_list, true)) ||
                            ($rm_tags == true
                                && !in_array($tagname, $tag_list, true))
                        ) {
                            $tagname = false;
                        } else {
                            /**
                             * Convert body into div.
                             */
                            if ($tagname == "body") {
                                $tagname = "div";
                                $attary = tln_body2div($attary, $trans_image_path);
                            }
                            if ($tagtype == 1) {
                                if (isset($open_tags[$tagname])) {
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname] = 1;
                                }
                            }
                            /**
                             * This is where we run other checks.
                             */
                            if (is_array($attary) && count($attary) > 0) {
                                $attary = tln_fixatts(
                                    $tagname,
                                    $attary,
                                    $rm_attnames,
                                    $bad_attvals,
                                    $add_attr_to_tag,
                                    $trans_image_path,
                                    $block_external_images
                                );
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false) {
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt + 1;
    }
    $trusted .= substr($body, $curpos, strlen($body) - $curpos);
    if ($force_tag_closing == true) {
        foreach ($open_tags as $tagname => $opentimes) {
            while ($opentimes > 0) {
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end tln_sanitized html -->\n";

    return $trusted;
}
/**
 * Use the nifty htmlfilter library: runs tln_sanitize with a default
 * rule set.
 *
 * The rule set removes a fixed list of tags (object, meta, html, ...),
 * removes script-like tags together with their content, strips event
 * handler and data attributes, neutralizes script URLs in src,
 * background, href, action and style values, and adds target="_blank"
 * to links.
 *
 * @param string $body The HTML to filter.
 * @param string $trans_image_path Path of the placeholder image that
 *                                 replaces unsafe or blocked images.
 * @param boolean $block_external_images Whether http(s) images are replaced
 *                                       by the placeholder image.
 * @return string The sanitized HTML.
 */
function HTMLFilter($body, $trans_image_path, $block_external_images = false)
{
    // The leading false means: the listed tags are removed.
    $tag_list = array(
        false,
        "object",
        "meta",
        "html",
        "head",
        "base",
        "link",
        "frame",
        "iframe",
        "plaintext",
        "marquee"
    );

    $rm_tags_with_content = array(
        "script",
        "applet",
        "embed",
        "title",
        "frameset",
        "xmp",
        "xml"
    );

    $self_closing_tags = array(
        "img",
        "br",
        "hr",
        "input",
        "outbind"
    );

    $force_tag_closing = true;

    $rm_attnames = array(
        "/.*/" =>
            array(
                // "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
            )
    );

    $bad_attvals = array(
        "/.*/" =>
            array(
                "/^src|background/i" =>
                    array(
                        array(
                            '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                            '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                            '/^([\'"])\s*about\s*:.*([\'"])/si'
                        ),
                        array(
                            "\\1$trans_image_path\\2",
                            "\\1$trans_image_path\\2",
                            "\\1$trans_image_path\\2"
                        )
                    ),
                "/^href|action/i" =>
                    array(
                        array(
                            '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                            '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                            '/^([\'"])\s*about\s*:.*([\'"])/si'
                        ),
                        array(
                            "\\1#\\1",
                            "\\1#\\1",
                            "\\1#\\1"
                        )
                    ),
                "/^style/i" =>
                    array(
                        array(
                            "/\/\*.*\*\//",
                            "/expression/i",
                            "/binding/i",
                            "/behaviou*r/i",
                            "/include-source/i",
                            '/position\s*:/i',
                            '/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i',
                            '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
                            '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
                            '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
                            '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si'
                        ),
                        array(
                            "",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "url",
                            "url(\\1#\\1)",
                            "url(\\1#\\1)",
                            "url(\\1#\\1)",
                            "\\1:url(\\2#\\3)"
                        )
                    )
            )
    );

    if ($block_external_images) {
        // Extend the pattern/replacement pairs so that http(s) images are
        // swapped for the placeholder image as well.
        array_push(
            $bad_attvals['/.*/']['/^src|background/i'][0],
            '/^([\'\"])\s*https*:.*([\'\"])/si'
        );
        array_push(
            $bad_attvals['/.*/']['/^src|background/i'][1],
            "\\1$trans_image_path\\1"
        );
        array_push(
            $bad_attvals['/.*/']['/^style/i'][0],
            '/url\(([\'\"])\s*https*:.*([\'\"])\)/si'
        );
        array_push(
            $bad_attvals['/.*/']['/^style/i'][1],
            "url(\\1$trans_image_path\\1)"
        );
    }

    $add_attr_to_tag = array(
        "/^a$/i" =>
            array('target' => '"_blank"')
    );

    $trusted = tln_sanitize(
        $body,
        $tag_list,
        $rm_tags_with_content,
        $self_closing_tags,
        $force_tag_closing,
        $rm_attnames,
        $bad_attvals,
        $add_attr_to_tag,
        $trans_image_path,
        $block_external_images
    );

    return $trusted;
}