5 * This set of functions allows you to filter html in order to remove
6 * any malicious tags from it. Useful in cases when you need to filter
7 * user input for any cross-site-scripting attempts.
9 * Copyright (C) 2002-2004 by Duke University
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 * @Author Konstantin Riabitsev <icon@linux.duke.edu>
27 * @Author Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
28 * @Version 1.1 ($Date$)
/**
 * Builds the textual form of a tag from its name, its attributes and
 * its type. Called internally by tln_sanitize.
 *
 * Attribute values are emitted verbatim, so they must already carry
 * their surrounding quotes (as produced by tln_getnxtag).
 *
 * @param string $tagname the name of the tag.
 * @param array|boolean $attary attribute name => quoted value pairs,
 *                              or false when the tag has no attributes.
 * @param integer $tagtype 1 = opening tag, 2 = closing tag,
 *                         3 = XHTML-style content-less tag.
 * @return string A string with the final tag representation.
 */
function tln_tagprint($tagname, $attary, $tagtype)
{
    // Closing tags never carry attributes.
    if ($tagtype == 2) {
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && count($attary)) {
        $atts = array();
        // foreach instead of each(): each() was removed in PHP 8.
        foreach ($attary as $attname => $attvalue) {
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . implode(' ', $atts);
    }
    if ($tagtype == 3) {
        $fulltag .= ' /';
    }

    return $fulltag . '>';
}
/**
 * Callback for array_walk: lowercases the element it is handed.
 *
 * @param string $val a value passed by-ref; overwritten with its
 *                    lowercase form.
 * @return void since it modifies a by-ref value.
 */
function tln_casenormalize(&$val)
{
    $val = strtolower($val);
}
/**
 * Skips any whitespace found at the given position and reports where
 * the next non-whitespace character is.
 *
 * @param string $body the string
 * @param integer $offset the offset within the string where we should start
 *                        looking for the next non-whitespace character.
 * @return integer the location within the $body where the next
 *                 non-whitespace char is located ($offset itself when
 *                 there is no whitespace at $offset).
 */
function tln_skipspace($body, $offset)
{
    // The match is a string, so measure it with strlen(); the previous
    // sizeof() call on a string raises a TypeError on PHP 8.
    if (preg_match('/^\s+/', (string) substr($body, $offset), $matches)) {
        $offset += strlen($matches[0]);
    }

    return $offset;
}
/**
 * Looks for the next occurrence of a string within another string.
 * A thin wrapper around strpos() that reports "not found" as the
 * length of the haystack instead of false.
 *
 * @param string $body The string to look for needle in.
 * @param integer $offset Start looking from this position.
 * @param string $needle The character/string to look for.
 * @return integer location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found.
 */
function tln_findnxstr($body, $offset, $needle)
{
    $len = strlen($body);
    // strpos() throws a ValueError on PHP 8 when the offset lies past
    // the end of the haystack; that simply means "not found" here.
    if ($offset > $len) {
        return $len;
    }
    $pos = strpos($body, $needle, $offset);

    return ($pos === false) ? $len : $pos;
}
/**
 * Takes a PCRE-style regexp fragment and finds its first match in
 * $body at or after $offset.
 *
 * The fragment is embedded in a pattern delimited by "%", so it must
 * not contain an unescaped "%".
 *
 * @param string $body The string to look for needle in.
 * @param integer $offset Start looking from here.
 * @param string $reg A PCRE-style regex to match.
 * @return array|boolean Returns a false if no matches found, or an array
 *                       with the following members:
 *                       - integer with the location of the match within $body
 *                       - string with whatever content between offset and the match
 *                       - string with whatever it is we matched
 */
function tln_findnxreg($body, $offset, $reg)
{
    $matches = array();
    $preg_rule = '%^(.*?)(' . $reg . ')%s';

    // Test the match itself rather than its truthiness: a match whose
    // text is exactly "0" is a real match but evaluates to false.
    // An empty overall match is still reported as "not found".
    if (!preg_match($preg_rule, (string) substr($body, $offset), $matches)
        || $matches[0] === ''
    ) {
        return false;
    }

    return array(
        $offset + strlen($matches[1]),
        $matches[1],
        $matches[2]
    );
}
/**
 * Looks for the next tag in $body and parses it.
 *
 * Tag types:
 *  1. Opening tag, e.g. <a href="blah">
 *  2. Closing tag, e.g. </a>
 *  3. XHTML-style content-less tag, e.g. <img src="blah"/>
 *
 * Attribute values are returned with their quotes: single- and
 * double-quoted values keep their own quotes, unquoted values are
 * wrapped in double quotes (embedded '"' becomes &quot;), and
 * value-less attributes get the value "yes".
 *
 * @param string $body String where to look for the next tag.
 * @param integer $offset Start looking from here.
 * @return array|boolean false if no more tags exist in the body, or
 *                       an array with the following members:
 *                       - string with the name of the tag
 *                       - array with attributes and their values
 *                         (false when the tag ends right after its name)
 *                       - integer with tag type (1, 2, or 3)
 *                       - integer where the tag starts (starting "<")
 *                       - integer where the tag ends (ending ">")
 *                       first three members will be false, if the tag is invalid.
 */
function tln_getnxtag($body, $offset)
{
    $len = strlen($body);
    if ($offset > $len) {
        return false;
    }
    $lt = tln_findnxstr($body, $offset, '<');
    if ($lt == $len) {
        return false;
    }

    // We are just past the "<" of: blah blah <tag attribute="value">
    $pos = tln_skipspace($body, $lt + 1);
    if ($pos >= $len) {
        return array(false, false, false, $lt, $len);
    }

    $first = substr($body, $pos, 1);
    if ($first === '/') {
        $tagtype = 2;
        $pos++;
    } elseif ($first === '!') {
        // A comment or an SGML declaration: always reported as invalid
        // so that the caller strips it.
        if (substr($body, $pos + 1, 2) === '--') {
            $gt = strpos($body, '-->', $pos);
            // Point at the ">" of "-->", or at the end of the body
            // when the comment is never closed.
            $gt = ($gt === false) ? $len : $gt + 2;
        } else {
            $gt = tln_findnxstr($body, $pos, '>');
        }

        return array(false, false, false, $lt, $gt);
    } else {
        // Assume tagtype 1 for now. If it's type 3, we'll switch later.
        $tagtype = 1;
    }

    // The first character outside [\w-_] marks the end of the tag name.
    $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
    if ($regary === false) {
        return array(false, false, false, $lt, $len);
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    // $match can be:
    //  '>'  the end of the tag entirely,
    //  '\s' the end of the tag name,
    //  '/'  the start of "/>" (type-3 xhtml tag).
    // Anything else indicates an invalid tag.
    if ($match === '/') {
        if (substr($body, $pos, 2) !== '/>') {
            return array(false, false, false, $lt, tln_findnxstr($body, $pos, '>'));
        }
        $pos++;
        $tagtype = 3;
        $match = '>';
    }
    if ($match === '>') {
        return array($tagname, false, $tagtype, $lt, $pos);
    }
    if (!preg_match('/\s/', $match)) {
        // This is an invalid tag! Look for the next closing ">".
        return array(false, false, false, $lt, tln_findnxstr($body, $lt, '>'));
    }

    // We are now right after the tag name: <tagname  attribute='blah'>
    // Loop in order to find all attributes.
    $attary = array();
    while ($pos <= $len) {
        $pos = tln_skipspace($body, $pos);
        if ($pos == $len) {
            // Non-closed tag.
            return array(false, false, false, $lt, $pos);
        }

        // Did we arrive at ">" or "/>", i.e. the end of the tag?
        $matches = array();
        if (preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches)) {
            $pos += strlen($matches[1]);
            if ($matches[2] == '/>') {
                $tagtype = 3;
                $pos++;
            }

            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        // Attribute forms, with optional whitespace between members:
        //  1: attrname = 'CDATA'
        //  2: attrname = "CDATA"
        //  3: attrname = CDATA
        //  4: attrname
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if ($regary === false) {
            // Looks like body ended before the end of tag.
            return array(false, false, false, $lt, $len);
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);

        // End of the attribute name:
        //  '>'  end of the tag, attribute form 4,
        //  '/'  same, if followed by '>',
        //  otherwise look at what follows the whitespace.
        if ($match === '/') {
            if (substr($body, $pos, 2) !== '/>') {
                return array(false, false, false, $lt, tln_findnxstr($body, $pos, '>'));
            }
            $pos++;
            $tagtype = 3;
            $match = '>';
        }
        if ($match === '>') {
            $attary[$attname] = '"yes"';

            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        $pos = tln_skipspace($body, $pos);
        $char = substr($body, $pos, 1);
        if ($char === '=') {
            $pos++;
            $pos = tln_skipspace($body, $pos);
            $quot = substr($body, $pos, 1);
            if ($quot === '\'') {
                $regary = tln_findnxreg($body, $pos + 1, '\'');
                if ($regary === false) {
                    return array(false, false, false, $lt, $len);
                }
                list($pos, $attval, $match) = $regary;
                $pos++;
                $attary[$attname] = '\'' . $attval . '\'';
            } elseif ($quot === '"') {
                $regary = tln_findnxreg($body, $pos + 1, '\"');
                if ($regary === false) {
                    return array(false, false, false, $lt, $len);
                }
                list($pos, $attval, $match) = $regary;
                $pos++;
                $attary[$attname] = '"' . $attval . '"';
            } else {
                // Unquoted value: runs up to the next whitespace or ">".
                // A ">" is picked up at the top of the loop.
                $regary = tln_findnxreg($body, $pos, '[\s>]');
                if ($regary === false) {
                    return array(false, false, false, $lt, $len);
                }
                list($pos, $attval, $match) = $regary;
                // The value gets wrapped in double quotes, so any
                // double quote inside it must be entity-encoded.
                $attval = str_replace('"', '&quot;', $attval);
                $attary[$attname] = '"' . $attval . '"';
            }
        } elseif (preg_match('|[\w/>]|', $char)) {
            // That was attribute form 4. End of tag and invalid input
            // are caught by the checks at the top of the loop.
            $attary[$attname] = '"yes"';
        } else {
            // An illegal character. Find next '>' and return.
            return array(false, false, false, $lt, tln_findnxstr($body, $pos, '>'));
        }
    }

    // The tag end was never found: report an invalid tag so that it
    // gets stripped.
    return array(false, false, false, $lt, $len);
}
/**
 * Translates numeric entities into literal characters so the value
 * can be checked.
 *
 * Every match of $regex is replaced by the single byte whose code is
 * the first capture group of that match.
 *
 * @param string $attvalue the by-ref value to check.
 * @param string $regex the regular expression to check against; its
 *                      first capture group must hold the number.
 * @param boolean $hex whether the entities are hexadecimal.
 * @return boolean True or False depending on whether there were matches.
 */
function tln_deent(&$attvalue, $regex, $hex = false)
{
    if (!preg_match_all($regex, $attvalue, $matches)) {
        return false;
    }

    $repl = array();
    foreach ($matches[0] as $i => $entity) {
        $numval = $matches[1][$i];
        if ($hex) {
            $numval = hexdec($numval);
        }
        // chr() works on single bytes; mask explicitly so that large
        // numbers wrap instead of depending on implicit conversion.
        $repl[$entity] = chr(((int) $numval) & 0xFF);
    }
    $attvalue = strtr($attvalue, $repl);

    return true;
}
/**
 * Decodes entity-encoded and backslash-escaped characters in an
 * attribute value, so that the later checks run against the literal
 * 8-bit string.
 *
 * @param string $attvalue A string to run entity check against.
 * @return void modifies a reference value.
 */
function tln_defang(&$attvalue)
{
    // Nothing to decode if there are no ampersands or backslashes.
    if (strpos($attvalue, '&') === false
        && strpos($attvalue, '\\') === false
    ) {
        return;
    }

    // Decoding one layer can reveal another, so repeat until a full
    // pass changes nothing. At most one decoder runs per pass.
    do {
        $m = tln_deent($attvalue, '/\&#0*(\d+);*/s')
            || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || tln_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($m);

    $attvalue = stripslashes($attvalue);
}
/**
 * Removes tabs, newlines, carriage returns, NUL bytes and spaces.
 * Browsers have been known to treat "java[tab]script" just like
 * "javascript", so such characters must not be allowed to hide a
 * scheme from the later checks.
 *
 * @param string $attvalue The attribute value before extraneous spaces removed.
 * @return void modifies a reference value.
 */
function tln_unspace(&$attvalue)
{
    $attvalue = str_replace(
        array("\t", "\r", "\n", "\0", " "),
        '',
        $attvalue
    );
}
/**
 * Runs the attribute checks for a single tag: drops forbidden
 * attributes, neutralises dangerous values and appends the attributes
 * configured for the tag.
 *
 * @param string $tagname String with the name of the tag.
 * @param array $attary Array with all tag attributes.
 * @param array $rm_attnames See description for tln_sanitize
 * @param array $bad_attvals See description for tln_sanitize
 * @param array $add_attr_to_tag See description for tln_sanitize
 * @param string $trans_image_path
 * @param boolean $block_external_images
 * @return array Array with modified attributes.
 */
function tln_fixatts(
    $tagname,
    $attary,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images
) {
    // foreach walks a snapshot, so changing or unsetting the current
    // entry is safe (each() was removed in PHP 8).
    foreach ($attary as $attname => $attvalue) {
        // Numeric attribute names arrive as integer keys; compare as
        // strings so that 0 never loosely equals 'style'.
        $isStyle = ((string) $attname === 'style');

        // See if this attribute should be removed.
        $remove = false;
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (!preg_match($matchtag, $tagname)) {
                continue;
            }
            foreach ($matchattrs as $matchattr) {
                if (preg_match($matchattr, $attname)) {
                    $remove = true;
                    break 2;
                }
            }
        }
        if ($remove) {
            // Stop here: the assignments further down would otherwise
            // put a removed attribute back into $attary.
            unset($attary[$attname]);
            continue;
        }

        // Remove any backslashes, entities, or extraneous whitespace.
        $oldattvalue = $attvalue;
        tln_defang($attvalue);
        if ($isStyle && $attvalue !== $oldattvalue) {
            // A style value that needed decoding is treated as hostile.
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        tln_unspace($attvalue);

        // Now run the value checks. Each entry of $bad_attvals maps a
        // tag regex to attribute regexes, which map to a pair of
        // arrays: (patterns, replacements).
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (!preg_match($matchtag, $tagname)) {
                continue;
            }
            foreach ($matchattrs as $matchattr => $valary) {
                if (!preg_match($matchattr, $attname)) {
                    continue;
                }
                list($valmatch, $valrepl) = $valary;
                $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                if ($newvalue === null) {
                    // PCRE failure: the value could not be vetted, so
                    // fail closed and drop the attribute.
                    unset($attary[$attname]);
                    continue 3;
                }
                if ($newvalue != $attvalue) {
                    $attary[$attname] = $newvalue;
                    $attvalue = $newvalue;
                }
            }
        }

        if ($isStyle) {
            if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
                // Do not run the url() rewrite below: it would replace
                // this marker with the offending value again.
                $attary[$attname] = '"disallowed character"';
                continue;
            }
            preg_match_all("/url\s*\((.+)\)/si", $attvalue, $aMatch);
            if (!empty($aMatch[1])) {
                foreach ($aMatch[1] as $sMatch) {
                    $urlvalue = $sMatch;
                    tln_fixurl($attname, $urlvalue, $trans_image_path, $block_external_images);
                    $attary[$attname] = str_replace($sMatch, $urlvalue, $attvalue);
                }
            }
        }
    }

    // See if we need to append any attributes to this tag.
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }

    return $attary;
}
// Rewrites a URL-bearing attribute value in place ($attvalue is taken
// by reference): the value is trimmed, a leading quote character is
// remembered in $sQuote and stripped, and the result is written back
// wrapped in $sQuote. Values that fail a check are replaced by
// $trans_image_path (or by a fixed placeholder URL in one branch).
// NOTE(review): this listing is incomplete. The initial value of
// $sQuote and the "case" labels of both switch statements are not
// visible here; take them from the upstream file, do not guess them.
600 function tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
603 $attvalue = trim($attvalue);
604 if ($attvalue && ($attvalue[0] =='"'||
$attvalue[0] == "'")) {
605 // remove the double quotes
606 $sQuote = $attvalue[0];
607 $attvalue = trim(substr($attvalue,1,-1));
611 * Replace empty src tags with the blank image. src is only used
612 * for frames, images, and image inputs. Doing a replace should
613 * not affect them working as should be, however it will stop
614 * IE from being kicked off when src for img tags are not set
616 if ($attvalue == '') {
617 $attvalue = $sQuote . $trans_image_path . $sQuote;
619 // first, disallow 8 bit characters and control characters
620 if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
623 $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
626 $attvalue = $sQuote . $trans_image_path . $sQuote;
// The value is split with parse_url(); the branch taken depends on the
// lowercased scheme, on whether the attribute is "href", and on
// $block_external_images.
630 $aUrl = parse_url($attvalue);
631 if (isset($aUrl['scheme'])) {
632 switch(strtolower($aUrl['scheme'])) {
637 if ($attname != 'href') {
638 if ($block_external_images == true) {
639 $attvalue = $sQuote . $trans_image_path . $sQuote;
641 if (!isset($aUrl['path'])) {
642 $attvalue = $sQuote . $trans_image_path . $sQuote;
646 $attvalue = $sQuote . $attvalue . $sQuote;
650 $attvalue = $sQuote . $attvalue . $sQuote;
653 $attvalue = $sQuote . $attvalue . $sQuote;
656 $attvalue = $sQuote . $trans_image_path . $sQuote;
// NOTE(review): the assignment below reads as "$$attvalue" (a variable
// variable), so it would not change $attvalue itself; a value without
// a scheme would then leave this function unquoted and unreplaced.
// Presumably "$attvalue" was intended -- confirm against upstream.
660 if (!isset($aUrl['path']) ||
$aUrl['path'] != $trans_image_path) {
661 $
$attvalue = $sQuote . $trans_image_path . $sQuote;
// Filters the content of a <style> element. Every visible return is a
// two-element array: the filtered CSS (or FALSE on the failure path)
// followed by a position in $body.
// NOTE(review): this listing is incomplete. The statements that assign
// $sToken, $bSucces, $content and $newpos, and most of the $match
// pattern list, are not visible here.
668 function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
670 $me = 'tln_fixstyle';
671 // workaround for </style> in between comments
677 for ($i=$pos,$iCount=strlen($body);$i<$iCount;++
$i) {
684 if ($sToken == '<') {
694 if (preg_match('/\<\/\s*style\s*\>/i',$sToken,$aMatch)) {
707 if ($sToken == '<') {
// NOTE(review): "$body{...}" is curly-brace string offset syntax, which
// was removed in PHP 8; "$body[...]" is the supported form.
709 if (isset($body{$i+
2}) && substr($body,$i,3) == '!--') {
710 $i = strpos($body,'-->',$i+
3);
711 if ($i === false) { // no end comment
729 if ($bSucces == FALSE){
730 return array(FALSE, strlen($body));
736 * First look for general BODY style declaration, which would be
738 * body {background: blah-blah}
739 * and change it to .bodyclass so we can just assign it to a <div>
741 $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
// The next statement assigns the variable to itself and has no effect.
743 $trans_image_path = $trans_image_path;
746 * Fix url('blah') declarations.
748 // $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
749 // "url(\\1$trans_image_path\\2)", $content);
751 // first check for 8bit sequences and disallowed control characters
752 if (preg_match('/[\16-\37\200-\377]+/',$content)) {
753 $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
754 return array($content, $newpos);
757 // remove @import line
758 $content = preg_replace("/^\s*(@import.*)$/mi","\n<!-- @import rules forbidden -->\n",$content);
// Collapse backslash-obfuscated spellings of "url" to the plain word,
// then pass every url(...) argument through tln_fixurl.
760 $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
761 preg_match_all("/url\s*\((.+)\)/si",$content,$aMatch);
762 if (count($aMatch)) {
763 $aValue = $aReplace = array();
764 foreach($aMatch[1] as $sMatch) {
767 tln_fixurl('style',$urlvalue, $trans_image_path, $block_external_images);
769 $aReplace[] = $urlvalue;
771 $content = str_replace($aValue,$aReplace,$content);
775 * Remove any backslashes, entities, and extraneous whitespace.
// The pattern checks run on a decoded, whitespace-free copy; $content
// is only replaced when one of the patterns changed that copy.
777 $contentTemp = $content;
778 tln_defang($contentTemp);
779 tln_unspace($contentTemp);
781 $match = Array('/\/\*.*\*\//',
789 $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
790 $contentNew = preg_replace($match, $replace, $contentTemp);
791 if ($contentNew !== $contentTemp) {
792 $content = $contentNew;
794 return array($content, $newpos);
// Builds the attribute array for the <div class='bodyclass'> that
// stands in for <body>, turning body attributes into an inline style.
// NOTE(review): this listing is incomplete. The switch over $attname,
// the initial values of $styledef and $text, the statements that set
// $has_bgc_stl / $has_txt_stl, and the return statement are not
// visible here.
797 function tln_body2div($attary, $trans_image_path)
799 $me = 'tln_body2div';
800 $divattary = array('class' => "'bodyclass'");
802 $has_bgc_stl = $has_txt_stl = false;
804 if (is_array($attary) && sizeof($attary) > 0){
805 foreach ($attary as $attname=>$attvalue){
// Values arrive quoted: the first character is taken as the quote
// character and every occurrence of it is removed from the value.
806 $quotchar = substr($attvalue, 0, 1);
807 $attvalue = str_replace($quotchar, "", $attvalue);
810 $styledef .= "background-image: url('$trans_image_path'); ";
814 $styledef .= "background-color: $attvalue; ";
818 $styledef .= "color: $text; ";
822 // Outlook defines a white bgcolor and no text color. This can lead to
823 // white text on a white bg with certain themes.
824 if ($has_bgc_stl && !$has_txt_stl) {
825 $styledef .= "color: $text; ";
827 if (strlen($styledef) > 0){
// NOTE(review): curly-brace array offsets were removed in PHP 8;
// $divattary["style"] is the supported form.
828 $divattary{"style"} = "\"$styledef\"";
836 * @param string $body The HTML you wish to filter
837 * @param array $tag_list see description above
838 * @param array $rm_tags_with_content see description above
839 * @param array $self_closing_tags see description above
840 * @param boolean $force_tag_closing see description above
841 * @param array $rm_attnames see description above
842 * @param array $bad_attvals see description above
843 * @param array $add_attr_to_tag see description above
844 * @param string $trans_image_path
845 * @param boolean $block_external_images
847 * @return string Sanitized html safe to show on your pages.
// Main entry point of the filter: walks $body tag by tag with
// tln_getnxtag and accumulates the filtered markup in $trusted,
// between "begin" and "end" marker comments.
// NOTE(review): this listing is incomplete. Most of the parameter
// list, the initialisation of $curpos, several conditions and the
// final return are not visible here.
// NOTE(review): "$open_tags{...}" uses curly-brace array offsets,
// which were removed in PHP 8; "$open_tags[...]" is the supported form.
849 function tln_sanitize(
852 $rm_tags_with_content,
859 $block_external_images
862 * Normalize rm_tags and rm_tags_with_content.
// The first element of $tag_list is taken off as the $rm_tags flag;
// the remaining elements are the tag names. All three tag-name lists
// are lowercased in place.
864 $rm_tags = array_shift($tag_list);
865 @array_walk
($tag_list, 'tln_casenormalize');
866 @array_walk
($rm_tags_with_content, 'tln_casenormalize');
867 @array_walk
($self_closing_tags, 'tln_casenormalize');
869 * See if tag_list is of tags to remove or tags to allow.
870 * false means remove these tags
871 * true means allow these tags
874 $open_tags = array();
875 $trusted = "<!-- begin tln_sanitized html -->\n";
876 $skip_content = false;
878 * Take care of netscape's stupid javascript entities like
// NOTE(review): as listed, the replacement '&\\1' writes back exactly
// what was matched, so this call changes nothing. Presumably the
// replacement originally started with an encoded ampersand entity --
// confirm against upstream.
881 $body = preg_replace('/&(\{.*?\};)/si', '&\\1', $body);
882 while (($curtag = tln_getnxtag($body, $curpos)) != false) {
883 list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
// Text between the previous position and the "<" of this tag.
884 $free_content = substr($body, $curpos, $lt-$curpos);
886 * Take care of <style>
// For an opening <style> tag, everything after its ">" is handed to
// tln_fixstyle, which returns the filtered CSS and the position to
// continue from; the element is then re-emitted with an explicit
// closing tag (type 2).
888 if ($tagname == "style" && $tagtype == 1){
889 list($free_content, $curpos) =
890 tln_fixstyle($body, $gt+
1, $trans_image_path, $block_external_images);
891 if ($free_content != FALSE){
892 if ( !empty($attary) ) {
893 $attary = tln_fixatts($tagname,
899 $block_external_images
902 $trusted .= tln_tagprint($tagname, $attary, $tagtype);
903 $trusted .= $free_content;
904 $trusted .= tln_tagprint($tagname, false, 2);
908 if ($skip_content == false){
909 $trusted .= $free_content;
911 if ($tagname != false) {
// $skip_content holds the name of the tag whose content is being
// dropped, or false when nothing is being skipped.
913 if ($skip_content == $tagname) {
915 * Got to the end of tag we needed to remove.
918 $skip_content = false;
920 if ($skip_content == false) {
921 if ($tagname == "body") {
// $open_tags counts, per tag name, how many opened tags are still
// unclosed.
924 if (isset($open_tags{$tagname}) &&
925 $open_tags{$tagname} > 0
927 $open_tags{$tagname}--;
935 * $rm_tags_with_content
937 if ($skip_content == false) {
939 * See if this is a self-closing type and change
940 * tagtype appropriately.
943 && in_array($tagname, $self_closing_tags)
948 * See if we should skip this tag and any content
952 && in_array($tagname, $rm_tags_with_content)
954 $skip_content = $tagname;
956 if (($rm_tags == false
957 && in_array($tagname, $tag_list)) ||
959 && !in_array($tagname, $tag_list))
964 * Convert body into div.
966 if ($tagname == "body"){
968 $attary = tln_body2div($attary, $trans_image_path);
971 if (isset($open_tags{$tagname})) {
972 $open_tags{$tagname}++
;
974 $open_tags{$tagname} = 1;
978 * This is where we run other checks.
980 if (is_array($attary) && sizeof($attary) > 0) {
981 $attary = tln_fixatts(
988 $block_external_images
995 if ($tagname != false && $skip_content == false) {
996 $trusted .= tln_tagprint($tagname, $attary, $tagtype);
// Append whatever follows the last tag, then, if requested, emit a
// closing tag for every tag still counted as open.
1001 $trusted .= substr($body, $curpos, strlen($body) - $curpos);
1002 if ($force_tag_closing == true) {
1003 foreach ($open_tags as $tagname => $opentimes) {
1004 while ($opentimes > 0) {
1005 $trusted .= '</' . $tagname . '>';
1011 $trusted .= "<!-- end tln_sanitized html -->\n";
1016 // Use the nifty htmlfilter library
1020 function HTMLFilter($body, $trans_image_path, $block_external_images = false)
1037 $rm_tags_with_content = array(
1047 $self_closing_tags = array(
1055 $force_tag_closing = true;
1057 $rm_attnames = array(
1068 $bad_attvals = array(
1071 "/^src|background/i" =>
1074 '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
1075 '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
1076 '/^([\'"])\s*about\s*:.*([\'"])/si'
1079 "\\1$trans_image_path\\2",
1080 "\\1$trans_image_path\\2",
1081 "\\1$trans_image_path\\2"
1084 "/^href|action/i" =>
1087 '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
1088 '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
1089 '/^([\'"])\s*about\s*:.*([\'"])/si'
1104 "/include-source/i",
1106 '/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i',
1107 '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
1108 '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
1109 '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
1110 '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si'
1129 if ($block_external_images) {
1131 $bad_attvals{'/.*/'}{'/^src|background/i'}[0],
1132 '/^([\'\"])\s*https*:.*([\'\"])/si'
1135 $bad_attvals{'/.*/'}{'/^src|background/i'}[1],
1136 "\\1$trans_image_path\\1"
1139 $bad_attvals{'/.*/'}{'/^style/i'}[0],
1140 '/url\(([\'\"])\s*https*:.*([\'\"])\)/si'
1143 $bad_attvals{'/.*/'}{'/^style/i'}[1],
1144 "url(\\1$trans_image_path\\1)"
1148 $add_attr_to_tag = array(
1150 array('target' => '"_blank"')
1153 $trusted = tln_sanitize(
1156 $rm_tags_with_content,
1163 $block_external_images