Merging resourceloader branch into trunk. Full documentation is at http://www.mediawi...
[lhc/web/wiklou.git] / includes / CSSJanus.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 */
19
20 /**
21 * This is a PHP port of CSSJanus, a utility that transforms CSS style sheets
22 * written for LTR to RTL.
23 *
24 * The original Python version of CSSJanus is Copyright 2008 by Google Inc. and
25 * is distributed under the Apache license.
26 *
27 * Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py
28 * License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE
29 * @author Roan Kattouw
30 *
31 */
class CSSJanus {
	/**
	 * Named regex fragments and complete regexes used by the transformation.
	 *
	 * Entries up to and including 'noflip_annotation' are fragments without
	 * delimiters (plus 'tmpToken', which is a literal placeholder string, not
	 * a regex); the entries after it are complete, delimited regexes.
	 * Entries defined as null are built dynamically by buildPatterns()
	 * because they are composed from other entries.
	 */
	private static $patterns = array(
		'tmpToken' => '`TMP`',
		'nonAscii' => '[\200-\377]',
		// Four backslashes in the PHP source become two in the regex, i.e. one
		// literal backslash in the subject: a CSS escape like "\26 ".
		'unicode' => '(?:(?:\\\\[0-9a-f]{1,6})(?:\r\n|\s)?)',
		'num' => '(?:[0-9]*\.[0-9]+|[0-9]+)',
		'unit' => '(?:em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)',
		'body_selector' => 'body\s*{\s*',
		'direction' => 'direction\s*:\s*',
		'escape' => null,
		'nmstart' => null,
		'nmchar' => null,
		'ident' => null,
		'quantity' => null,
		'possibly_negative_quantity' => null,
		'color' => null,
		'url_special_chars' => '[!#$%&*-~]',
		'valid_after_uri_chars' => '[\'\"]?\s*',
		'url_chars' => null,
		'lookahead_not_open_brace' => null,
		'lookahead_not_closing_paren' => null,
		'lookahead_for_closing_paren' => null,
		'lookbehind_not_letter' => '(?<![a-zA-Z])',
		'chars_within_selector' => '[^\}]*?',
		'noflip_annotation' => '\/\*\s*@noflip\s*\*\/',

		'noflip_single' => null,
		'noflip_class' => null,
		'comment' => '/\/\*[^*]*\*+([^\/*][^*]*\*+)*\//',
		'body_direction_ltr' => null,
		'body_direction_rtl' => null,
		'left' => null,
		'right' => null,
		'left_in_url' => null,
		'right_in_url' => null,
		'ltr_in_url' => null,
		'rtl_in_url' => null,
		'cursor_east' => null,
		'cursor_west' => null,
		'four_notation_quantity' => null,
		'four_notation_color' => null,
		'bg_horizontal_percentage' => null,
		'bg_horizontal_percentage_x' => null,
	);

	/**
	 * Build patterns we can't define above because they depend on other patterns.
	 *
	 * The work is done only once: 'escape' is the first pattern built, so a
	 * non-null 'escape' means all the others have been built as well.
	 */
	private static function buildPatterns() {
		if ( !is_null( self::$patterns['escape'] ) ) {
			// Patterns have already been built
			return;
		}
		$patterns =& self::$patterns;
		// "\\\\" in a double-quoted string is "\\" in the regex, which matches
		// one literal backslash; the class after it is the escaped character.
		$patterns['escape'] = "(?:{$patterns['unicode']}|\\\\[^\r\n\f0-9a-f])";
		$patterns['nmstart'] = "(?:[_a-z]|{$patterns['nonAscii']}|{$patterns['escape']})";
		$patterns['nmchar'] = "(?:[_a-z0-9-]|{$patterns['nonAscii']}|{$patterns['escape']})";
		$patterns['ident'] = "-?{$patterns['nmstart']}{$patterns['nmchar']}*";
		$patterns['quantity'] = "{$patterns['num']}(?:\s*{$patterns['unit']}|{$patterns['ident']})?";
		$patterns['possibly_negative_quantity'] = "((?:-?{$patterns['quantity']})|(?:inherit|auto))";
		$patterns['color'] = "(#?{$patterns['nmchar']}+)";
		$patterns['url_chars'] = "(?:{$patterns['url_special_chars']}|{$patterns['nonAscii']}|{$patterns['escape']})*";
		$patterns['lookahead_not_open_brace'] = "(?!({$patterns['nmchar']}|\r?\n|\s|#|\:|\.|\,|\+|>)*?{)";
		$patterns['lookahead_not_closing_paren'] = "(?!{$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";
		$patterns['lookahead_for_closing_paren'] = "(?={$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";

		$patterns['noflip_single'] = "/({$patterns['noflip_annotation']}{$patterns['lookahead_not_open_brace']}[^;}]+;?)/i";
		$patterns['noflip_class'] = "/({$patterns['noflip_annotation']}{$patterns['chars_within_selector']}})/i";
		$patterns['body_direction_ltr'] = "/({$patterns['body_selector']}{$patterns['chars_within_selector']}{$patterns['direction']})ltr/i";
		$patterns['body_direction_rtl'] = "/({$patterns['body_selector']}{$patterns['chars_within_selector']}{$patterns['direction']})rtl/i";
		$patterns['left'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
		$patterns['right'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
		$patterns['left_in_url'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['right_in_url'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['ltr_in_url'] = "/{$patterns['lookbehind_not_letter']}(ltr){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['rtl_in_url'] = "/{$patterns['lookbehind_not_letter']}(rtl){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['cursor_east'] = "/{$patterns['lookbehind_not_letter']}([ns]?)e-resize/";
		$patterns['cursor_west'] = "/{$patterns['lookbehind_not_letter']}([ns]?)w-resize/";
		$patterns['four_notation_quantity'] = "/{$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}/i";
		$patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i";
		// The two regexes below are parenthesized differently than in the original implementation to make the
		// callback's job more straightforward: $1 is everything before the number, $2 the number, $3 the rest
		$patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
		$patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/";
	}

	/**
	 * Transform an LTR stylesheet to RTL
	 * @param string $css Stylesheet to transform
	 * @param bool $swapLtrRtlInURL If true, swap 'ltr' and 'rtl' in URLs
	 * @param bool $swapLeftRightInURL If true, swap 'left' and 'right' in URLs
	 * @return string Transformed stylesheet
	 */
	public static function transform( $css, $swapLtrRtlInURL = false, $swapLeftRightInURL = false ) {
		// We wrap tokens in ` , not ~ like the original implementation does.
		// This was done because ` is not a legal character in CSS and can only
		// occur in URLs, where we escape it to %60 before inserting our tokens.
		$css = str_replace( '`', '%60', $css );

		self::buildPatterns();

		// Tokenize single line rules with /* @noflip */
		$noFlipSingle = new CSSJanus_Tokenizer( self::$patterns['noflip_single'], '`NOFLIP_SINGLE`' );
		$css = $noFlipSingle->tokenize( $css );

		// Tokenize class rules with /* @noflip */
		$noFlipClass = new CSSJanus_Tokenizer( self::$patterns['noflip_class'], '`NOFLIP_CLASS`' );
		$css = $noFlipClass->tokenize( $css );

		// Tokenize comments
		$comments = new CSSJanus_Tokenizer( self::$patterns['comment'], '`C`' );
		$css = $comments->tokenize( $css );

		// LTR->RTL fixes start here
		$css = self::fixBodyDirection( $css );
		if ( $swapLtrRtlInURL ) {
			$css = self::fixLtrRtlInURL( $css );
		}
		if ( $swapLeftRightInURL ) {
			$css = self::fixLeftRightInURL( $css );
		}
		$css = self::fixLeftAndRight( $css );
		$css = self::fixCursorProperties( $css );
		$css = self::fixFourPartNotation( $css );
		$css = self::fixBackgroundPosition( $css );

		// Detokenize stuff we tokenized before, in reverse order of
		// tokenization: text restored by a later tokenizer may itself contain
		// tokens that were inserted by an earlier one.
		$css = $comments->detokenize( $css );
		$css = $noFlipClass->detokenize( $css );
		$css = $noFlipSingle->detokenize( $css );
		return $css;
	}

	/**
	 * Replace direction: ltr; with direction: rtl; and vice versa, but *only*
	 * those inside a body { .. } selector.
	 *
	 * Unlike the original implementation, this function doesn't suffer from
	 * the bug causing "body\n{\ndirection: ltr;\n}" to be missed.
	 * See http://code.google.com/p/cssjanus/issues/detail?id=15
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with the body direction swapped
	 */
	private static function fixBodyDirection( $css ) {
		// Park ltr behind a temporary token so the rtl->ltr pass below
		// doesn't immediately undo the ltr->rtl swap
		$css = preg_replace( self::$patterns['body_direction_ltr'],
			'$1' . self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['body_direction_rtl'], '$1ltr', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );
		return $css;
	}

	/**
	 * Replace 'ltr' with 'rtl' and vice versa in background URLs
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with 'ltr' and 'rtl' swapped in URLs
	 */
	private static function fixLtrRtlInURL( $css ) {
		$css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['rtl_in_url'], 'ltr', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );
		return $css;
	}

	/**
	 * Replace 'left' with 'right' and vice versa in background URLs
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with 'left' and 'right' swapped in URLs
	 */
	private static function fixLeftRightInURL( $css ) {
		$css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['right_in_url'], 'left', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );
		return $css;
	}

	/**
	 * Flip rules like left: , padding-right: , etc.
	 *
	 * Occurrences that are followed by an opening brace (i.e. that are part of
	 * a selector) or that sit just before a closing parenthesis (i.e. inside
	 * url(...)) are excluded by the lookaheads in the patterns.
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with 'left' and 'right' swapped
	 */
	private static function fixLeftAndRight( $css ) {
		$css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['right'], 'left', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );
		return $css;
	}

	/**
	 * Flip East and West in rules like cursor: nw-resize;
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with e-resize and w-resize cursors swapped
	 */
	private static function fixCursorProperties( $css ) {
		// $1 is the optional n/s prefix, which must be kept as is
		$css = preg_replace( self::$patterns['cursor_east'],
			'$1' . self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['cursor_west'], '$1e-resize', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'w-resize', $css );
		return $css;
	}

	/**
	 * Swap the second and fourth parts in four-part notation rules like
	 * padding: 1px 2px 3px 4px;
	 *
	 * Unlike the original implementation, this function doesn't suffer from
	 * the bug where whitespace is not preserved when flipping four-part rules
	 * and four-part color rules with multiple whitespace characters between
	 * colors are not recognized.
	 * See http://code.google.com/p/cssjanus/issues/detail?id=16
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with four-part values flipped
	 */
	private static function fixFourPartNotation( $css ) {
		// Odd groups are the four values, even groups the whitespace between them
		$css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css );
		// Same idea, shifted by one because $1 is the "-color:" prefix
		$css = preg_replace( self::$patterns['four_notation_color'], '$1$2$3$8$5$6$7$4', $css );
		return $css;
	}

	/**
	 * Flip horizontal background percentages.
	 *
	 * @param string $css Stylesheet, after buildPatterns() has run
	 * @return string Stylesheet with horizontal percentages replaced by 100 minus their value
	 */
	private static function fixBackgroundPosition( $css ) {
		// __CLASS__ rather than 'self': "self" inside a callable array is
		// deprecated as of PHP 8.2
		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'],
			array( __CLASS__, 'calculateNewBackgroundPosition' ), $css );
		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage_x'],
			array( __CLASS__, 'calculateNewBackgroundPosition' ), $css );
		return $css;
	}

	/**
	 * Callback for fixBackgroundPosition()
	 *
	 * @param array $matches Regex match: [1] is the text before the number,
	 *  [2] the percentage value, [3] the text after it
	 * @return string Replacement text with the percentage mirrored (100 - value)
	 */
	private static function calculateNewBackgroundPosition( $matches ) {
		return $matches[1] . ( 100 - $matches[2] ) . $matches[3];
	}
}
257
258 /**
259 * Utility class used by CSSJanus that tokenizes and untokenizes things we want
260 * to protect from being janused.
261 * @author Roan Kattouw
262 */
class CSSJanus_Tokenizer {
	/** Regular expression whose matches get replaced by the token */
	private $regex;
	/** Placeholder string substituted for every match */
	private $token;
	/** Matched strings, in the order in which they were tokenized */
	private $originals = array();
	/** Index into $originals of the next string to hand back */
	private $cursor = 0;

	/**
	 * Constructor
	 * @param $regex string Regular expression whose matches to replace by a token.
	 * @param $token string Token
	 */
	public function __construct( $regex, $token ) {
		$this->token = $token;
		$this->regex = $regex;
	}

	/**
	 * Swap every match of the regex in $str for the token, recording each
	 * matched string so it can be put back later.
	 * @param $str string String to tokenize
	 * @return string Tokenized string
	 */
	public function tokenize( $str ) {
		$recorder = array( $this, 'tokenizeCallback' );
		return preg_replace_callback( $this->regex, $recorder, $str );
	}

	/**
	 * Remember the matched text and hand back the token in its place.
	 */
	private function tokenizeCallback( $matches ) {
		array_push( $this->originals, $matches[0] );
		return $this->token;
	}

	/**
	 * Put the remembered strings back in place of the tokens. The position in
	 * the list of remembered strings carries over between calls, so if
	 * multiple strings were tokenized, they must be detokenized in exactly
	 * the SAME ORDER.
	 * @param string $str String previously run through tokenize()
	 * @return string Original string
	 */
	public function detokenize( $str ) {
		// Every occurrence of the same token needs a different replacement,
		// which plain string replacement can't do, hence the callback even
		// though the pattern is just the literal token
		$tokenPattern = '/' . preg_quote( $this->token, '/' ) . '/';
		$restorer = array( $this, 'detokenizeCallback' );
		return preg_replace_callback( $tokenPattern, $restorer, $str );
	}

	/**
	 * Hand back the next remembered string and advance the cursor.
	 */
	private function detokenizeCallback( $matches ) {
		if ( $this->cursor >= count( $this->originals ) ) {
			// Nothing left to restore
			return false;
		}
		$original = $this->originals[$this->cursor];
		$this->cursor++;
		return $original;
	}

}