Fix Bug #30322 “SVG metadata is read incorrectly” by applying supplied patch
[lhc/web/wiklou.git] / includes / media / GIFMetadataExtractor.php
1 <?php
2 /**
3 * GIF frame counter.
4 *
5 * Originally written in Perl by Steve Sanbeg.
6 * Ported to PHP by Andrew Garrett
7 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
8 * redistribution.
9 *
10 * @file
11 * @ingroup Media
12 */
13
/**
 * GIF metadata extractor.
 *
 * Walks the block structure of a GIF file and reports the number of frames,
 * the accumulated frame delay, whether the animation loops, the embedded XMP
 * packet and the comment blocks.
 *
 * @ingroup Media
 */
class GIFMetadataExtractor {
	/** Introducer byte of an image descriptor (","); (re)set by getMetadata(). */
	public static $gif_frame_sep;

	/** Introducer byte of an extension block ("!"); (re)set by getMetadata(). */
	public static $gif_extension_sep;

	/** Trailer byte that ends the GIF data stream (";"); (re)set by getMetadata(). */
	public static $gif_term;

	const VERSION = 1;

	// Each sub-block is less than or equal to 255 bytes.
	// Most of the time it's 255 bytes, except in XMP
	// blocks, where it's usually between 32-127 bytes each.
	const MAX_SUBBLOCKS = 262144; // 5 MiB (5 * 1024 * 1024) divided by 20.

	/**
	 * Extract frame count, duration, looping flag, XMP and comments from a GIF.
	 *
	 * The file is always closed again before this method returns or throws.
	 * Any read that runs past the end of the file is reported as an Exception
	 * instead of being fed to unpack() as a short string.
	 *
	 * @throws Exception if the file is missing, unreadable, not a GIF,
	 *  truncated or otherwise malformed
	 * @param $filename string Path of the GIF file to inspect
	 * @return array with the keys 'frameCount' (int), 'looped' (bool),
	 *  'duration' (float, seconds), 'xmp' (string) and 'comment' (array of strings)
	 */
	public static function getMetadata( $filename ) {
		self::$gif_frame_sep = pack( "C", ord( "," ) );
		self::$gif_extension_sep = pack( "C", ord( "!" ) );
		self::$gif_term = pack( "C", ord( ";" ) );

		$frameCount = 0;
		$duration = 0.0;
		$isLooped = false;
		$xmp = "";
		$comment = array();

		if ( !$filename ) {
			throw new Exception( "No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( "File $filename does not exist" );
		}

		// GIF is a binary format; 'b' keeps platforms with text-mode
		// translation from altering the bytes.
		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( "Unable to open file $filename" );
		}

		// try/catch + rethrow rather than try/finally so that the handle is
		// released on every path without requiring a newer PHP version than
		// the rest of this file.
		try {
			// Check for the GIF header
			$buf = fread( $fh, 6 );
			if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
				throw new Exception( "Not a valid GIF file; header: $buf" );
			}

			// Skip over width and height.
			fread( $fh, 4 );

			// Packed field: tells whether a global colour table follows and how big it is.
			$bpp = self::decodeBPP( fread( $fh, 1 ) );

			// Skip over background and aspect ratio
			fread( $fh, 2 );

			// Skip over the global colour table
			self::readGCT( $fh, $bpp );

			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 1 );

				if ( $buf === self::$gif_frame_sep ) {
					// Image descriptor: found a frame
					$frameCount++;

					// Skip bounding box (left, top, width, height)
					fread( $fh, 8 );

					// Packed field: same bit layout as in the header, but it
					// describes the frame's local colour table.
					$bpp = self::decodeBPP( fread( $fh, 1 ) );

					// Skip the local colour table, if there is one
					self::readGCT( $fh, $bpp );

					// Skip the LZW minimum code size, then the image data sub-blocks
					fread( $fh, 1 );
					self::skipBlock( $fh );
				} elseif ( $buf === self::$gif_extension_sep ) {
					$extension_code = self::readByte( $fh );

					if ( $extension_code === 0xF9 ) {
						// Graphics Control Extension.
						fread( $fh, 1 ); // Block size

						fread( $fh, 1 ); // Transparency, disposal method, user input

						// Delay, in hundredths of seconds.
						$duration += self::readShort( $fh ) * 0.01;

						fread( $fh, 1 ); // Transparent colour index

						// Should be a terminator
						if ( self::readByte( $fh ) !== 0 ) {
							throw new Exception( "Malformed Graphics Control Extension block" );
						}
					} elseif ( $extension_code === 0xFE ) {
						// Comment block(s).
						$data = self::readBlock( $fh );
						if ( $data === "" ) {
							throw new Exception( 'Read error, zero-length comment block' );
						}

						// The standard says this should be ASCII, however it's unclear if
						// that's true in practice. Check to see if it's valid UTF-8; if so
						// assume it's that, otherwise assume it's ISO-8859-1.
						$dataCopy = $data;
						// quickIsNFCVerify has the side effect of replacing any invalid characters
						UtfNormal::quickIsNFCVerify( $dataCopy );

						if ( $dataCopy !== $data ) {
							wfSuppressWarnings();
							$data = iconv( 'ISO-8859-1', 'UTF-8', $data );
							wfRestoreWarnings();
						}

						$commentCount = count( $comment );
						if ( $commentCount === 0
							|| $comment[$commentCount - 1] !== $data )
						{
							// Some applications repeat the same comment on each
							// frame of an animated GIF image, so if this comment
							// is identical to the last, only extract once.
							$comment[] = $data;
						}
					} elseif ( $extension_code === 0xFF ) {
						// Application extension (Netscape info about the animated gif)
						// or XMP (or theoretically any other type of extension block)
						$blockLength = self::readByte( $fh );

						if ( $blockLength !== 11 ) {
							wfDebug( __METHOD__ . ' GIF application block with wrong length' );
							// Nothing but the length byte has been consumed yet (which also
							// avoids a zero-length fread() for $blockLength === 0), so step
							// back over it and skip the whole block as ordinary sub-blocks.
							fseek( $fh, -1, SEEK_CUR );
							self::skipBlock( $fh );
							continue;
						}

						// 8-byte application identifier + 3-byte authentication code.
						$data = self::readBytes( $fh, $blockLength );

						// NETSCAPE2.0 (application name for animated gif)
						if ( $data === 'NETSCAPE2.0' ) {

							$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

							if ( $data !== "\x03\x01" ) {
								throw new Exception( "Expected \x03\x01, got $data" );
							}

							// Unsigned little-endian integer, loop count or zero for "forever"
							$loopCount = self::readShort( $fh );

							if ( $loopCount !== 1 ) {
								$isLooped = true;
							}

							// Read out terminator byte
							fread( $fh, 1 );
						} elseif ( $data === 'XMP DataXMP' ) {
							// application name for XMP data.
							// see pg 18 of XMP spec part 3.

							$xmp = self::readBlock( $fh, true );

							if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
								|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00" )
							{
								// this is just a sanity check.
								throw new Exception( "XMP does not have magic trailer!" );
							}

							// strip out trailer.
							$xmp = substr( $xmp, 0, -257 );

						} else {
							// Unrecognized application: the 11-byte identifier has been
							// consumed in full, only its data sub-blocks are left to skip.
							self::skipBlock( $fh );
							continue;
						}
					} else {
						// Any other extension: skip its sub-blocks
						self::skipBlock( $fh );
					}
				} elseif ( $buf === self::$gif_term ) {
					break;
				} else {
					if ( $buf === false || $buf === '' ) {
						// End of file reached without seeing the trailer byte.
						throw new Exception( "Ran out of input" );
					}
					throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . ord( $buf ) );
				}
			}
		} catch ( Exception $e ) {
			fclose( $fh );
			throw $e;
		}

		fclose( $fh );

		return array(
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
		);
	}

	/**
	 * Skip over a colour table (global or local).
	 *
	 * @param $fh resource File handle positioned at the start of the table
	 * @param $bpp int Bits per pixel as returned by decodeBPP(); the table has
	 *  2^$bpp entries of 3 bytes each. Nothing is read when this is 0.
	 * @return void
	 */
	public static function readGCT( $fh, $bpp ) {
		if ( $bpp > 0 ) {
			// Computed once instead of on every iteration.
			$entries = pow( 2, $bpp );
			for ( $i = 1; $i <= $entries; ++$i ) {
				fread( $fh, 3 );
			}
		}
	}

	/**
	 * Decode the packed byte that announces a colour table.
	 *
	 * @throws Exception if $data is empty (the read that produced it hit EOF)
	 * @param $data string The packed byte, as read from the file
	 * @return int Bits per pixel (low three bits plus one) when the
	 *  colour-table flag (bit 7) is set, 0 otherwise
	 */
	public static function decodeBPP( $data ) {
		if ( strlen( $data ) < 1 ) {
			throw new Exception( "Ran out of input" );
		}
		$packed = ord( $data );
		$bpp = ( $packed & 7 ) + 1;
		$have_map = ( $packed >> 7 ) & 1;

		return $have_map ? $bpp : 0;
	}

	/**
	 * Skip a sequence of sub-blocks up to and including its zero-length
	 * terminator.
	 *
	 * @throws Exception if a sub-block length byte cannot be read
	 * @param $fh resource File handle positioned at a sub-block length byte
	 * @return void
	 */
	public static function skipBlock( $fh ) {
		while ( !feof( $fh ) ) {
			$block_len = self::readByte( $fh );
			if ( $block_len === 0 ) {
				return;
			}
			fread( $fh, $block_len );
		}
	}

	/**
	 * Read a block. In the GIF format, a block is made up of
	 * several sub-blocks. Each sub block starts with one byte
	 * saying how long the sub-block is, followed by the sub-block.
	 * The entire block is terminated by a sub-block of length
	 * 0.
	 *
	 * @throws Exception if the file ends before the terminator or the block
	 *  has more than MAX_SUBBLOCKS sub-blocks
	 * @param $fh resource File handle positioned at the first length byte
	 * @param $includeLengths Boolean Include the length bytes of the
	 *  sub-blocks in the returned value. Normally this is false,
	 *  except XMP is weird and does a hack where you need to keep
	 *  these length bytes.
	 * @return string The data.
	 */
	public static function readBlock( $fh, $includeLengths = false ) {
		$data = '';
		$subLength = fread( $fh, 1 );
		$blocks = 0;

		while ( $subLength !== "\0" ) {
			$blocks++;
			if ( $blocks > self::MAX_SUBBLOCKS ) {
				throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
			}
			// An empty read means the terminator is missing; checking it here
			// also keeps a zero length from ever reaching fread() below.
			if ( $subLength === false || $subLength === '' || feof( $fh ) ) {
				throw new Exception( "Read error: Unexpected EOF." );
			}
			if ( $includeLengths ) {
				$data .= $subLength;
			}

			$data .= fread( $fh, ord( $subLength ) );
			$subLength = fread( $fh, 1 );
		}
		return $data;
	}

	/**
	 * Read exactly $length bytes.
	 *
	 * @throws Exception if fewer than $length bytes are available
	 * @param $fh resource
	 * @param $length int Number of bytes wanted; 0 yields an empty string
	 *  without touching the file
	 * @return string
	 */
	private static function readBytes( $fh, $length ) {
		$buf = $length > 0 ? fread( $fh, $length ) : '';
		if ( $buf === false || strlen( $buf ) < $length ) {
			throw new Exception( "Ran out of input" );
		}
		return $buf;
	}

	/**
	 * Read one byte as an unsigned integer.
	 *
	 * @throws Exception at end of file
	 * @param $fh resource
	 * @return int 0-255
	 */
	private static function readByte( $fh ) {
		return ord( self::readBytes( $fh, 1 ) );
	}

	/**
	 * Read a 16-bit unsigned little-endian integer.
	 *
	 * @throws Exception if fewer than two bytes are available
	 * @param $fh resource
	 * @return int 0-65535
	 */
	private static function readShort( $fh ) {
		$value = unpack( 'v', self::readBytes( $fh, 2 ) );
		return $value[1];
	}

}