3 * PNG frame counter and metadata extractor.
5 * Slightly derived from GIFMetadataExtractor.php
6 * Deliberately not using MWExceptions to avoid external dependencies, encouraging redistribution.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
class PNGMetadataExtractor {
	/** @var string The eight-byte signature found at the start of every PNG file */
	private static $pngSig;

	/** @var int Number of CRC bytes that trail each chunk's data */
	private static $crcSize;

	/** @var array Map of lower-cased PNG text keywords to metadata field names */
	private static $textChunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Walk the chunks of a PNG file and collect animation, colour and
	 * textual metadata.
	 *
	 * The file handle is always closed before this method returns or throws.
	 *
	 * @param string $filename Path of the PNG file to inspect
	 * @throws Exception If the file is missing or unreadable, is not a PNG,
	 *  or contains a truncated or malformed chunk
	 * @return array With the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'
	 */
	public static function getMetadata( $filename ) {
		self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$crcSize = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and https://www.w3.org/TR/PNG/#11keywords
		 */
		self::$textChunks = [
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		];

		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that touches the handle lives inside try/finally so the
		// handle is released even when a malformed chunk makes us throw.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$pngSig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each one is: 4-byte big-endian length, 4-byte type,
			// <length> bytes of data, then a CRC of self::$crcSize bytes.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf )[1];

				// Where PHP integers are 32 bits wide a huge unsigned length
				// comes back negative.
				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type === "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Bytes 0-7 hold width and height, which are not part of
					// the result; byte 8 is the bit depth, byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// https://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type === "acTL" ) {
					// Animation control: two 32-bit integers (frame count and
					// play count), so anything shorter than 8 bytes cannot be
					// unpacked.
					if ( $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The frame delay (numerator, denominator) starts at byte 20.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as hundredths of a second.
					if ( $fctldur['delay_den'] === 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 * The compression method byte is matched as a literal \x00
						 * in the pattern, so $items[4] holds the byte right after
						 * the language tag (its NUL terminator in a well-formed
						 * chunk).
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$textChunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] === '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] === "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								Wikimedia\suppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								Wikimedia\restoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$crcSize, SEEK_CUR );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$crcSize, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$textChunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}
				} elseif ( $chunk_type === 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}
					// tEXt content is Latin-1; the collected metadata is UTF-8.
					Wikimedia\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$textChunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}
						// First byte after the keyword is the compression
						// method; the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						Wikimedia\suppressWarnings();
						$content = gzuncompress( $content );
						Wikimedia\restoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						Wikimedia\suppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						Wikimedia\restoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$textChunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] === 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit 1 means the values are per meter
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type === "IEND" ) {
					break;
				} else {
					// Unknown chunk: skip over its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Step over the CRC that trails every chunk.
				fseek( $fh, self::$crcSize, SEEK_CUR );
			}
		} finally {
			fclose( $fh );
		}

		// The summed frame delays cover one pass of the animation.
		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Drop the reference so a later write to $value cannot alter $text.
			unset( $value );
		}

		return [
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws Exception If too big
	 * @return string|bool The chunk: '' when $size is not positive, otherwise
	 *  whatever fread() returns (which may be shorter than $size, or false)
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		if ( $size <= 0 ) {
			// fread() refuses a non-positive length (a ValueError on PHP 8, a
			// warning plus false before that). An empty chunk is simply an
			// empty string; callers apply their own validity checks to it.
			return '';
		}

		return fread( $fh, $size );
	}
}