3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
17 class PNGMetadataExtractor
{
23 const MAX_CHUNK_SIZE
= 3145728; // 3 megabytes
25 static function getMetadata( $filename ) {
26 self
::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
28 /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
29 * and http://www.w3.org/TR/PNG/#11keywords
31 self
::$text_chunks = array(
32 'xml:com.adobe.xmp' => 'xmp',
33 # Artist is unofficial. Author is the recommended
34 # keyword in the PNG spec. However some people output
35 # Artist so support both.
40 'comment' => 'PNGFileComment',
41 'description' => 'ImageDescription',
42 'title' => 'ObjectName',
43 'copyright' => 'Copyright',
44 # Source as in original device used to make image
45 # not as in who gave you the image
47 'software' => 'Software',
48 'disclaimer' => 'Disclaimer',
49 'warning' => 'ContentWarning',
50 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
52 'creation time' => 'DateTimeDigitized',
53 /* Other potentially useful things - Document */
61 $colorType = 'unknown';
64 throw new Exception( __METHOD__
. ": No file name specified" );
65 } elseif ( !file_exists( $filename ) ||
is_dir( $filename ) ) {
66 throw new Exception( __METHOD__
. ": File $filename does not exist" );
69 $fh = fopen( $filename, 'r' );
72 throw new Exception( __METHOD__
. ": Unable to open file $filename" );
75 // Check for the PNG header
76 $buf = fread( $fh, 8 );
77 if ( $buf != self
::$png_sig ) {
78 throw new Exception( __METHOD__
. ": Not a valid PNG file; header: $buf" );
82 while ( !feof( $fh ) ) {
83 $buf = fread( $fh, 4 );
85 throw new Exception( __METHOD__
. ": Read error" );
87 $chunk_size = unpack( "N", $buf );
88 $chunk_size = $chunk_size[1];
90 $chunk_type = fread( $fh, 4 );
92 throw new Exception( __METHOD__
. ": Read error" );
95 if ( $chunk_type == "IHDR" ) {
96 $buf = self
::read( $fh, $chunk_size );
98 throw new Exception( __METHOD__
. ": Read error" );
100 $bitDepth = ord( substr( $buf, 8, 1 ) );
101 // Detect the color type in British English as per the spec
102 // http://www.w3.org/TR/PNG/#11IHDR
103 switch ( ord( substr( $buf, 9, 1 ) ) ) {
105 $colorType = 'greyscale';
108 $colorType = 'truecolour';
111 $colorType = 'index-coloured';
114 $colorType = 'greyscale-alpha';
117 $colorType = 'truecolour-alpha';
120 $colorType = 'unknown';
123 } elseif ( $chunk_type == "acTL" ) {
124 $buf = fread( $fh, $chunk_size );
126 throw new Exception( __METHOD__
. ": Read error" );
129 $actl = unpack( "Nframes/Nplays", $buf );
130 $frameCount = $actl['frames'];
131 $loopCount = $actl['plays'];
132 } elseif ( $chunk_type == "fcTL" ) {
133 $buf = self
::read( $fh, $chunk_size );
135 throw new Exception( __METHOD__
. ": Read error" );
137 $buf = substr( $buf, 20 );
139 $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
140 if ( $fctldur['delay_den'] == 0 ) {
141 $fctldur['delay_den'] = 100;
143 if ( $fctldur['delay_num'] ) {
144 $duration +
= $fctldur['delay_num'] / $fctldur['delay_den'];
146 } elseif ( $chunk_type == "iTXt" ) {
147 // Extracts iTXt chunks, uncompressing if necessary.
148 $buf = self
::read( $fh, $chunk_size );
151 '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
154 /* $items[1] = text chunk name, $items[2] = compressed flag,
155 * $items[3] = lang code (or ""), $items[4]= compression type.
156 * $items[5] = content
159 // Theoretically should be case-sensitive, but in practice...
160 $items[1] = strtolower( $items[1] );
161 if ( !isset( self
::$text_chunks[$items[1]] ) ) {
162 // Only extract textual chunks on our list.
163 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
167 $items[3] = strtolower( $items[3] );
168 if ( $items[3] == '' ) {
169 // if no lang specified use x-default like in xmp.
170 $items[3] = 'x-default';
174 if ( $items[2] == "\x01" ) {
175 if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
176 wfSuppressWarnings();
177 $items[5] = gzuncompress( $items[5] );
180 if ( $items[5] === false ) {
181 // decompression failed
182 wfDebug( __METHOD__
. ' Error decompressing iTxt chunk - ' . $items[1] );
183 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
188 wfDebug( __METHOD__
. ' Skipping compressed png iTXt chunk due to lack of zlib,'
189 . ' or potentially invalid compression method' );
190 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
194 $finalKeyword = self
::$text_chunks[ $items[1] ];
195 $text[ $finalKeyword ][ $items[3] ] = $items[5];
196 $text[ $finalKeyword ]['_type'] = 'lang';
199 // Error reading iTXt chunk
200 throw new Exception( __METHOD__
. ": Read error on iTXt chunk" );
203 } elseif ( $chunk_type == 'tEXt' ) {
204 $buf = self
::read( $fh, $chunk_size );
206 list( $keyword, $content ) = explode( "\x00", $buf, 2 );
207 if ( $keyword === '' ||
$content === '' ) {
208 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
211 // Theoretically should be case-sensitive, but in practice...
212 $keyword = strtolower( $keyword );
213 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
214 // Don't recognize chunk, so skip.
215 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
218 wfSuppressWarnings();
219 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
222 if ( $content === false ) {
223 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
226 $finalKeyword = self
::$text_chunks[ $keyword ];
227 $text[ $finalKeyword ][ 'x-default' ] = $content;
228 $text[ $finalKeyword ]['_type'] = 'lang';
230 } elseif ( $chunk_type == 'zTXt' ) {
231 if ( function_exists( 'gzuncompress' ) ) {
232 $buf = self
::read( $fh, $chunk_size );
234 list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
235 if ( $keyword === '' ||
$postKeyword === '' ) {
236 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
238 // Theoretically should be case-sensitive, but in practice...
239 $keyword = strtolower( $keyword );
241 if ( !isset( self
::$text_chunks[ $keyword ] ) ) {
242 // Don't recognize chunk, so skip.
243 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
246 $compression = substr( $postKeyword, 0, 1 );
247 $content = substr( $postKeyword, 1 );
248 if ( $compression !== "\x00" ) {
249 wfDebug( __METHOD__
. " Unrecognized compression method in zTXt ($keyword). Skipping." );
250 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
254 wfSuppressWarnings();
255 $content = gzuncompress( $content );
258 if ( $content === false ) {
259 // decompression failed
260 wfDebug( __METHOD__
. ' Error decompressing zTXt chunk - ' . $keyword );
261 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
265 wfSuppressWarnings();
266 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
269 if ( $content === false ) {
270 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
273 $finalKeyword = self
::$text_chunks[ $keyword ];
274 $text[ $finalKeyword ][ 'x-default' ] = $content;
275 $text[ $finalKeyword ]['_type'] = 'lang';
278 wfDebug( __METHOD__
. " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
279 fseek( $fh, $chunk_size, SEEK_CUR
);
281 } elseif ( $chunk_type == 'tIME' ) {
282 // last mod timestamp.
283 if ( $chunk_size !== 7 ) {
284 throw new Exception( __METHOD__
. ": tIME wrong size" );
286 $buf = self
::read( $fh, $chunk_size );
288 throw new Exception( __METHOD__
. ": Read error" );
291 // Note: spec says this should be UTC.
292 $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
293 $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
294 $t['y'], $t['m'], $t['d'], $t['h'],
295 $t['min'], $t['s'] );
297 $exifTime = wfTimestamp( TS_EXIF
, $strTime );
300 $text['DateTime'] = $exifTime;
303 } elseif ( $chunk_type == 'pHYs' ) {
304 // how big pixels are (dots per meter).
305 if ( $chunk_size !== 9 ) {
306 throw new Exception( __METHOD__
. ": pHYs wrong size" );
309 $buf = self
::read( $fh, $chunk_size );
311 throw new Exception( __METHOD__
. ": Read error" );
314 $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
315 if ( $dim['unit'] == 1 ) {
317 // (as opposed to 0 = undefined )
318 $text['XResolution'] = $dim['width']
320 $text['YResolution'] = $dim['height']
322 $text['ResolutionUnit'] = 3;
323 // 3 = dots per cm (from Exif).
326 } elseif ( $chunk_type == "IEND" ) {
329 fseek( $fh, $chunk_size, SEEK_CUR
);
331 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
335 if ( $loopCount > 1 ) {
336 $duration *= $loopCount;
339 if ( isset( $text['DateTimeDigitized'] ) ) {
340 // Convert date format from rfc2822 to exif.
341 foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
342 if ( $name === '_type' ) {
346 // fixme: currently timezones are ignored.
347 // possibly should be wfTimestamp's
348 // responsibility. (at least for numeric TZ)
349 $formatted = wfTimestamp( TS_EXIF
, $value );
351 // Only change if we could convert the
353 // The png standard says it should be
354 // in rfc2822 format, but not required.
355 // In general for the exif stuff we
356 // prettify the date if we can, but we
357 // display as-is if we cannot or if
359 // So do the same here.
366 'frameCount' => $frameCount,
367 'loopCount' => $loopCount,
368 'duration' => $duration,
370 'bitDepth' => $bitDepth,
371 'colorType' => $colorType,
/**
 * Read the data portion of a chunk, checking to make sure it's not too big.
 *
 * A zero-length chunk yields an empty string without touching the stream,
 * because fread() rejects a length of 0 (warning + false on PHP 5/7,
 * ValueError on PHP 8). The empty string is still falsy, so callers that
 * test the result with "!$buf" keep treating it as a read error.
 *
 * @param $fh resource The file handle
 * @param $size Integer size in bytes.
 * @throws Exception if the size is negative or exceeds self::MAX_CHUNK_SIZE.
 * @return String|bool The bytes read (may be shorter than $size if the file
 *   ends early), or false if fread() itself fails.
 */
private static function read( $fh, $size ) {
	if ( $size > self::MAX_CHUNK_SIZE ) {
		throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
			' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
	}

	// The size comes from unpack( "N" ), which can produce a negative
	// number on 32-bit PHP for lengths >= 2^31. Such a value would slip
	// past the upper-bound check above, so reject it explicitly.
	if ( $size < 0 ) {
		throw new Exception( __METHOD__ . ': Invalid negative chunk size ' . $size );
	}

	// Nothing to read; do not hand a zero length to fread().
	if ( $size < 1 ) {
		return '';
	}

	return fread( $fh, $size );
}