3 * PNG frame counter and metadata extractor.
5 * Slightly derived from GIFMetadataExtractor.php
6 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
33 class PNGMetadataExtractor
{
41 const MAX_CHUNK_SIZE
= 3145728; // 3 megabytes
/**
 * Walk a PNG file chunk by chunk and collect animation and textual metadata.
 *
 * NOTE(review): the original line numbers embedded in this copy skip values,
 * so some statements of this method are not shown here. The comments below
 * describe only what is visible.
 *
 * @param $filename String Path of the file to inspect.
 * @throws Exception for a missing or unopenable file, a bad PNG signature,
 *  short reads, malformed text chunks, iconv failures, or wrongly sized
 *  tIME/pHYs chunks.
 * @return Array Result whose visible keys are frameCount, loopCount,
 *  duration, bitDepth and colorType.
 */
43 static function getMetadata( $filename ) {
// Eight-byte PNG file signature; the first eight bytes of the file are compared against it.
44 self
::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
// Maps PNG text-chunk keywords (looked up after strtolower) to the property
// names under which their content is stored in $text.
46 /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
47 * and http://www.w3.org/TR/PNG/#11keywords
49 self
::$text_chunks = array(
50 'xml:com.adobe.xmp' => 'xmp',
51 # Artist is unofficial. Author is the recommended
52 # keyword in the PNG spec. However some people output
53 # Artist so support both.
58 'comment' => 'PNGFileComment',
59 'description' => 'ImageDescription',
60 'title' => 'ObjectName',
61 'copyright' => 'Copyright',
62 # Source as in original device used to make image
63 # not as in who gave you the image
65 'software' => 'Software',
66 'disclaimer' => 'Disclaimer',
67 'warning' => 'ContentWarning',
68 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
70 'creation time' => 'DateTimeDigitized',
71 /* Other potentially useful things - Document */
// Colour type stays 'unknown' unless an IHDR chunk sets it.
79 $colorType = 'unknown';
// Reject an unusable file name, a non-existent path or a directory before opening.
82 throw new Exception( __METHOD__
. ": No file name specified" );
83 } elseif ( !file_exists( $filename ) ||
is_dir( $filename ) ) {
84 throw new Exception( __METHOD__
. ": File $filename does not exist" );
// Open in binary mode so chunk bytes are read unmodified.
// NOTE(review): no fclose() is visible in this view, and the exceptions thrown
// below would leave $fh open unless it is closed elsewhere -- confirm.
87 $fh = fopen( $filename, 'rb' );
90 throw new Exception( __METHOD__
. ": Unable to open file $filename" );
93 // Check for the PNG header
94 $buf = fread( $fh, 8 );
95 if ( $buf != self
::$png_sig ) {
96 throw new Exception( __METHOD__
. ": Not a valid PNG file; header: $buf" );
// Chunk loop: each pass reads a 4-byte big-endian length and a 4-byte chunk
// type, then handles the chunk data according to the type.
100 while ( !feof( $fh ) ) {
101 $buf = fread( $fh, 4 );
102 if ( !$buf ||
strlen( $buf ) < 4 ) {
103 throw new Exception( __METHOD__
. ": Read error" );
105 $chunk_size = unpack( "N", $buf );
106 $chunk_size = $chunk_size[1];
// Guard against a length that unpack() handed back as a negative integer.
108 if ( $chunk_size < 0 ) {
109 throw new Exception( __METHOD__
. ": Chunk size too big for unpack" );
112 $chunk_type = fread( $fh, 4 );
113 if ( !$chunk_type ||
strlen( $chunk_type ) < 4 ) {
114 throw new Exception( __METHOD__
. ": Read error" );
// IHDR: bit depth is taken from byte 8 and the colour type code from byte 9
// of the chunk data.
117 if ( $chunk_type == "IHDR" ) {
118 $buf = self
::read( $fh, $chunk_size );
119 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
120 throw new Exception( __METHOD__
. ": Read error" );
122 $bitDepth = ord( substr( $buf, 8, 1 ) );
123 // Detect the color type in British English as per the spec
124 // http://www.w3.org/TR/PNG/#11IHDR
125 switch ( ord( substr( $buf, 9, 1 ) ) ) {
127 $colorType = 'greyscale';
130 $colorType = 'truecolour';
133 $colorType = 'index-coloured';
136 $colorType = 'greyscale-alpha';
139 $colorType = 'truecolour-alpha';
142 $colorType = 'unknown';
// acTL: supplies the frame count and the play (loop) count of the animation.
// NOTE(review): this branch calls fread() directly, so the MAX_CHUNK_SIZE
// check done by self::read() for the other chunk types is not applied here.
// NOTE(review): the guard rejects sizes below 4, but the unpack format reads
// two 32-bit values (8 bytes) -- a 4..7 byte chunk looks under-validated.
145 } elseif ( $chunk_type == "acTL" ) {
146 $buf = fread( $fh, $chunk_size );
147 if ( !$buf ||
strlen( $buf ) < $chunk_size ||
$chunk_size < 4 ) {
148 throw new Exception( __METHOD__
. ": Read error" );
151 $actl = unpack( "Nframes/Nplays", $buf );
152 $frameCount = $actl['frames'];
153 $loopCount = $actl['plays'];
// fcTL: the frame delay is a pair of 16-bit big-endian values (numerator,
// denominator) starting at offset 20; the quotient is added to $duration.
154 } elseif ( $chunk_type == "fcTL" ) {
155 $buf = self
::read( $fh, $chunk_size );
156 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
157 throw new Exception( __METHOD__
. ": Read error" );
159 $buf = substr( $buf, 20 );
160 if ( strlen( $buf ) < 4 ) {
161 throw new Exception( __METHOD__
. ": Read error" );
164 $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
// A zero denominator is replaced by 100 before dividing.
165 if ( $fctldur['delay_den'] == 0 ) {
166 $fctldur['delay_den'] = 100;
168 if ( $fctldur['delay_num'] ) {
169 $duration +
= $fctldur['delay_num'] / $fctldur['delay_den'];
171 } elseif ( $chunk_type == "iTXt" ) {
172 // Extracts iTXt chunks, uncompressing if necessary.
173 $buf = self
::read( $fh, $chunk_size );
// NOTE(review): in this pattern the literal \x00 right after the flag sits
// where the PNG spec places the compression method, and the fourth group is
// the single byte following the language tag -- confirm that the labels in
// the comment below match what the groups actually capture.
176 '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
179 /* $items[1] = text chunk name, $items[2] = compressed flag,
180 * $items[3] = lang code (or ""), $items[4]= compression type.
181 * $items[5] = content
184 // Theoretically should be case-sensitive, but in practise...
185 $items[1] = strtolower( $items[1] );
186 if ( !isset( self
::$text_chunks[$items[1]] ) ) {
187 // Only extract textual chunks on our list.
// The data has already been read, so only self::$CRC_size bytes are skipped.
// NOTE(review): the assignment of self::$CRC_size is not shown in this view.
188 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
192 $items[3] = strtolower( $items[3] );
193 if ( $items[3] == '' ) {
194 // if no lang specified use x-default like in xmp.
195 $items[3] = 'x-default';
// Compressed content is inflated only when zlib is available and the fourth
// captured byte is \x00.
199 if ( $items[2] == "\x01" ) {
200 if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
// NOTE(review): the call that re-enables warnings is not shown in this view.
201 wfSuppressWarnings();
202 $items[5] = gzuncompress( $items[5] );
205 if ( $items[5] === false ) {
206 // decompression failed
207 wfDebug( __METHOD__
. ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
208 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
212 wfDebug( __METHOD__
. ' Skipping compressed png iTXt chunk due to lack of zlib,'
213 . " or potentially invalid compression method\n" );
214 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
// Store the text under its mapped property name, keyed by language code, and
// tag the entry with '_type' => 'lang'.
218 $finalKeyword = self
::$text_chunks[$items[1]];
219 $text[$finalKeyword][$items[3]] = $items[5];
220 $text[$finalKeyword]['_type'] = 'lang';
222 // Error reading iTXt chunk
223 throw new Exception( __METHOD__
. ": Read error on iTXt chunk" );
// tEXt: keyword and content are separated by the first \x00; the content is
// converted from ISO-8859-1 to UTF-8 and stored under 'x-default'.
225 } elseif ( $chunk_type == 'tEXt' ) {
226 $buf = self
::read( $fh, $chunk_size );
228 // In case there is no \x00 which will make explode fail.
229 if ( strpos( $buf, "\x00" ) === false ) {
230 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
233 list( $keyword, $content ) = explode( "\x00", $buf, 2 );
234 if ( $keyword === '' ||
$content === '' ) {
235 throw new Exception( __METHOD__
. ": Read error on tEXt chunk" );
238 // Theoretically should be case-sensitive, but in practise...
239 $keyword = strtolower( $keyword );
240 if ( !isset( self
::$text_chunks[$keyword] ) ) {
241 // Don't recognize chunk, so skip.
242 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
245 wfSuppressWarnings();
246 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
249 if ( $content === false ) {
250 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
253 $finalKeyword = self
::$text_chunks[$keyword];
254 $text[$finalKeyword]['x-default'] = $content;
255 $text[$finalKeyword]['_type'] = 'lang';
// zTXt: like tEXt, but the byte after the keyword separator names the
// compression method and the remainder is inflated with gzuncompress().
256 } elseif ( $chunk_type == 'zTXt' ) {
257 if ( function_exists( 'gzuncompress' ) ) {
258 $buf = self
::read( $fh, $chunk_size );
260 // In case there is no \x00 which will make explode fail.
261 if ( strpos( $buf, "\x00" ) === false ) {
262 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
265 list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
266 if ( $keyword === '' ||
$postKeyword === '' ) {
267 throw new Exception( __METHOD__
. ": Read error on zTXt chunk" );
269 // Theoretically should be case-sensitive, but in practise...
270 $keyword = strtolower( $keyword );
272 if ( !isset( self
::$text_chunks[$keyword] ) ) {
273 // Don't recognize chunk, so skip.
274 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
277 $compression = substr( $postKeyword, 0, 1 );
278 $content = substr( $postKeyword, 1 );
// Only compression method \x00 is handled; anything else is logged and skipped.
279 if ( $compression !== "\x00" ) {
280 wfDebug( __METHOD__
. " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
281 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
285 wfSuppressWarnings();
286 $content = gzuncompress( $content );
289 if ( $content === false ) {
290 // decompression failed
291 wfDebug( __METHOD__
. ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
292 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
296 wfSuppressWarnings();
297 $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
300 if ( $content === false ) {
301 throw new Exception( __METHOD__
. ": Read error (error with iconv)" );
304 $finalKeyword = self
::$text_chunks[$keyword];
305 $text[$finalKeyword]['x-default'] = $content;
306 $text[$finalKeyword]['_type'] = 'lang';
// Without zlib the chunk data is skipped unread.
308 wfDebug( __METHOD__
. " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
309 fseek( $fh, $chunk_size, SEEK_CUR
);
311 } elseif ( $chunk_type == 'tIME' ) {
312 // last mod timestamp.
// The chunk must be exactly 7 bytes: 16-bit year, then month, day, hour,
// minute and second as single bytes.
313 if ( $chunk_size !== 7 ) {
314 throw new Exception( __METHOD__
. ": tIME wrong size" );
316 $buf = self
::read( $fh, $chunk_size );
317 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
318 throw new Exception( __METHOD__
. ": Read error" );
321 // Note: spec says this should be UTC.
322 $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
// Build a 14-digit YYYYMMDDHHMMSS string and hand it to wfTimestamp().
323 $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
324 $t['y'], $t['m'], $t['d'], $t['h'],
325 $t['min'], $t['s'] );
327 $exifTime = wfTimestamp( TS_EXIF
, $strTime );
330 $text['DateTime'] = $exifTime;
332 } elseif ( $chunk_type == 'pHYs' ) {
333 // how big pixels are (dots per meter).
// The chunk must be exactly 9 bytes: two 32-bit values and a one-byte unit.
334 if ( $chunk_size !== 9 ) {
335 throw new Exception( __METHOD__
. ": pHYs wrong size" );
338 $buf = self
::read( $fh, $chunk_size );
339 if ( !$buf ||
strlen( $buf ) < $chunk_size ) {
340 throw new Exception( __METHOD__
. ": Read error" );
343 $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
// Resolution is recorded only when the unit byte is 1.
344 if ( $dim['unit'] == 1 ) {
345 // Need to check for negative because php
346 // doesn't deal with super-large unsigned 32-bit ints well
347 if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
349 // (as opposed to 0 = undefined )
350 $text['XResolution'] = $dim['width']
352 $text['YResolution'] = $dim['height']
354 $text['ResolutionUnit'] = 3;
355 // 3 = dots per cm (from Exif).
358 } elseif ( $chunk_type == "IEND" ) {
// Skip $chunk_size bytes of chunk data without reading them.
361 fseek( $fh, $chunk_size, SEEK_CUR
);
// Step over self::$CRC_size bytes (the chunk's trailing CRC, going by the
// property name).
363 fseek( $fh, self
::$CRC_size, SEEK_CUR
);
// With a loop count above 1 the duration is multiplied by that count.
367 if ( $loopCount > 1 ) {
368 $duration *= $loopCount;
371 if ( isset( $text['DateTimeDigitized'] ) ) {
372 // Convert date format from rfc2822 to exif.
// Iterates by reference over the stored entries; the '_type' marker entry is
// singled out by the check below.
// NOTE(review): no unset( $value ) is visible after this by-reference loop.
373 foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
374 if ( $name === '_type' ) {
378 // @todo FIXME: Currently timezones are ignored.
379 // possibly should be wfTimestamp's
380 // responsibility. (at least for numeric TZ)
381 $formatted = wfTimestamp( TS_EXIF
, $value );
383 // Only change if we could convert the
385 // The png standard says it should be
386 // in rfc2822 format, but not required.
387 // In general for the exif stuff we
388 // prettify the date if we can, but we
389 // display as-is if we cannot or if
391 // So do the same here.
// Entries of the returned array that are visible in this view.
399 'frameCount' => $frameCount,
400 'loopCount' => $loopCount,
401 'duration' => $duration,
403 'bitDepth' => $bitDepth,
404 'colorType' => $colorType,
/**
 * Read a chunk, checking to make sure it's not too big.
 *
 * A non-positive size yields an empty string without touching the stream:
 * fread() rejects such a length (warning and false on PHP < 8, ValueError on
 * PHP 8, which is not an Exception and would escape callers' catch blocks).
 * Callers treat an empty result as a read error, so a zero-length chunk is
 * still reported the same way.
 *
 * @param $fh resource The file handle
 * @param $size Integer size in bytes.
 * @throws Exception if too big.
 * @return String|bool The chunk data ('' for a non-positive size), or false
 *  if fread() fails.
 */
private static function read( $fh, $size ) {
	if ( $size > self::MAX_CHUNK_SIZE ) {
		throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
			' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
	}

	// Nothing to read; avoid handing fread() a length it refuses.
	if ( $size <= 0 ) {
		return '';
	}

	return fread( $fh, $size );
}