From b995e08cdf00d3b2f09585dd1424da6f317b857b Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Sun, 21 Aug 2011 17:16:57 +0000 Subject: [PATCH] follow-up r86169 - 2 minor issues found while writing unit tests # Some really obscure Exif properties did not have the Exif byte order taken into account and were being extracted with the bytes reversed (for example user comment when encoded as utf-16). Not a major issue as these properties are very rare in practise, but certainly not a good thing. ( One would think php's exif support would take care of that, but no it does not...) # Change the fallback encoding for Gif comments to be windows-1252 instead of iso 8859-1. More to be consitent with jpg and iptc then anything else. --- includes/media/BitmapMetadataHandler.php | 70 ++++++++++++++++++++++-- includes/media/Exif.php | 31 +++++++++-- includes/media/GIFMetadataExtractor.php | 4 +- includes/media/JpegMetadataExtractor.php | 26 +++++++-- includes/media/Tiff.php | 21 ++++--- 5 files changed, 127 insertions(+), 25 deletions(-) diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php index 13188ecf48..d1caa67a38 100644 --- a/includes/media/BitmapMetadataHandler.php +++ b/includes/media/BitmapMetadataHandler.php @@ -40,16 +40,16 @@ class BitmapMetadataHandler { /** - * - * get exif info using exif class. + * Get exif info using exif class. * Basically what used to be in BitmapHandler::getMetadata(). * Just calls stuff in the Exif class. * * @param $filename string */ - function getExif ( $filename ) { - if ( file_exists( $filename ) ) { - $exif = new Exif( $filename ); + function getExif ( $filename, $byteOrder ) { + global $wgShowEXIF; + if ( file_exists( $filename ) && $wgShowEXIF ) { + $exif = new Exif( $filename, $byteOrder ); $data = $exif->getFilteredData(); if ( $data ) { $this->addMetadata( $data, 'exif' ); @@ -117,7 +117,6 @@ class BitmapMetadataHandler { static function Jpeg ( $filename ) { $showXMP = function_exists( 'xml_parser_create_ns' ); $meta = new self(); - $meta->getExif( $filename ); $seg = JpegMetadataExtractor::segmentSplitter( $filename ); if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) { @@ -141,6 +140,9 @@ class BitmapMetadataHandler { $meta->addMetadata( $array, $type ); } } + if ( isset( $seg['byteOrder'] ) ) { + $meta->getExif( $filename, $seg['byteOrder'] ); + } return $meta->getMetadataArray(); } @@ -208,4 +210,60 @@ class BitmapMetadataHandler { return $baseArray; } + /** + * This doesn't do much yet, but eventually I plan to add + * XMP support for Tiff. (PHP's exif support already extracts + * but needs some further processing because PHP's exif support + * is stupid...) + * + * @todo Add XMP support, so this function actually makes + * sense to put here. + * + * The various exceptions this throws are caught later. + * @param $filename String + * @return Array The metadata. + */ + static public function Tiff ( $filename ) { + if ( file_exists( $filename ) ) { + $byteOrder = self::getTiffByteOrder( $filename ); + if ( !$byteOrder ) { + throw new MWException( "Error determining byte order of $filename" ); + } + $exif = new Exif( $filename, $byteOrder ); + $data = $exif->getFilteredData(); + if ( $data ) { + $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); + return $data; + } else { + throw new MWException( "Could not extract data from tiff file $filename" ); + } + } else { + throw new MWException( "File doesn't exist - $filename" ); + } + } + /** + * Read the first 2 bytes of a tiff file to figure out + * Little Endian or Big Endian. Needed for exif stuff. + * + * @param $filename String The filename + * @return String 'BE' or 'LE' or false + */ + static function getTiffByteOrder( $filename ) { + $fh = fopen( $filename, 'rb' ); + if ( !$fh ) return false; + $head = fread( $fh, 2 ); + fclose( $fh ); + + switch( $head ) { + case 'II': + return 'LE'; // II for intel. + case 'MM': + return 'BE'; // MM for motorla. + default: + return false; // Something went wrong. + + } + } + + } diff --git a/includes/media/Exif.php b/includes/media/Exif.php index 2836d31bcc..345a6f19e4 100644 --- a/includes/media/Exif.php +++ b/includes/media/Exif.php @@ -90,6 +90,11 @@ class Exif { */ var $log = false; + /** + * The byte order of the file. Needed because php's + * extension doesn't fully process some obscure props. + */ + private $byteOrder; //@} /** @@ -102,7 +107,7 @@ class Exif { * DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid. * possibly should treat 0/0 = 0. need to read exif spec on that. */ - function __construct( $file ) { + function __construct( $file, $byteOrder = '' ) { /** * Page numbers here refer to pages in the EXIF 2.2 standard * @@ -275,6 +280,16 @@ class Exif { $this->file = $file; $this->basename = wfBaseName( $this->file ); + if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) { + $this->byteOrder = $byteOrder; + } else { + // Only give a warning for b/c, since originally we didn't + // require this. The number of things affected by this is + // rather small. + wfWarn( 'Exif class did not have byte order specified. ' + . 'Some properties may be decoded incorrectly.' ); + $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's. + } $this->debugFile( $this->basename, __FUNCTION__, true ); if( function_exists( 'exif_read_data' ) ) { @@ -394,7 +409,16 @@ class Exif { } $newVal .= ord( substr($val, $i, 1) ); } - $this->mFilteredExifData['GPSVersionID'] = $newVal; + if ( $this->byteOrder === 'LE' ) { + // Need to reverse the string + $newVal2 = ''; + for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- ) { + $newVal2 .= substr( $newVal, $i, 1 ); + } + $this->mFilteredExifData['GPSVersionID'] = $newVal2; + } else { + $this->mFilteredExifData['GPSVersionID'] = $newVal; + } unset( $this->mFilteredExifData['GPSVersion'] ); } @@ -415,7 +439,6 @@ class Exif { unset($this->mFilteredExifData[$prop]); return; } - $charCode = substr( $this->mFilteredExifData[$prop], 0, 8); $val = substr( $this->mFilteredExifData[$prop], 8); @@ -426,7 +449,7 @@ class Exif { $charset = "Shift-JIS"; break; case "UNICODE\x00": - $charset = "UTF-16"; + $charset = "UTF-16" . $this->byteOrder; break; default: //ascii or undefined. $charset = ""; diff --git a/includes/media/GIFMetadataExtractor.php b/includes/media/GIFMetadataExtractor.php index a3d3ced39a..02a9c60dfc 100644 --- a/includes/media/GIFMetadataExtractor.php +++ b/includes/media/GIFMetadataExtractor.php @@ -126,14 +126,14 @@ class GIFMetadataExtractor { // The standard says this should be ASCII, however its unclear if // thats true in practise. Check to see if its valid utf-8, if so - // assume its that, otherwise assume its iso-8859-1 + // assume its that, otherwise assume its windows-1252 (iso-8859-1) $dataCopy = $data; // quickIsNFCVerify has the side effect of replacing any invalid characters UtfNormal::quickIsNFCVerify( $dataCopy ); if ( $dataCopy !== $data ) { wfSuppressWarnings(); - $data = iconv( 'ISO-8859-1', 'UTF-8', $data ); + $data = iconv( 'windows-1252', 'UTF-8', $data ); wfRestoreWarnings(); } diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php index 984bbe09dd..51b0ffa9d3 100644 --- a/includes/media/JpegMetadataExtractor.php +++ b/includes/media/JpegMetadataExtractor.php @@ -28,7 +28,10 @@ class JpegMetadataExtractor { $segmentCount = 0; - $segments = array( 'XMP_ext' => array(), 'COM' => array() ); + $segments = array( + 'XMP_ext' => array(), + 'COM' => array(), + ); if ( !$filename ) { throw new MWException( "No filename specified for " . __METHOD__ ); @@ -82,23 +85,34 @@ class JpegMetadataExtractor { wfDebug( __METHOD__ . ' Ignoring JPEG comment as is garbage.' ); } - } elseif ( $buffer === "\xE1" && $showXMP ) { + } elseif ( $buffer === "\xE1" ) { // APP1 section (Exif, XMP, and XMP extended) // only extract if XMP is enabled. $temp = self::jpegExtractMarker( $fh ); - // check what type of app segment this is. - if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" ) { + if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { $segments["XMP"] = substr( $temp, 29 ); - } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" ) { + } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) { $segments["XMP_ext"][] = substr( $temp, 35 ); - } elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" ) { + } elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { // Some images (especially flickr images) seem to have this. // I really have no idea what the deal is with them, but // whatever... $segments["XMP"] = substr( $temp, 29 ); wfDebug( __METHOD__ . ' Found XMP section with wrong app identifier ' . "Using anyways.\n" ); + } elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" ) { + // Just need to find out what the byte order is. + // because php's exif plugin sucks... + // This is a II for little Endian, MM for big. Not a unicode BOM. + $byteOrderMarker = substr( $temp, 6, 2 ); + if ( $byteOrderMarker === 'MM' ) { + $segments['byteOrder'] = 'BE'; + } elseif ( $byteOrderMarker === 'II' ) { + $segments['byteOrder'] = 'LE'; + } else { + wfDebug( __METHOD__ . ' Invalid byte ordering?!' ); + } } } elseif ( $buffer === "\xED" ) { // APP13 - PSIR. IPTC and some photoshop stuff diff --git a/includes/media/Tiff.php b/includes/media/Tiff.php index 2674267424..0a103895f5 100644 --- a/includes/media/Tiff.php +++ b/includes/media/Tiff.php @@ -56,13 +56,20 @@ class TiffHandler extends ExifBitmapHandler { */ function getMetadata( $image, $filename ) { global $wgShowEXIF; - if ( $wgShowEXIF && file_exists( $filename ) ) { - $exif = new Exif( $filename ); - $data = $exif->getFilteredData(); - if ( $data ) { - $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); - return serialize( $data ); - } else { + if ( $wgShowEXIF ) { + try { + $meta = BitmapMetadataHandler::Tiff( $filename ); + if ( !is_array( $meta ) ) { + // This should never happen, but doesn't hurt to be paranoid. + throw new MWException('Metadata array is not an array'); + } + $meta['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); + return serialize( $meta ); + } + catch ( MWException $e ) { + // BitmapMetadataHandler throws an exception in certain exceptional + // cases like if file does not exist. + wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); return ExifBitmapHandler::BROKEN_FILE; } } else { -- 2.20.1