40dd8bd855b72c34f23e3504d31eacf5c4c01383
2 /////////////////////////////////////////////////////////////////
3 /// getID3() by James Heinrich <info@getid3.org> //
4 // available at http://getid3.sourceforge.net //
5 // or http://www.getid3.org //
6 // also https://github.com/JamesHeinrich/getID3 //
7 /////////////////////////////////////////////////////////////////
8 // See readme.txt for more details //
9 /////////////////////////////////////////////////////////////////
11 // module.tag.xmp.php //
12 // module for analyzing XMP metadata (e.g. in JPEG files) //
13 // dependencies: NONE //
15 /////////////////////////////////////////////////////////////////
17 // Module originally written [2009-Mar-26] by //
18 // Nigel Barnes <ngbarnesØhotmail*com> //
19 // Bundled into getID3 with permission //
20 // called by getID3 in module.graphic.jpg.php //
22 /////////////////////////////////////////////////////////////////
24 /**************************************************************************************************
25 * SWISScenter Source Nigel Barnes
27 * Provides functions for reading information from the 'APP1' Extensible Metadata
28 * Platform (XMP) segment of JPEG format files.
29 * This XMP segment is XML based and contains the Resource Description Framework (RDF)
30 * data, which itself can contain the Dublin Core Metadata Initiative (DCMI) information.
32 * This code uses segments from the JPEG Metadata Toolkit project by Evan Hunter.
33 *************************************************************************************************/
38 * The name of the image file that contains the XMP fields to extract and modify.
41 public $_sFilename = null;
45 * The XMP fields that were extracted from the image or updated by this class.
48 public $_aXMP = array();
52 * True if an APP1 segment was found to contain XMP metadata.
55 public $_bXMPParse = false;
58 * Returns the status of XMP parsing during instantiation
60 * You'll normally want to call this method before trying to get XMP fields.
63 * Returns true if an APP1 segment was found to contain XMP metadata.
65 public function isValid()
67 return $this->_bXMPParse
;
71 * Get a copy of all XMP tags extracted from the image
73 * @return array - An array of XMP fields as they were extracted by the XMPparse() function
75 public function getAllTags()
81 * Reads all the JPEG header segments from a JPEG image file into an array
83 * @param string $filename - the filename of the JPEG file to read
84 * @return array $headerdata - Array of JPEG header segments
85 * @return boolean FALSE - if headers could not be read
87 public function _get_jpeg_header_data($filename)
89 // prevent refresh from aborting file operations and hosing file
90 ignore_user_abort(true);
92 // Attempt to open the jpeg file - the at symbol suppresses the error message about
93 // not being able to open files. The file_exists would have been used, but it
94 // does not work with files fetched over http or ftp.
95 if (is_readable($filename) && is_file($filename) && ($filehnd = fopen($filename, 'rb'))) {
101 // Read the first two characters
102 $data = fread($filehnd, 2);
104 // Check that the first two characters are 0xFF 0xD8 (SOI - Start of image)
105 if ($data != "\xFF\xD8")
107 // No SOI (FF D8) at start of file - This probably isn't a JPEG file - close file and return;
108 echo '<p>This probably is not a JPEG file</p>'."\n";
113 // Read the third and fourth characters (the marker of the first segment)
114 $data = fread($filehnd, 2);
116 // Check that the third character is 0xFF (Start of first segment header)
117 if ($data{0} != "\xFF")
119 // NO FF found - close file and return - JPEG is probably corrupted
124 // Flag that we haven't yet hit the compressed image data
125 $hit_compressed_image_data = false;
127 // Cycle through the file until, one of: 1) an EOI (End of image) marker is hit,
128 // 2) we have hit the compressed image data (no more headers are allowed after data)
129 // 3) or end of file is hit
131 while (($data{1} != "\xD9") && (!$hit_compressed_image_data) && (!feof($filehnd)))
133 // Found a segment to look at.
134 // Check that the segment marker is not a Restart marker - restart markers don't have size or data after them
135 if ((ord($data{1}) < 0xD0) ||
(ord($data{1}) > 0xD7))
137 // Segment isn't a Restart marker
138 // Read the next two bytes (size)
139 $sizestr = fread($filehnd, 2);
141 // convert the size bytes to an integer
142 $decodedsize = unpack('nsize', $sizestr);
144 // Save the start position of the data
145 $segdatastart = ftell($filehnd);
147 // Read the segment data with length indicated by the previously read size
148 $segdata = fread($filehnd, $decodedsize['size'] - 2);
150 // Store the segment information in the output array
151 $headerdata[] = array(
152 'SegType' => ord($data{1}),
153 'SegName' => $GLOBALS['JPEG_Segment_Names'][ord($data{1})],
154 'SegDataStart' => $segdatastart,
155 'SegData' => $segdata,
159 // If this is a SOS (Start Of Scan) segment, then there is no more header data - the compressed image data follows
160 if ($data{1} == "\xDA")
162 // Flag that we have hit the compressed image data - exit loop as no more headers available.
163 $hit_compressed_image_data = true;
167 // Not an SOS - Read the next two bytes - should be the segment marker for the next segment
168 $data = fread($filehnd, 2);
170 // Check that the first byte of the two is 0xFF as it should be for a marker
171 if ($data{0} != "\xFF")
173 // NO FF found - close file and return - JPEG is probably corrupted
182 // Allow the user to abort from now on
183 ignore_user_abort(false);
185 // Return the header data retrieved
191 * Retrieves XMP information from an APP1 JPEG segment and returns the raw XML text as a string.
193 * @param string $filename - the filename of the JPEG file to read
194 * @return string $xmp_data - the string of raw XML text
195 * @return boolean FALSE - if an APP1 XMP segment could not be found, or if an error occurred
197 public function _get_XMP_text($filename)
199 //Get JPEG header data
200 $jpeg_header_data = $this->_get_jpeg_header_data($filename);
202 //Cycle through the header segments
203 for ($i = 0; $i < count($jpeg_header_data); $i++
)
205 // If we find an APP1 header,
206 if (strcmp($jpeg_header_data[$i]['SegName'], 'APP1') == 0)
208 // And if it has the Adobe XMP/RDF label (http://ns.adobe.com/xap/1.0/\x00) ,
209 if (strncmp($jpeg_header_data[$i]['SegData'], 'http://ns.adobe.com/xap/1.0/'."\x00", 29) == 0)
211 // Found a XMP/RDF block
212 // Return the XMP text
213 $xmp_data = substr($jpeg_header_data[$i]['SegData'], 29);
215 return trim($xmp_data); // trim() should not be necessary, but some files found in the wild with a null-terminated block (known samples from Apple Aperture) cause problems elsewhere (see http://www.getid3.org/phpBB3/viewtopic.php?f=4&t=1153)
223 * Parses a string containing XMP data (XML), and returns an array
224 * which contains all the XMP (XML) information.
226 * @param string $xml_text - a string containing the XMP data (XML) to be parsed
227 * @return array $xmp_array - an array containing all xmp details retrieved.
228 * @return boolean FALSE - couldn't parse the XMP data
230 public function read_XMP_array_from_text($xmltext)
232 // Check if there actually is any text to parse
233 if (trim($xmltext) == '')
238 // Create an instance of a xml parser to parse the XML text
239 $xml_parser = xml_parser_create('UTF-8');
241 // Change: Fixed problem that caused the whitespace (especially newlines) to be destroyed when converting xml text to an xml array, as of revision 1.10
243 // We would like to remove unnecessary white space, but this will also
244 // remove things like newlines (&#xA;) in the XML values, so white space
245 // will have to be removed later
246 if (xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE
, 0) == false)
248 // Error setting white space skipping - destroy the parser and return
249 xml_parser_free($xml_parser);
253 // to use XML code correctly we have to turn case folding
254 // (uppercasing) off. XML is case sensitive and upper
255 // casing is in reality XML standards violation
256 if (xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING
, 0) == false)
258 // Error setting case folding - destroy the parser and return
259 xml_parser_free($xml_parser);
263 // Parse the XML text into a array structure
264 if (xml_parse_into_struct($xml_parser, $xmltext, $values, $tags) == 0)
266 // Error Parsing XML - destroy the parser and return
267 xml_parser_free($xml_parser);
271 // Destroy the xml parser
272 xml_parser_free($xml_parser);
274 // Clear the output array
275 $xmp_array = array();
277 // The XMP data has now been parsed into an array ...
279 // Cycle through each of the array elements
280 $current_property = ''; // current property being processed
281 $container_index = -1; // -1 = no container open, otherwise index of container content
282 foreach ($values as $xml_elem)
284 // Syntax and Class names
285 switch ($xml_elem['tag'])
288 // only defined attribute is x:xmptk written by Adobe XMP Toolkit; value is the version of the toolkit
292 // required element immediately within x:xmpmeta; no data here
295 case 'rdf:Description':
296 switch ($xml_elem['type'])
300 if (array_key_exists('attributes', $xml_elem))
302 // rdf:Description may contain wanted attributes
303 foreach (array_keys($xml_elem['attributes']) as $key)
305 // Check whether we want the details from this attribute
306 // if (in_array($key, $GLOBALS['XMP_tag_captions']))
310 $xmp_array[$key] = $xml_elem['attributes'][$key];
321 // Attributes are ignored
326 if ($xml_elem['type'] == 'complete')
328 if (array_key_exists('attributes', $xml_elem))
330 // If Lang Alt (language alternatives) then ensure we take the default language
331 if (isset($xml_elem['attributes']['xml:lang']) && ($xml_elem['attributes']['xml:lang'] != 'x-default'))
336 if ($current_property != '')
338 $xmp_array[$current_property][$container_index] = (isset($xml_elem['value']) ?
$xml_elem['value'] : '');
339 $container_index +
= 1;
341 //else unidentified attribute!!
349 switch ($xml_elem['type'])
352 $container_index = 0;
355 $container_index = -1;
363 // Check whether we want the details from this attribute
364 // if (in_array($xml_elem['tag'], $GLOBALS['XMP_tag_captions']))
367 switch ($xml_elem['type'])
370 // open current element
371 $current_property = $xml_elem['tag'];
375 // close current element
376 $current_property = '';
380 // store attribute value
381 $xmp_array[$xml_elem['tag']] = (isset($xml_elem['attributes']) ?
$xml_elem['attributes'] : (isset($xml_elem['value']) ?
$xml_elem['value'] : ''));
400 * @param string - Name of the image file to access and extract XMP information from.
402 public function __construct($sFilename)
404 $this->_sFilename
= $sFilename;
406 if (is_file($this->_sFilename
))
409 $xmp_data = $this->_get_XMP_text($sFilename);
412 $this->_aXMP
= $this->read_XMP_array_from_text($xmp_data);
413 $this->_bXMPParse
= true;
421 * Global Variable: XMP_tag_captions
423 * The Property names of all known XMP fields.
424 * Note: this is a full list with unrequired properties commented out.
427 $GLOBALS['XMP_tag_captions'] = array(
429 'Iptc4xmpCore:CiAdrCity',
430 'Iptc4xmpCore:CiAdrCtry',
431 'Iptc4xmpCore:CiAdrExtadr',
432 'Iptc4xmpCore:CiAdrPcode',
433 'Iptc4xmpCore:CiAdrRegion',
434 'Iptc4xmpCore:CiEmailWork',
435 'Iptc4xmpCore:CiTelWork',
436 'Iptc4xmpCore:CiUrlWork',
437 'Iptc4xmpCore:CountryCode',
438 'Iptc4xmpCore:CreatorContactInfo',
439 'Iptc4xmpCore:IntellectualGenre',
440 'Iptc4xmpCore:Location',
441 'Iptc4xmpCore:Scene',
442 'Iptc4xmpCore:SubjectCode',
443 // Dublin Core Schema
472 // XMP Rights Management Schema
473 'xmpRights:Certificate',
476 'xmpRights:UsageTerms',
477 'xmpRights:WebStatement',
478 // These are not in spec but Photoshop CS seems to use them
490 'xapRights:Certificate',
491 'xapRights:Copyright',
494 'xapRights:UsageTerms',
495 'xapRights:WebStatement',
496 // XMP Media Management Schema
505 'xapMM:ManagerVariant',
506 'xapMM:RenditionClass',
507 'xapMM:RenditionParams',
513 // XMP Basic Job Ticket Schema
515 // XMP Paged-Text Schema
516 'xmpTPg:MaxPageSize',
526 'photoshop:AuthorsPosition',
527 'photoshop:CaptionWriter',
528 'photoshop:Category',
532 'photoshop:DateCreated',
533 'photoshop:Headline',
536 'photoshop:Instructions',
539 'photoshop:SupplementalCategories',
540 'photoshop:TransmissionReference',
545 'tiff:BitsPerSample',
547 'tiff:PhotometricInterpretation',
549 'tiff:SamplesPerPixel',
550 'tiff:PlanarConfiguration',
551 'tiff:YCbCrSubSampling',
552 'tiff:YCbCrPositioning',
555 'tiff:ResolutionUnit',
556 'tiff:TransferFunction',
558 'tiff:PrimaryChromaticities',
559 'tiff:YCbCrCoefficients',
560 'tiff:ReferenceBlackWhite',
562 'tiff:ImageDescription',
569 'exif:FlashpixVersion',
571 'exif:ComponentsConfiguration',
572 'exif:CompressedBitsPerPixel',
573 'exif:PixelXDimension',
574 'exif:PixelYDimension',
577 'exif:RelatedSoundFile',
578 'exif:DateTimeOriginal',
579 'exif:DateTimeDigitized',
582 'exif:ExposureProgram',
583 'exif:SpectralSensitivity',
584 'exif:ISOSpeedRatings',
586 'exif:ShutterSpeedValue',
587 'exif:ApertureValue',
588 'exif:BrightnessValue',
589 'exif:ExposureBiasValue',
590 'exif:MaxApertureValue',
591 'exif:SubjectDistance',
598 'exif:SpatialFrequencyResponse',
599 'exif:FocalPlaneXResolution',
600 'exif:FocalPlaneYResolution',
601 'exif:FocalPlaneResolutionUnit',
602 'exif:SubjectLocation',
603 'exif:SensingMethod',
607 'exif:CustomRendered',
610 'exif:DigitalZoomRatio',
611 'exif:FocalLengthIn35mmFilm',
612 'exif:SceneCaptureType',
617 'exif:DeviceSettingDescription',
618 'exif:SubjectDistanceRange',
619 'exif:ImageUniqueID',
623 'exif:GPSAltitudeRef',
626 'exif:GPSSatellites',
628 'exif:GPSMeasureMode',
634 'exif:GPSImgDirectionRef',
635 'exif:GPSImgDirection',
637 'exif:GPSDestLatitude',
638 'exif:GPSDestLongitude',
639 'exif:GPSDestBearingRef',
640 'exif:GPSDestBearing',
641 'exif:GPSDestDistanceRef',
642 'exif:GPSDestDistance',
643 'exif:GPSProcessingMethod',
644 'exif:GPSAreaInformation',
645 'exif:GPSDifferential',
656 'stEvt:softwareAgent',
661 'stRef:renditionClass',
662 'stRef:renditionParams',
664 'stRef:managerVariant',
690 // Exif DeviceSettings
698 * Global Variable: JPEG_Segment_Names
700 * The names of the JPEG segment markers, indexed by their marker number
702 $GLOBALS['JPEG_Segment_Names'] = array(