a976ef8eaa2cf85479c8ffb066630996137bf1f1
3 /////////////////////////////////////////////////////////////////
4 /// getID3() by James Heinrich <info@getid3.org> //
5 // available at https://github.com/JamesHeinrich/getID3 //
6 // or https://www.getid3.org //
7 // or http://getid3.sourceforge.net //
8 // see readme.txt for more details //
9 /////////////////////////////////////////////////////////////////
11 // module.tag.xmp.php //
12 // module for analyzing XMP metadata (e.g. in JPEG files) //
13 // dependencies: NONE //
15 /////////////////////////////////////////////////////////////////
17 // Module originally written [2009-Mar-26] by //
18 // Nigel Barnes <ngbarnesØhotmail*com> //
19 // Bundled into getID3 with permission //
20 // called by getID3 in module.graphic.jpg.php //
22 /////////////////////////////////////////////////////////////////
24 /**************************************************************************************************
25 * SWISScenter Source Nigel Barnes
27 * Provides functions for reading information from the 'APP1' Extensible Metadata
28 * Platform (XMP) segment of JPEG format files.
29 * This XMP segment is XML based and contains the Resource Description Framework (RDF)
30 * data, which itself can contain the Dublin Core Metadata Initiative (DCMI) information.
32 * This code uses segments from the JPEG Metadata Toolkit project by Evan Hunter.
33 *************************************************************************************************/
38 * The name of the image file that contains the XMP fields to extract and modify.
// Assigned by __construct() from its $sFilename argument.
41 public $_sFilename = null;
45 * The XMP fields that were extracted from the image or updated by this class.
// Replaced by __construct() with the array-cast result of read_XMP_array_from_text()
// whenever that call returns something other than false.
48 public $_aXMP = array();
52 * True if an APP1 segment was found to contain XMP metadata.
// Set to true by __construct() at the same time as $_aXMP; read back through isValid().
55 public $_bXMPParse = false;
/**
 * Reports whether XMP parsing succeeded when this object was instantiated.
 *
 * Call this before asking for XMP fields.
 *
 * @return boolean the current value of $_bXMPParse - true if an APP1 segment was found to contain XMP metadata
 */
public function isValid()
{
    $parsed = $this->_bXMPParse;

    return $parsed;
}
71 * Get a copy of all XMP tags extracted from the image
73 * @return array - An array of XMP fields as extracted by the read_XMP_array_from_text() function
// NOTE(review): only the signature of getAllTags() is visible in this extract; going by the
// description above it, the body presumably returns $this->_aXMP - confirm against the full file.
75 public function getAllTags()
/**
 * Reads all the JPEG header segments from a JPEG image file into an array.
 *
 * Reading stops at the EOI (End of image, 0xD9) marker or after the SOS (Start Of Scan, 0xDA)
 * segment, because no header segments are allowed once the compressed image data begins.
 * Nothing is written to the output buffer; failure is reported through the return value only.
 *
 * @param string $filename - the filename of the JPEG file to read
 * @return array|boolean $headerdata - Array of JPEG header segments; each entry is an array with
 *                       'SegType' (marker byte as integer), 'SegName' (entry of
 *                       $GLOBALS['JPEG_Segment_Names'] for that marker, '' when there is none),
 *                       'SegDataStart' (file offset of the segment data) and 'SegData' (raw bytes),
 *                       FALSE - if headers could not be read
 */
public function _get_jpeg_header_data($filename)
{
    // Only readable, regular files are accepted
    if (!is_readable($filename) || !is_file($filename)) {
        return false;
    }

    $filehnd = fopen($filename, 'rb');
    if (!$filehnd) {
        return false;
    }

    // Prevent a client disconnect from aborting the script while the file is open, and remember
    // the caller's setting so it is restored on every exit path instead of being forced off.
    $previous_abort_setting = ignore_user_abort(true);

    $headerdata = $this->_read_jpeg_header_segments($filehnd);

    fclose($filehnd);
    ignore_user_abort((bool) $previous_abort_setting);

    // Return the header data retrieved (or false)
    return $headerdata;
}

/**
 * Walks the marker segments of an opened JPEG stream, starting at the SOI marker.
 *
 * @param resource $filehnd - readable file handle positioned at the first byte of the JPEG data
 * @return array|boolean Array of segment arrays (same shape as _get_jpeg_header_data() returns),
 *                       FALSE - if the data is not a JPEG or is truncated/corrupted
 */
private function _read_jpeg_header_segments($filehnd)
{
    // Check that the first two bytes are 0xFF 0xD8 (SOI - Start of image)
    if (fread($filehnd, 2) !== "\xFF\xD8") {
        // No SOI at start of file - this probably isn't a JPEG file
        return false;
    }

    // Marker-number => name table; tolerate it being absent so unknown markers cannot raise notices
    $segment_names = array();
    if (isset($GLOBALS['JPEG_Segment_Names']) && is_array($GLOBALS['JPEG_Segment_Names'])) {
        $segment_names = $GLOBALS['JPEG_Segment_Names'];
    }

    $headerdata = array();

    // Read the first segment marker (0xFF followed by the marker type)
    $data = fread($filehnd, 2);

    while (true) {
        // Every marker is two bytes long and starts with 0xFF - anything else (including a
        // short read at end of file) means the JPEG is truncated or corrupted
        if (!is_string($data) || (strlen($data) !== 2) || ($data[0] !== "\xFF")) {
            return false;
        }

        $marker = ord($data[1]);

        // EOI (End of image) - no more segments
        if ($marker === 0xD9) {
            break;
        }

        // Restart markers (0xD0 - 0xD7) don't have size or data after them
        if (($marker < 0xD0) || ($marker > 0xD7)) {
            // Read the next two bytes: big-endian segment size, which counts the size bytes themselves
            $sizestr = fread($filehnd, 2);
            if (!is_string($sizestr) || (strlen($sizestr) !== 2)) {
                return false;
            }
            $decodedsize = unpack('nsize', $sizestr);
            if ($decodedsize['size'] < 2) {
                // A size smaller than its own two bytes cannot be valid
                return false;
            }

            // Save the start position of the data
            $segdatastart = ftell($filehnd);

            // fread() rejects a length of 0, so an empty segment is stored without reading.
            // A short read is kept as-is; the marker check on the next pass catches the truncation.
            $segdatalength = $decodedsize['size'] - 2;
            $segdata = (($segdatalength > 0) ? fread($filehnd, $segdatalength) : '');
            if ($segdata === false) {
                return false;
            }

            // Store the segment information in the output array
            $headerdata[] = array(
                'SegType'      => $marker,
                'SegName'      => (isset($segment_names[$marker]) ? $segment_names[$marker] : ''),
                'SegDataStart' => $segdatastart,
                'SegData'      => $segdata,
            );
        }

        // SOS (Start Of Scan): the compressed image data follows, so there is no more header data
        if ($marker === 0xDA) {
            break;
        }

        // Not an SOS - the next two bytes should be the marker of the next segment
        $data = fread($filehnd, 2);
    }

    return $headerdata;
}
/**
 * Retrieves XMP information from an APP1 JPEG segment and returns the raw XML text as a string.
 *
 * @param string $filename - the filename of the JPEG file to read
 * @return string|boolean $xmp_data - the string of raw XML text,
 *                        FALSE - if an APP1 XMP segment could not be found, or if an error occurred
 */
public function _get_XMP_text($filename)
{
    // Get JPEG header data; this is false (not an array) when the file could not be read,
    // and count()/iteration on false must be avoided
    $jpeg_header_data = $this->_get_jpeg_header_data($filename);
    if (!is_array($jpeg_header_data)) {
        return false;
    }

    // Adobe XMP/RDF label that prefixes the XML inside an APP1 segment (28 characters + NUL = 29 bytes)
    $xmp_label        = 'http://ns.adobe.com/xap/1.0/'."\x00";
    $xmp_label_length = strlen($xmp_label);

    // Cycle through the header segments
    foreach ($jpeg_header_data as $segment) {
        // Only APP1 headers can carry XMP
        if (strcmp((string) $segment['SegName'], 'APP1') !== 0) {
            continue;
        }

        // APP1 is also used for other payloads; require the XMP/RDF label
        $segment_data = (string) $segment['SegData'];
        if (strncmp($segment_data, $xmp_label, $xmp_label_length) !== 0) {
            continue;
        }

        // Found a XMP/RDF block - return the XMP text that follows the label.
        // trim() should not be necessary, but some files found in the wild with null-terminated block (known samples from Apple Aperture) causes problems elsewhere (see https://www.getid3.org/phpBB3/viewtopic.php?f=4&t=1153)
        return trim(substr($segment_data, $xmp_label_length));
    }

    // No APP1 segment with the XMP label was found
    return false;
}
224 * Parses a string containing XMP data (XML), and returns an array
225 * which contains all the XMP (XML) information.
227 * @param string $xmltext - a string containing the XMP data (XML) to be parsed
228 * @return array|boolean $xmp_array - an array containing all xmp details retrieved,
229 * FALSE - couldn't parse the XMP data.
231 public function read_XMP_array_from_text($xmltext)
// Parses $xmltext with PHP's XML parser (UTF-8, whitespace kept, case folding off) and folds the
// resulting element list ($values) into $xmp_array, keyed by attribute/tag name.
// NOTE(review): the failure returns and several case labels are not visible in this extract;
// the constructor treats a return value of false as "could not parse".
233 // Check if there actually is any text to parse
234 if (trim($xmltext) == '')
239 // Create an instance of a xml parser to parse the XML text
240 $xml_parser = xml_parser_create('UTF-8');
242 // Change: Fixed problem that caused the whitespace (especially newlines) to be destroyed when converting xml text to an xml array, as of revision 1.10
244 // We would like to remove unnecessary white space, but this will also
245 // remove things like newlines (
) in the XML values, so white space
246 // will have to be removed later
// NOTE(review): the comment above appears to have lost an entity (presumably "&#xA;") and now
// spills onto a line of its own - restore it as a single comment line in the real file.
247 if (xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE
, 0) == false)
249 // Error disabling whitespace skipping - destroy the parser and return
250 xml_parser_free($xml_parser);
254 // to use XML code correctly we have to turn case folding
255 // (uppercasing) off. XML is case sensitive and upper
256 // casing is in reality XML standards violation
257 if (xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING
, 0) == false)
259 // Error setting case folding - destroy the parser and return
260 xml_parser_free($xml_parser);
264 // Parse the XML text into an array structure
// $values receives the flat list of elements that is walked below; $tags is filled as well
// but is not used in the visible part of this method.
265 if (xml_parse_into_struct($xml_parser, $xmltext, $values, $tags) == 0)
267 // Error Parsing XML - destroy the parser and return
268 xml_parser_free($xml_parser);
272 // Destroy the xml parser
273 xml_parser_free($xml_parser);
275 // Clear the output array
276 $xmp_array = array();
278 // The XMP data has now been parsed into an array ...
280 // Cycle through each of the array elements
281 $current_property = ''; // current property being processed
282 $container_index = -1; // -1 = no container open, otherwise index of container content
283 foreach ($values as $xml_elem)
285 // Syntax and Class names
286 switch ($xml_elem['tag'])
289 // only defined attribute is x:xmptk written by Adobe XMP Toolkit; value is the version of the toolkit
293 // required element immediately within x:xmpmeta; no data here
296 case 'rdf:Description':
297 switch ($xml_elem['type'])
301 if (array_key_exists('attributes', $xml_elem))
303 // rdf:Description may contain wanted attributes
// Every attribute is copied: the XMP_tag_captions filter below is commented out.
304 foreach (array_keys($xml_elem['attributes']) as $key)
306 // Check whether we want the details from this attribute
307 // if (in_array($key, $GLOBALS['XMP_tag_captions']))
311 $xmp_array[$key] = $xml_elem['attributes'][$key];
324 // Attributes are ignored
// Only elements of type 'complete' are handled in this branch.
// NOTE(review): presumably the rdf:li (property member) case - its label is not visible here.
329 if ($xml_elem['type'] == 'complete')
331 if (array_key_exists('attributes', $xml_elem))
333 // If Lang Alt (language alternatives) then ensure we take the default language
// NOTE(review): the action taken for a non-'x-default' xml:lang is not visible; presumably the entry is skipped.
334 if (isset($xml_elem['attributes']['xml:lang']) && ($xml_elem['attributes']['xml:lang'] != 'x-default'))
339 if ($current_property != '')
// Store the member value ('' when the element has no value) under the open property at the
// current container position, then advance the position.
341 $xmp_array[$current_property][$container_index] = (isset($xml_elem['value']) ?
$xml_elem['value'] : '');
342 $container_index +
= 1;
344 //else unidentified attribute!!
// Container bookkeeping: the index is set to 0 and reset to -1 in the branches below.
// NOTE(review): presumably on container 'open' and 'close' respectively - the case labels are not visible.
352 switch ($xml_elem['type'])
355 $container_index = 0;
358 $container_index = -1;
366 // Check whether we want the details from this attribute
367 // if (in_array($xml_elem['tag'], $GLOBALS['XMP_tag_captions']))
370 switch ($xml_elem['type'])
373 // open current element
374 $current_property = $xml_elem['tag'];
378 // close current element
379 $current_property = '';
383 // store attribute value
// Precedence: the element's attributes array if it has one, otherwise its text value, otherwise ''.
384 $xmp_array[$xml_elem['tag']] = (isset($xml_elem['attributes']) ?
$xml_elem['attributes'] : (isset($xml_elem['value']) ?
$xml_elem['value'] : ''));
/**
 * Extracts the XMP metadata of an image file, if it has any.
 *
 * On success $_aXMP holds the parsed fields and $_bXMPParse is true; otherwise both keep their defaults.
 *
 * @param string $sFilename - Name of the image file to access and extract XMP information from.
 */
public function __construct($sFilename)
{
    $this->_sFilename = $sFilename;

    // Nothing to extract unless the name refers to a regular file
    if (!is_file($sFilename)) {
        return;
    }

    // Raw XMP packet text from the APP1 segment (false when there is none)
    $rawXMP = $this->_get_XMP_text($sFilename);

    // Parsed field array, or false when there was nothing to parse or parsing failed
    $parsedXMP = $this->read_XMP_array_from_text($rawXMP);
    if ($parsedXMP === false) {
        return;
    }

    $this->_aXMP      = (array) $parsedXMP;
    $this->_bXMPParse = true;
}
427 * Global Variable: XMP_tag_captions
429 * The Property names of all known XMP fields.
430 * Note: this is a full list with unrequired properties commented out.
433 $GLOBALS['XMP_tag_captions'] = array(
435 'Iptc4xmpCore:CiAdrCity',
436 'Iptc4xmpCore:CiAdrCtry',
437 'Iptc4xmpCore:CiAdrExtadr',
438 'Iptc4xmpCore:CiAdrPcode',
439 'Iptc4xmpCore:CiAdrRegion',
440 'Iptc4xmpCore:CiEmailWork',
441 'Iptc4xmpCore:CiTelWork',
442 'Iptc4xmpCore:CiUrlWork',
443 'Iptc4xmpCore:CountryCode',
444 'Iptc4xmpCore:CreatorContactInfo',
445 'Iptc4xmpCore:IntellectualGenre',
446 'Iptc4xmpCore:Location',
447 'Iptc4xmpCore:Scene',
448 'Iptc4xmpCore:SubjectCode',
449 // Dublin Core Schema
478 // XMP Rights Management Schema
479 'xmpRights:Certificate',
482 'xmpRights:UsageTerms',
483 'xmpRights:WebStatement',
484 // These are not in spec but Photoshop CS seems to use them
496 'xapRights:Certificate',
497 'xapRights:Copyright',
500 'xapRights:UsageTerms',
501 'xapRights:WebStatement',
502 // XMP Media Management Schema
511 'xapMM:ManagerVariant',
512 'xapMM:RenditionClass',
513 'xapMM:RenditionParams',
519 // XMP Basic Job Ticket Schema
521 // XMP Paged-Text Schema
522 'xmpTPg:MaxPageSize',
532 'photoshop:AuthorsPosition',
533 'photoshop:CaptionWriter',
534 'photoshop:Category',
538 'photoshop:DateCreated',
539 'photoshop:Headline',
542 'photoshop:Instructions',
545 'photoshop:SupplementalCategories',
546 'photoshop:TransmissionReference',
551 'tiff:BitsPerSample',
553 'tiff:PhotometricInterpretation',
555 'tiff:SamplesPerPixel',
556 'tiff:PlanarConfiguration',
557 'tiff:YCbCrSubSampling',
558 'tiff:YCbCrPositioning',
561 'tiff:ResolutionUnit',
562 'tiff:TransferFunction',
564 'tiff:PrimaryChromaticities',
565 'tiff:YCbCrCoefficients',
566 'tiff:ReferenceBlackWhite',
568 'tiff:ImageDescription',
575 'exif:FlashpixVersion',
577 'exif:ComponentsConfiguration',
578 'exif:CompressedBitsPerPixel',
579 'exif:PixelXDimension',
580 'exif:PixelYDimension',
583 'exif:RelatedSoundFile',
584 'exif:DateTimeOriginal',
585 'exif:DateTimeDigitized',
588 'exif:ExposureProgram',
589 'exif:SpectralSensitivity',
590 'exif:ISOSpeedRatings',
592 'exif:ShutterSpeedValue',
593 'exif:ApertureValue',
594 'exif:BrightnessValue',
595 'exif:ExposureBiasValue',
596 'exif:MaxApertureValue',
597 'exif:SubjectDistance',
604 'exif:SpatialFrequencyResponse',
605 'exif:FocalPlaneXResolution',
606 'exif:FocalPlaneYResolution',
607 'exif:FocalPlaneResolutionUnit',
608 'exif:SubjectLocation',
609 'exif:SensingMethod',
613 'exif:CustomRendered',
616 'exif:DigitalZoomRatio',
617 'exif:FocalLengthIn35mmFilm',
618 'exif:SceneCaptureType',
623 'exif:DeviceSettingDescription',
624 'exif:SubjectDistanceRange',
625 'exif:ImageUniqueID',
629 'exif:GPSAltitudeRef',
632 'exif:GPSSatellites',
634 'exif:GPSMeasureMode',
640 'exif:GPSImgDirectionRef',
641 'exif:GPSImgDirection',
643 'exif:GPSDestLatitude',
644 'exif:GPSDestLongitude',
645 'exif:GPSDestBearingRef',
646 'exif:GPSDestBearing',
647 'exif:GPSDestDistanceRef',
648 'exif:GPSDestDistance',
649 'exif:GPSProcessingMethod',
650 'exif:GPSAreaInformation',
651 'exif:GPSDifferential',
662 'stEvt:softwareAgent',
667 'stRef:renditionClass',
668 'stRef:renditionParams',
670 'stRef:managerVariant',
696 // Exif DeviceSettings
704 * Global Variable: JPEG_Segment_Names
706 * The names of the JPEG segment markers, indexed by their marker number
708 $GLOBALS['JPEG_Segment_Names'] = array(