From: Kunal Mehta Date: Wed, 21 Sep 2016 03:20:10 +0000 (-0700) Subject: Move XMP* classes to includes/libs X-Git-Tag: 1.31.0-rc.0~5430^2 X-Git-Url: http://git.cyclocoop.org/%22.%24image2.%22?a=commitdiff_plain;h=8669bced5a62491f7b4a145a931913e0a4dab2a2;p=lhc%2Fweb%2Fwiklou.git Move XMP* classes to includes/libs These no longer have any dependencies upon MediaWiki and can be split out into a separate library. Tests were updated for the new location and to no longer depend upon MediaWikiTestCase. Bug: T100922 Change-Id: I6ad507fa883d5934b807f9e31c507659d0730b52 --- diff --git a/autoload.php b/autoload.php index 198e477439..b6e0b8c75a 100644 --- a/autoload.php +++ b/autoload.php @@ -1561,9 +1561,9 @@ $wgAutoloadLocalClasses = [ 'XCFHandler' => __DIR__ . '/includes/media/XCF.php', 'XCacheBagOStuff' => __DIR__ . '/includes/libs/objectcache/XCacheBagOStuff.php', 'XMLRCFeedFormatter' => __DIR__ . '/includes/rcfeed/XMLRCFeedFormatter.php', - 'XMPInfo' => __DIR__ . '/includes/media/XMPInfo.php', - 'XMPReader' => __DIR__ . '/includes/media/XMP.php', - 'XMPValidate' => __DIR__ . '/includes/media/XMPValidate.php', + 'XMPInfo' => __DIR__ . '/includes/libs/xmp/XMPInfo.php', + 'XMPReader' => __DIR__ . '/includes/libs/xmp/XMP.php', + 'XMPValidate' => __DIR__ . '/includes/libs/xmp/XMPValidate.php', 'Xhprof' => __DIR__ . '/includes/libs/Xhprof.php', 'XhprofData' => __DIR__ . '/includes/libs/XhprofData.php', 'Xml' => __DIR__ . '/includes/Xml.php', diff --git a/includes/libs/xmp/XMP.php b/includes/libs/xmp/XMP.php new file mode 100644 index 0000000000..70f67b7858 --- /dev/null +++ b/includes/libs/xmp/XMP.php @@ -0,0 +1,1383 @@ +setLogger( $logger ); + } else { + $this->setLogger( new NullLogger() ); + } + + $this->items = XMPInfo::getItems(); + + $this->resetXMLParser(); + } + + public function setLogger( LoggerInterface $logger ) { + $this->logger = $logger; + } + + /** + * free the XML parser. 
+ * + * @note It is unclear to me if we really need to do this ourselves + * or if php garbage collection will automatically free the xmlParser + * when it is no longer needed. + */ + private function destroyXMLParser() { + if ( $this->xmlParser ) { + xml_parser_free( $this->xmlParser ); + $this->xmlParser = null; + } + } + + /** + * Main use is if a single item has multiple xmp documents describing it. + * For example in jpeg's with extendedXMP + */ + private function resetXMLParser() { + + $this->destroyXMLParser(); + + $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' ); + xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 ); + xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 ); + + xml_set_element_handler( $this->xmlParser, + [ $this, 'startElement' ], + [ $this, 'endElement' ] ); + + xml_set_character_data_handler( $this->xmlParser, [ $this, 'char' ] ); + + $this->parsable = self::PARSABLE_UNKNOWN; + $this->xmlParsableBuffer = ''; + } + + /** + * Check if this instance supports using this class + */ + public static function isSupported() { + return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' ); + } + + /** Get the result array. Do some post-processing before returning + * the array, and transform any metadata that is special-cased. + * + * @return array Array of results as an array of arrays suitable for + * FormatMetadata::getFormattedData(). + */ + public function getResults() { + // xmp-special is for metadata that affects how stuff + // is extracted. For example xmpNote:HasExtendedXMP. + + // It is also used to handle photoshop:AuthorsPosition + // which is weird and really part of another property, + // see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard. + // The location fields also use it. 
+ + $data = $this->results; + + if ( isset( $data['xmp-special']['AuthorsPosition'] ) + && is_string( $data['xmp-special']['AuthorsPosition'] ) + && isset( $data['xmp-general']['Artist'][0] ) + ) { + // Note, if there is more than one creator, + // this only applies to first. This also will + // only apply to the dc:Creator prop, not the + // exif:Artist prop. + + $data['xmp-general']['Artist'][0] = + $data['xmp-special']['AuthorsPosition'] . ', ' + . $data['xmp-general']['Artist'][0]; + } + + // Go through the LocationShown and LocationCreated + // changing it to the non-hierarchal form used by + // the other location fields. + + if ( isset( $data['xmp-special']['LocationShown'][0] ) + && is_array( $data['xmp-special']['LocationShown'][0] ) + ) { + // the is_array is just paranoia. It should always + // be an array. + foreach ( $data['xmp-special']['LocationShown'] as $loc ) { + if ( !is_array( $loc ) ) { + // To avoid copying over the _type meta-fields. + continue; + } + foreach ( $loc as $field => $val ) { + $data['xmp-general'][$field . 'Dest'][] = $val; + } + } + } + if ( isset( $data['xmp-special']['LocationCreated'][0] ) + && is_array( $data['xmp-special']['LocationCreated'][0] ) + ) { + // the is_array is just paranoia. It should always + // be an array. + foreach ( $data['xmp-special']['LocationCreated'] as $loc ) { + if ( !is_array( $loc ) ) { + // To avoid copying over the _type meta-fields. + continue; + } + foreach ( $loc as $field => $val ) { + $data['xmp-general'][$field . 'Created'][] = $val; + } + } + } + + // We don't want to return the special values, since they're + // special and not info to be stored about the file. + unset( $data['xmp-special'] ); + + // Convert GPSAltitude to negative if below sea level. + if ( isset( $data['xmp-exif']['GPSAltitudeRef'] ) + && isset( $data['xmp-exif']['GPSAltitude'] ) + ) { + + // Must convert to a real before multiplying by -1 + // XMPValidate guarantees there will always be a '/' in this value. 
+ list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] ); + $data['xmp-exif']['GPSAltitude'] = $nom / $denom; + + if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) { + $data['xmp-exif']['GPSAltitude'] *= -1; + } + unset( $data['xmp-exif']['GPSAltitudeRef'] ); + } + + return $data; + } + + /** + * Main function to call to parse XMP. Use getResults to + * get results. + * + * Also catches any errors during processing, writes them to + * debug log, blanks result array and returns false. + * + * @param string $content XMP data + * @param bool $allOfIt If this is all the data (true) or if its split up (false). Default true + * @throws RuntimeException + * @return bool Success. + */ + public function parse( $content, $allOfIt = true ) { + if ( !$this->xmlParser ) { + $this->resetXMLParser(); + } + try { + + // detect encoding by looking for BOM which is supposed to be in processing instruction. + // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf + if ( !$this->charset ) { + $bom = []; + if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/', + $content, $bom ) + ) { + switch ( $bom[0] ) { + case "\xFE\xFF": + $this->charset = 'UTF-16BE'; + break; + case "\xFF\xFE": + $this->charset = 'UTF-16LE'; + break; + case "\x00\x00\xFE\xFF": + $this->charset = 'UTF-32BE'; + break; + case "\xFF\xFE\x00\x00": + $this->charset = 'UTF-32LE'; + break; + case "\xEF\xBB\xBF": + $this->charset = 'UTF-8'; + break; + default: + // this should be impossible to get to + throw new RuntimeException( "Invalid BOM" ); + } + } else { + // standard specifically says, if no bom assume utf-8 + $this->charset = 'UTF-8'; + } + } + if ( $this->charset !== 'UTF-8' ) { + // don't convert if already utf-8 + MediaWiki\suppressWarnings(); + $content = iconv( $this->charset, 'UTF-8//IGNORE', $content ); + MediaWiki\restoreWarnings(); + } + + // Ensure the XMP block does not have an xml doctype declaration, which + // could declare 
entities unsafe to parse with xml_parse (T85848/T71210). + if ( $this->parsable !== self::PARSABLE_OK ) { + if ( $this->parsable === self::PARSABLE_NO ) { + throw new RuntimeException( 'Unsafe doctype declaration in XML.' ); + } + + $content = $this->xmlParsableBuffer . $content; + if ( !$this->checkParseSafety( $content ) ) { + if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) { + // parse wasn't Unsuccessful yet, so return true + // in this case. + return true; + } + $msg = ( $this->parsable === self::PARSABLE_NO ) ? + 'Unsafe doctype declaration in XML.' : + 'No root element found in XML.'; + throw new RuntimeException( $msg ); + } + } + + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); + if ( !$ok ) { + $code = xml_get_error_code( $this->xmlParser ); + $error = xml_error_string( $code ); + $line = xml_get_current_line_number( $this->xmlParser ); + $col = xml_get_current_column_number( $this->xmlParser ); + $offset = xml_get_current_byte_index( $this->xmlParser ); + + $this->logger->warning( + '{method} : Error reading XMP content: {error} ' . + '(line: {line} column: {column} byte offset: {offset})', + [ + 'method' => __METHOD__, + 'error_code' => $code, + 'error' => $error, + 'line' => $line, + 'column' => $col, + 'offset' => $offset, + 'content' => $content, + ] ); + $this->results = []; // blank if error. + $this->destroyXMLParser(); + return false; + } + } catch ( Exception $e ) { + $this->logger->warning( + '{method} Exception caught while parsing: ' . 
$e->getMessage(), + [ + 'method' => __METHOD__, + 'exception' => $e, + 'content' => $content, + ] + ); + $this->results = []; + return false; + } + if ( $allOfIt ) { + $this->destroyXMLParser(); + } + + return true; + } + + /** Entry point for XMPExtended blocks in jpeg files + * + * @todo In serious need of testing + * @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20 + * @param string $content XMPExtended block minus the namespace signature + * @return bool If it succeeded. + */ + public function parseExtended( $content ) { + // @todo FIXME: This is untested. Hard to find example files + // or programs that make such files.. + $guid = substr( $content, 0, 32 ); + if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] ) + || $this->results['xmp-special']['HasExtendedXMP'] !== $guid + ) { + $this->logger->info( __METHOD__ . + " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" ); + + return false; + } + $len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) ); + + if ( !$len || + $len['length'] < 4 || + $len['offset'] < 0 || + $len['offset'] > $len['length'] + ) { + $this->logger->info( + __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' + ); + + return false; + } + + // we're not very robust here. we should accept it in the wrong order. + // To quote the XMP standard: + // "A JPEG writer should write the ExtendedXMP marker segments in order, + // immediately following the StandardXMP. However, the JPEG standard + // does not require preservation of marker segment order. A robust JPEG + // reader should tolerate the marker segments in any order." + // On the other hand, the probability that an image will have more than + // 128k of metadata is rather low... so the probability that it will have + // > 128k, and be in the wrong order is very low... + + if ( $len['offset'] !== $this->extendedXMPOffset ) { + $this->logger->info( __METHOD__ . 
'Ignoring XMPExtended block due to wrong order. (Offset was '
+                . $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
+
+            return false;
+        }
+
+        if ( $len['offset'] === 0 ) {
+            // if we're starting the extended block, we've probably already
+            // done the XMPStandard block, so reset.
+            $this->resetXMLParser();
+        }
+
+        $this->extendedXMPOffset += $len['length'];
+
+        $actualContent = substr( $content, 40 );
+
+        if ( $this->extendedXMPOffset === strlen( $actualContent ) ) {
+            $atEnd = true;
+        } else {
+            $atEnd = false;
+        }
+
+        $this->logger->debug( __METHOD__ . 'Parsing a XMPExtended block' );
+
+        return $this->parse( $actualContent, $atEnd );
+    }
+
+    /**
+     * Character data handler
+     * Called whenever character data is found in the xmp document.
+     *
+     * Each chunk is trimmed first. Chunks that are empty after trimming are
+     * dropped, as is any data seen while in MODE_IGNORE. Everything else is
+     * appended to $this->charContent.
+     *
+     * Character data is only accepted in MODE_SIMPLE and MODE_QDESC. In any
+     * other mode, or before any mode has been pushed, an exception is thrown.
+     *
+     * As an example, this happens when we encounter XMP like:
+     *   <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+     * and are processing the 0/10 bit.
+     *
+     * @param XMLParser $parser XMLParser reference to the xml parser
+     * @param string $data Character data
+     * @throws RuntimeException On invalid data
+     */
+    public function char( $parser, $data ) {
+        // Trim once; whitespace-only chunks carry no value.
+        $data = trim( $data );
+        if ( $data === '' ) {
+            return;
+        }
+
+        if ( !isset( $this->mode[0] ) ) {
+            throw new RuntimeException( 'Unexpected character data before first rdf:Description element' );
+        }
+
+        if ( $this->mode[0] === self::MODE_IGNORE ) {
+            return;
+        }
+
+        if ( $this->mode[0] !== self::MODE_SIMPLE
+            && $this->mode[0] !== self::MODE_QDESC
+        ) {
+            throw new RuntimeException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
+        }
+
+        // to check, how does this handle w.s.
+        // (each chunk is trimmed on its own before being appended, so
+        // whitespace at a chunk boundary is not preserved)
+        if ( $this->charContent === false ) {
+            $this->charContent = $data;
+        } else {
+            $this->charContent .= $data;
+        }
+    }
+
+    /**
+     * Check if a block of XML is safe to pass to xml_parse, i.e.
doesn't + * contain a doctype declaration which could contain a dos attack if we + * parse it and expand internal entities (T85848). + * + * @param string $content xml string to check for parse safety + * @return bool true if the xml is safe to parse, false otherwise + */ + private function checkParseSafety( $content ) { + $reader = new XMLReader(); + $result = null; + + // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed + // instead of using XML(). + $reader->open( + 'data://text/plain,' . urlencode( $content ), + null, + LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET + ); + + $oldDisable = libxml_disable_entity_loader( true ); + /** @noinspection PhpUnusedLocalVariableInspection */ + $reset = new ScopedCallback( + 'libxml_disable_entity_loader', + [ $oldDisable ] + ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false ); + + // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning + // when parsing truncated XML, which causes unit tests to fail. + MediaWiki\suppressWarnings(); + while ( $reader->read() ) { + if ( $reader->nodeType === XMLReader::ELEMENT ) { + // Reached the first element without hitting a doctype declaration + $this->parsable = self::PARSABLE_OK; + $result = true; + break; + } + if ( $reader->nodeType === XMLReader::DOC_TYPE ) { + $this->parsable = self::PARSABLE_NO; + $result = false; + break; + } + } + MediaWiki\restoreWarnings(); + + if ( !is_null( $result ) ) { + return $result; + } + + // Reached the end of the parsable xml without finding an element + // or doctype. Buffer and try again. + $this->parsable = self::PARSABLE_BUFFERING; + $this->xmlParsableBuffer = $content; + return false; + } + + /** When we hit a closing element in MODE_IGNORE + * Check to see if this is the element we started to ignore, + * in which case we get out of MODE_IGNORE + * + * @param string $elm Namespace of element followed by a space and then tag name of element. 
+ */ + private function endElementModeIgnore( $elm ) { + if ( $this->curItem[0] === $elm ) { + array_shift( $this->curItem ); + array_shift( $this->mode ); + } + } + + /** + * Hit a closing element when in MODE_SIMPLE. + * This generally means that we finished processing a + * property value, and now have to save the result to the + * results array + * + * For example, when processing: + * 0/10 + * this deals with when we hit . + * + * Or it could be if we hit the end element of a property + * of a compound data structure (like a member of an array). + * + * @param string $elm Namespace, space, and tag name. + */ + private function endElementModeSimple( $elm ) { + if ( $this->charContent !== false ) { + if ( $this->processingArray ) { + // if we're processing an array, use the original element + // name instead of rdf:li. + list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); + } else { + list( $ns, $tag ) = explode( ' ', $elm, 2 ); + } + $this->saveValue( $ns, $tag, $this->charContent ); + + $this->charContent = false; // reset + } + array_shift( $this->curItem ); + array_shift( $this->mode ); + } + + /** + * Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG + * generally means we've finished processing a nested structure. + * resets some internal variables to indicate that. + * + * Note this means we hit the closing element not the "". + * + * @par For example, when processing: + * @code{,xml} + * 64 + * + * @endcode + * + * This method is called when we hit the "" tag. + * + * @param string $elm Namespace . space . tag name. + * @throws RuntimeException + */ + private function endElementNested( $elm ) { + + /* cur item must be the same as $elm, unless if in MODE_STRUCT + in which case it could also be rdf:Description */ + if ( $this->curItem[0] !== $elm + && !( $elm === self::NS_RDF . ' Description' + && $this->mode[0] === self::MODE_STRUCT ) + ) { + throw new RuntimeException( "nesting mismatch. got a but expected a curItem[0] . 
'>' ); + } + + // Validate structures. + list( $ns, $tag ) = explode( ' ', $elm, 2 ); + if ( isset( $this->items[$ns][$tag]['validate'] ) ) { + $info =& $this->items[$ns][$tag]; + $finalName = isset( $info['map_name'] ) + ? $info['map_name'] : $tag; + + if ( is_array( $info['validate'] ) ) { + $validate = $info['validate']; + } else { + $validator = new XMPValidate( $this->logger ); + $validate = [ $validator, $info['validate'] ]; + } + + if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) { + // This can happen if all the members of the struct failed validation. + $this->logger->debug( __METHOD__ . " <$ns:$tag> has no valid members." ); + } elseif ( is_callable( $validate ) ) { + $val =& $this->results['xmp-' . $info['map_group']][$finalName]; + call_user_func_array( $validate, [ $info, &$val, false ] ); + if ( is_null( $val ) ) { + // the idea being the validation function will unset the variable if + // its invalid. + $this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." ); + unset( $this->results['xmp-' . $info['map_group']][$finalName] ); + } + } else { + $this->logger->warning( __METHOD__ . " Validation function for $finalName (" + . $validate[0] . '::' . $validate[1] . '()) is not callable.' ); + } + } + + array_shift( $this->curItem ); + array_shift( $this->mode ); + $this->ancestorStruct = false; + $this->processingArray = false; + $this->itemLang = false; + } + + /** + * Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) + * Add information about what type of element this is. + * + * Note we still have to hit the outer "" + * + * @par For example, when processing: + * @code{,xml} + * 64 + * + * @endcode + * + * This method is called when we hit the "". + * (For comparison, we call endElementModeSimple when we + * hit the "") + * + * @param string $elm Namespace . ' ' . 
element name + * @throws RuntimeException + */ + private function endElementModeLi( $elm ) { + list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); + $info = $this->items[$ns][$tag]; + $finalName = isset( $info['map_name'] ) + ? $info['map_name'] : $tag; + + array_shift( $this->mode ); + + if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) { + $this->logger->debug( __METHOD__ . " Empty compund element $finalName." ); + + return; + } + + if ( $elm === self::NS_RDF . ' Seq' ) { + $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol'; + } elseif ( $elm === self::NS_RDF . ' Bag' ) { + $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul'; + } elseif ( $elm === self::NS_RDF . ' Alt' ) { + // extra if needed as you could theoretically have a non-language alt. + if ( $info['mode'] === self::MODE_LANG ) { + $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang'; + } + } else { + throw new RuntimeException( + __METHOD__ . " expected or but instead got $elm." + ); + } + } + + /** + * End element while in MODE_QDESC + * mostly when ending an element when we have a simple value + * that has qualifiers. + * + * Qualifiers aren't all that common, and we don't do anything + * with them. + * + * @param string $elm Namespace and element + */ + private function endElementModeQDesc( $elm ) { + + if ( $elm === self::NS_RDF . ' value' ) { + list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); + $this->saveValue( $ns, $tag, $this->charContent ); + + return; + } else { + array_shift( $this->mode ); + array_shift( $this->curItem ); + } + } + + /** + * Handler for hitting a closing element. + * + * generally just calls a helper function depending on what + * mode we're in. + * + * Ignores the outer wrapping elements that are optional in + * xmp and have no meaning. + * + * @param XMLParser $parser + * @param string $elm Namespace . ' ' . 
element name
+     * @throws RuntimeException On an empty mode stack, a missing current item
+     *  outside MODE_INITIAL, an unexpected close while in MODE_INITIAL, or
+     *  when the helper for the current mode rejects the element.
+     */
+    public function endElement( $parser, $elm ) {
+        if ( $elm === ( self::NS_RDF . ' RDF' )
+            || $elm === 'adobe:ns:meta/ xmpmeta'
+            || $elm === 'adobe:ns:meta/ xapmeta'
+        ) {
+            // ignore these.
+            return;
+        }
+
+        if ( $elm === self::NS_RDF . ' type' ) {
+            // these aren't really supported properly yet.
+            // However, it appears they almost never used.
+            $this->logger->info( __METHOD__ . ' encountered <rdf:type>' );
+        }
+
+        if ( strpos( $elm, ' ' ) === false ) {
+            // This probably shouldn't happen.
+            // However, there is a bug in an adobe product
+            // that forgets the namespace on some things.
+            // (Luckily they are unimportant things).
+            $this->logger->info( __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
+
+            return;
+        }
+
+        // Check the mode *stack*, not its top entry: $this->mode[0] is a
+        // single mode value, so counting it can never detect an empty stack
+        // (and count() on a non-countable is a TypeError on PHP 8).
+        if ( count( $this->mode ) === 0 ) {
+            // This should never ever happen and means
+            // there is a pretty major bug in this class.
+            throw new RuntimeException( 'Encountered end element with no mode' );
+        }
+
+        if ( count( $this->curItem ) === 0 && $this->mode[0] !== self::MODE_INITIAL ) {
+            // just to be paranoid. Should always have a curItem, except for initially
+            // (aka during MODE_INITIAL).
+            throw new RuntimeException( "Hit end element but no curItem" );
+        }
+
+        // Dispatch to the helper for the mode on top of the stack.
+        switch ( $this->mode[0] ) {
+            case self::MODE_IGNORE:
+                $this->endElementModeIgnore( $elm );
+                break;
+            case self::MODE_SIMPLE:
+                $this->endElementModeSimple( $elm );
+                break;
+            case self::MODE_STRUCT:
+            case self::MODE_SEQ:
+            case self::MODE_BAG:
+            case self::MODE_LANG:
+            case self::MODE_BAGSTRUCT:
+                $this->endElementNested( $elm );
+                break;
+            case self::MODE_INITIAL:
+                if ( $elm === self::NS_RDF . ' Description' ) {
+                    array_shift( $this->mode );
+                } else {
+                    throw new RuntimeException( 'Element ended unexpectedly while in MODE_INITIAL' );
+                }
+                break;
+            case self::MODE_LI:
+            case self::MODE_LI_LANG:
+                $this->endElementModeLi( $elm );
+                break;
+            case self::MODE_QDESC:
+                $this->endElementModeQDesc( $elm );
+                break;
+            default:
+                $this->logger->warning( __METHOD__ . " no mode (elm = $elm)" );
+                break;
+        }
+    }
+
+    /**
+     * Hit an opening element while in MODE_IGNORE
+     *
+     * XMP is extensible, so ignore any tag we don't understand.
+     *
+     * Mostly ignores, unless we encounter the element that we are ignoring.
+     * in which case we add it to the item stack, so we can ignore things
+     * that are nested, correctly.
+     *
+     * @param string $elm Namespace . ' ' . tag name
+     */
+    private function startElementModeIgnore( $elm ) {
+        if ( $elm === $this->curItem[0] ) {
+            array_unshift( $this->curItem, $elm );
+            array_unshift( $this->mode, self::MODE_IGNORE );
+        }
+    }
+
+    /**
+     * Start element in MODE_BAG (unordered array)
+     * this should always be <rdf:Bag>
+     *
+     * @param string $elm Namespace . ' ' . tag
+     * @throws RuntimeException If we have an element that's not <rdf:Bag>
+     */
+    private function startElementModeBag( $elm ) {
+        if ( $elm === self::NS_RDF . ' Bag' ) {
+            array_unshift( $this->mode, self::MODE_LI );
+        } else {
+            throw new RuntimeException( "Expected but got $elm." );
+        }
+    }
+
+    /**
+     * Start element in MODE_SEQ (ordered array)
+     * this should always be <rdf:Seq>
+     *
+     * @param string $elm Namespace . ' ' . tag
+     * @throws RuntimeException If we have an element that's not <rdf:Seq>
+     */
+    private function startElementModeSeq( $elm ) {
+        if ( $elm === self::NS_RDF . ' Seq' ) {
+            array_unshift( $this->mode, self::MODE_LI );
+        } elseif ( $elm === self::NS_RDF . ' Bag' ) {
+            # bug 27105
+            $this->logger->info( __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
+                . ' it is a Seq, since some buggy software is known to screw this up.'
); + array_unshift( $this->mode, self::MODE_LI ); + } else { + throw new RuntimeException( "Expected but got $elm." ); + } + } + + /** + * Start element in MODE_LANG (language alternative) + * this should always be + * + * This tag tends to be used for metadata like describe this + * picture, which can be translated into multiple languages. + * + * XMP supports non-linguistic alternative selections, + * which are really only used for thumbnails, which + * we don't care about. + * + * @param string $elm Namespace . ' ' . tag + * @throws RuntimeException If we have an element that's not + */ + private function startElementModeLang( $elm ) { + if ( $elm === self::NS_RDF . ' Alt' ) { + array_unshift( $this->mode, self::MODE_LI_LANG ); + } else { + throw new RuntimeException( "Expected but got $elm." ); + } + } + + /** + * Handle an opening element when in MODE_SIMPLE + * + * This should not happen often. This is for if a simple element + * already opened has a child element. Could happen for a + * qualified element. + * + * For example: + * 0/10 + * Bar + * + * + * This method is called when processing the element + * + * @param string $elm Namespace and tag names separated by space. + * @param array $attribs Attributes of the element. + * @throws RuntimeException + */ + private function startElementModeSimple( $elm, $attribs ) { + if ( $elm === self::NS_RDF . ' Description' ) { + // If this value has qualifiers + array_unshift( $this->mode, self::MODE_QDESC ); + array_unshift( $this->curItem, $this->curItem[0] ); + + if ( isset( $attribs[self::NS_RDF . ' value'] ) ) { + list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); + $this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] ); + } + } elseif ( $elm === self::NS_RDF . ' value' ) { + // This should not be here. + throw new RuntimeException( __METHOD__ . ' Encountered where it was unexpected.' ); + } else { + // something else we don't recognize, like a qualifier maybe. 
+ $this->logger->info( __METHOD__ . + " Encountered element <$elm> where only expecting character data as value of " . + $this->curItem[0] ); + array_unshift( $this->mode, self::MODE_IGNORE ); + array_unshift( $this->curItem, $elm ); + } + } + + /** + * Start an element when in MODE_QDESC. + * This generally happens when a simple element has an inner + * rdf:Description to hold qualifier elements. + * + * For example in: + * 0/10 + * Bar + * + * Called when processing the or . + * + * @param string $elm Namespace and tag name separated by a space. + * + */ + private function startElementModeQDesc( $elm ) { + if ( $elm === self::NS_RDF . ' value' ) { + return; // do nothing + } else { + // otherwise its a qualifier, which we ignore + array_unshift( $this->mode, self::MODE_IGNORE ); + array_unshift( $this->curItem, $elm ); + } + } + + /** + * Starting an element when in MODE_INITIAL + * This usually happens when we hit an element inside + * the outer rdf:Description + * + * This is generally where most properties start. + * + * @param string $ns Namespace + * @param string $tag Tag name (without namespace prefix) + * @param array $attribs Array of attributes + * @throws RuntimeException + */ + private function startElementModeInitial( $ns, $tag, $attribs ) { + if ( $ns !== self::NS_RDF ) { + + if ( isset( $this->items[$ns][$tag] ) ) { + if ( isset( $this->items[$ns][$tag]['structPart'] ) ) { + // If this element is supposed to appear only as + // a child of a structure, but appears here (not as + // a child of a struct), then something weird is + // happening, so ignore this element and its children. + + $this->logger->warning( "Encountered <$ns:$tag> outside" + . " of its expected parent. Ignoring." ); + + array_unshift( $this->mode, self::MODE_IGNORE ); + array_unshift( $this->curItem, $ns . ' ' . $tag ); + + return; + } + $mode = $this->items[$ns][$tag]['mode']; + array_unshift( $this->mode, $mode ); + array_unshift( $this->curItem, $ns . ' ' . 
$tag );
+                if ( $mode === self::MODE_STRUCT ) {
+                    $this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
+                        ? $this->items[$ns][$tag]['map_name'] : $tag;
+                }
+                if ( $this->charContent !== false ) {
+                    // Something weird.
+                    // Should not happen in valid XMP.
+                    throw new RuntimeException( 'tag nested in non-whitespace characters.' );
+                }
+            } else {
+                // This element is not on our list of allowed elements so ignore.
+                $this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
+                array_unshift( $this->mode, self::MODE_IGNORE );
+                array_unshift( $this->curItem, $ns . ' ' . $tag );
+
+                return;
+            }
+        }
+        // process attributes
+        $this->doAttribs( $attribs );
+    }
+
+    /**
+     * Hit an opening element when in a Struct (MODE_STRUCT)
+     * This is generally for fields of a compound property.
+     *
+     * Example of a struct (abbreviated; flash has more properties):
+     *
+     * <exif:Flash> <rdf:Description> <exif:Fired>True</exif:Fired>
+     *  <exif:Mode>1</exif:Mode></rdf:Description></exif:Flash>
+     *
+     * or:
+     *
+     * <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired>
+     *  <exif:Mode>1</exif:Mode></exif:Flash>
+     *
+     * A known non-RDF tag pushes its configured mode and its "ns tag" name.
+     * An unknown one is pushed in MODE_IGNORE so that it and its children
+     * are skipped. A nested rdf:Description has its attributes processed and
+     * pushes another MODE_STRUCT level for the current item.
+     *
+     * @param string $ns Namespace
+     * @param string $tag Tag name (no ns)
+     * @param array $attribs Array of attribs w/ values.
+     * @throws RuntimeException If the tag is not an allowed child of the
+     *  enclosing struct, or if it follows non-whitespace character data.
+     */
+    private function startElementModeStruct( $ns, $tag, $attribs ) {
+        if ( $ns !== self::NS_RDF ) {
+            if ( isset( $this->items[$ns][$tag] ) ) {
+                if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
+                    && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
+                ) {
+                    // This assumes that we don't have inter-namespace nesting
+                    // which we don't in all the properties we're interested in.
+                    throw new RuntimeException( " <$tag> appeared nested in <" . $this->ancestorStruct
+                        . "> where it is not allowed." );
+                }
+                array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
+                array_unshift( $this->curItem, $ns . ' ' . $tag );
+                if ( $this->charContent !== false ) {
+                    // Something weird.
+                    // Should not happen in valid XMP.
+                    throw new RuntimeException( "tag <$tag> nested in non-whitespace characters (" .
+                        $this->charContent . ")." );
+                }
+            } else {
+                // Unknown tag: ignore it and its children. The name pushed
+                // here must be the same "ns tag" string that endElement()
+                // receives, otherwise endElementModeIgnore() never pops it.
+                array_unshift( $this->mode, self::MODE_IGNORE );
+                array_unshift( $this->curItem, $ns . ' ' . $tag );
+
+                return;
+            }
+        }
+
+        if ( $ns === self::NS_RDF && $tag === 'Description' ) {
+            $this->doAttribs( $attribs );
+            array_unshift( $this->mode, self::MODE_STRUCT );
+            array_unshift( $this->curItem, $this->curItem[0] );
+        }
+    }
+
+    /**
+     * opening element in MODE_LI
+     * process elements of arrays.
+     *
+     * Example:
+     * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li> </rdf:Seq> </exif:ISOSpeedRatings>
+     *
+     * This method is called when we hit the <rdf:li> element.
+     *
+     * @param string $elm Namespace . ' ' . tagname
+     * @param array $attribs Attributes. (needed for BAGSTRUCTS)
+     * @throws RuntimeException If gets a tag other than <rdf:li>
+     */
+    private function startElementModeLi( $elm, $attribs ) {
+        if ( ( $elm ) !== self::NS_RDF . ' li' ) {
+            throw new RuntimeException( " expected but got $elm." );
+        }
+
+        if ( !isset( $this->mode[1] ) ) {
+            // This should never ever ever happen. Checking for it
+            // to be paranoid.
+            throw new RuntimeException( 'In mode Li, but no 2xPrevious mode!' );
+        }
+
+        if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
+            // This list item contains a compound (STRUCT) value.
+            array_unshift( $this->mode, self::MODE_STRUCT );
+            array_unshift( $this->curItem, $elm );
+            $this->processingArray = true;
+
+            if ( !isset( $this->curItem[1] ) ) {
+                // be paranoid.
+                throw new RuntimeException( 'Can not find parent of BAGSTRUCT.' );
+            }
+            list( $curNS, $curTag ) = explode( ' ', $this->curItem[1] );
+            $this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
+                ? $this->items[$curNS][$curTag]['map_name'] : $curTag;
+
+            $this->doAttribs( $attribs );
+        } else {
+            // Normal BAG or SEQ containing simple values.
+            array_unshift( $this->mode, self::MODE_SIMPLE );
+            // need to add curItem[0] on again since one is for the specific item
+            // and one is for the entire group.
+            array_unshift( $this->curItem, $this->curItem[0] );
+            $this->processingArray = true;
+        }
+    }
+
+    /**
+     * Opening element in MODE_LI_LANG.
+ * process elements of language alternatives + * + * Example: + * My house + * + * + * This method is called when we hit the element. + * + * @param string $elm Namespace . ' ' . tag + * @param array $attribs Array of elements (most importantly xml:lang) + * @throws RuntimeException If gets a tag other than or if no xml:lang + */ + private function startElementModeLiLang( $elm, $attribs ) { + if ( $elm !== self::NS_RDF . ' li' ) { + throw new RuntimeException( __METHOD__ . " expected but got $elm." ); + } + if ( !isset( $attribs[self::NS_XML . ' lang'] ) + || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] ) + ) { + throw new RuntimeException( __METHOD__ + . " did not contain, or has invalid xml:lang attribute in lang alternative" ); + } + + // Lang is case-insensitive. + $this->itemLang = strtolower( $attribs[self::NS_XML . ' lang'] ); + + // need to add curItem[0] on again since one is for the specific item + // and one is for the entire group. + array_unshift( $this->curItem, $this->curItem[0] ); + array_unshift( $this->mode, self::MODE_SIMPLE ); + $this->processingArray = true; + } + + /** + * Hits an opening element. + * Generally just calls a helper based on what MODE we're in. + * Also does some initial set up for the wrapper element + * + * @param XMLParser $parser + * @param string $elm Namespace "" element + * @param array $attribs Attribute name => value + * @throws RuntimeException + */ + function startElement( $parser, $elm, $attribs ) { + + if ( $elm === self::NS_RDF . ' RDF' + || $elm === 'adobe:ns:meta/ xmpmeta' + || $elm === 'adobe:ns:meta/ xapmeta' + ) { + /* ignore. */ + return; + } elseif ( $elm === self::NS_RDF . ' Description' ) { + if ( count( $this->mode ) === 0 ) { + // outer rdf:desc + array_unshift( $this->mode, self::MODE_INITIAL ); + } + } elseif ( $elm === self::NS_RDF . ' type' ) { + // This doesn't support rdf:type properly. 
+ // In practise I have yet to see a file that + // uses this element, however it is mentioned + // on page 25 of part 1 of the xmp standard. + // Also it seems as if exiv2 and exiftool do not support + // this either (That or I misunderstand the standard) + $this->logger->info( __METHOD__ . ' Encountered which isn\'t currently supported' ); + } + + if ( strpos( $elm, ' ' ) === false ) { + // This probably shouldn't happen. + $this->logger->info( __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." ); + + return; + } + + list( $ns, $tag ) = explode( ' ', $elm, 2 ); + + if ( count( $this->mode ) === 0 ) { + // This should not happen. + throw new RuntimeException( 'Error extracting XMP, ' + . "encountered <$elm> with no mode" ); + } + + switch ( $this->mode[0] ) { + case self::MODE_IGNORE: + $this->startElementModeIgnore( $elm ); + break; + case self::MODE_SIMPLE: + $this->startElementModeSimple( $elm, $attribs ); + break; + case self::MODE_INITIAL: + $this->startElementModeInitial( $ns, $tag, $attribs ); + break; + case self::MODE_STRUCT: + $this->startElementModeStruct( $ns, $tag, $attribs ); + break; + case self::MODE_BAG: + case self::MODE_BAGSTRUCT: + $this->startElementModeBag( $elm ); + break; + case self::MODE_SEQ: + $this->startElementModeSeq( $elm ); + break; + case self::MODE_LANG: + $this->startElementModeLang( $elm ); + break; + case self::MODE_LI_LANG: + $this->startElementModeLiLang( $elm, $attribs ); + break; + case self::MODE_LI: + $this->startElementModeLi( $elm, $attribs ); + break; + case self::MODE_QDESC: + $this->startElementModeQDesc( $elm ); + break; + default: + throw new RuntimeException( 'StartElement in unknown mode: ' . $this->mode[0] ); + } + } + + // @codingStandardsIgnoreStart Generic.Files.LineLength + /** + * Process attributes. + * Simple values can be stored as either a tag or attribute + * + * Often the initial "" tag just has all the simple + * properties as attributes. 
+ * + * @par Example: + * @code + * + * @endcode + * + * @param array $attribs Array attribute=>value + * @throws RuntimeException + */ + // @codingStandardsIgnoreEnd + private function doAttribs( $attribs ) { + // first check for rdf:parseType attribute, as that can change + // how the attributes are interperted. + + if ( isset( $attribs[self::NS_RDF . ' parseType'] ) + && $attribs[self::NS_RDF . ' parseType'] === 'Resource' + && $this->mode[0] === self::MODE_SIMPLE + ) { + // this is equivalent to having an inner rdf:Description + $this->mode[0] = self::MODE_QDESC; + } + foreach ( $attribs as $name => $val ) { + if ( strpos( $name, ' ' ) === false ) { + // This shouldn't happen, but so far some old software forgets namespace + // on rdf:about. + $this->logger->info( __METHOD__ . ' Encountered non-namespaced attribute: ' + . " $name=\"$val\". Skipping. " ); + continue; + } + list( $ns, $tag ) = explode( ' ', $name, 2 ); + if ( $ns === self::NS_RDF ) { + if ( $tag === 'value' || $tag === 'resource' ) { + // resource is for url. + // value attribute is a weird way of just putting the contents. + $this->char( $this->xmlParser, $val ); + } + } elseif ( isset( $this->items[$ns][$tag] ) ) { + if ( $this->mode[0] === self::MODE_SIMPLE ) { + throw new RuntimeException( __METHOD__ + . " $ns:$tag found as attribute where not allowed" ); + } + $this->saveValue( $ns, $tag, $val ); + } else { + $this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." ); + } + } + } + + /** + * Given an extracted value, save it to results array + * + * note also uses $this->ancestorStruct and + * $this->processingArray to determine what name to + * save the value under. (in addition to $tag). + * + * @param string $ns Namespace of tag this is for + * @param string $tag Tag name + * @param string $val Value to save + */ + private function saveValue( $ns, $tag, $val ) { + + $info =& $this->items[$ns][$tag]; + $finalName = isset( $info['map_name'] ) + ? 
$info['map_name'] : $tag; + if ( isset( $info['validate'] ) ) { + if ( is_array( $info['validate'] ) ) { + $validate = $info['validate']; + } else { + $validator = new XMPValidate( $this->logger ); + $validate = [ $validator, $info['validate'] ]; + } + + if ( is_callable( $validate ) ) { + call_user_func_array( $validate, [ $info, &$val, true ] ); + // the reasoning behind using &$val instead of using the return value + // is to be consistent between here and validating structures. + if ( is_null( $val ) ) { + $this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." ); + + return; + } + } else { + $this->logger->warning( __METHOD__ . " Validation function for $finalName (" + . $validate[0] . '::' . $validate[1] . '()) is not callable.' ); + } + } + + if ( $this->ancestorStruct && $this->processingArray ) { + // Aka both an array and a struct. ( self::MODE_BAGSTRUCT ) + $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val; + } elseif ( $this->ancestorStruct ) { + $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val; + } elseif ( $this->processingArray ) { + if ( $this->itemLang === false ) { + // normal array + $this->results['xmp-' . $info['map_group']][$finalName][] = $val; + } else { + // lang array. + $this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val; + } + } else { + $this->results['xmp-' . $info['map_group']][$finalName] = $val; + } + } +} diff --git a/includes/libs/xmp/XMPInfo.php b/includes/libs/xmp/XMPInfo.php new file mode 100644 index 0000000000..052be33a4b --- /dev/null +++ b/includes/libs/xmp/XMPInfo.php @@ -0,0 +1,1168 @@ + true ). + * Only used with validateClosed. + * * rangeLow and rangeHigh - Alternative to choices for numeric ranges. + * Again for validateClosed only. + * * children - For MODE_STRUCT items, allowed children. + * * structPart - Indicates that this element can only appear as a member + * of a structure. 
+ * + * Currently this just has a bunch of EXIF values as this class is only half-done. + */ + static private $items = [ + 'http://ns.adobe.com/exif/1.0/' => [ + 'ApertureValue' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'BrightnessValue' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'CompressedBitsPerPixel' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'DigitalZoomRatio' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'ExposureBiasValue' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'ExposureIndex' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'ExposureTime' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'FlashEnergy' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ], + 'FNumber' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'FocalLength' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'FocalPlaneXResolution' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'FocalPlaneYResolution' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSAltitude' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ], + 'GPSDestBearing' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSDestDistance' => [ + 'map_group' => 'exif', + 'mode' => 
XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSDOP' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSImgDirection' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSSpeed' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'GPSTrack' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'MaxApertureValue' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'ShutterSpeedValue' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + 'SubjectDistance' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ], + /* Flash */ + 'Flash' => [ + 'mode' => XMPReader::MODE_STRUCT, + 'children' => [ + 'Fired' => true, + 'Function' => true, + 'Mode' => true, + 'RedEyeMode' => true, + 'Return' => true, + ], + 'validate' => 'validateFlash', + 'map_group' => 'exif', + ], + 'Fired' => [ + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'Function' => [ + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'Mode' => [ + 'map_group' => 'exif', + 'validate' => 'validateClosed', + 'mode' => XMPReader::MODE_SIMPLE, + 'choices' => [ '0' => true, '1' => true, + '2' => true, '3' => true ], + 'structPart' => true, + ], + 'Return' => [ + 'map_group' => 'exif', + 'validate' => 'validateClosed', + 'mode' => XMPReader::MODE_SIMPLE, + 'choices' => [ '0' => true, + '2' => true, '3' => true ], + 'structPart' => true, + ], + 'RedEyeMode' => [ + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 
'structPart' => true, + ], + /* End Flash */ + 'ISOSpeedRatings' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger' + ], + /* end rational things */ + 'ColorSpace' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true, '65535' => true ], + ], + 'ComponentsConfiguration' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true, '2' => true, '3' => true, '4' => true, + '5' => true, '6' => true ] + ], + 'Contrast' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '0' => true, '1' => true, '2' => true ] + ], + 'CustomRendered' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '0' => true, '1' => true ] + ], + 'DateTimeOriginal' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ], + 'DateTimeDigitized' => [ /* xmp:CreateDate */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ], + /* todo: there might be interesting information in + * exif:DeviceSettingDescription, but need to find an + * example + */ + 'ExifVersion' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'ExposureMode' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ], + 'ExposureProgram' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 8, + ], + 'FileSource' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '3' => true ] + ], + 'FlashpixVersion' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 
'FocalLengthIn35mmFilm' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'FocalPlaneResolutionUnit' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '2' => true, '3' => true ], + ], + 'GainControl' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 4, + ], + /* this value is post-processed out later */ + 'GPSAltitudeRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '0' => true, '1' => true ], + ], + 'GPSAreaInformation' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'GPSDestBearingRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'T' => true, 'M' => true ], + ], + 'GPSDestDistanceRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'K' => true, 'M' => true, + 'N' => true ], + ], + 'GPSDestLatitude' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ], + 'GPSDestLongitude' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ], + 'GPSDifferential' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '0' => true, '1' => true ], + ], + 'GPSImgDirectionRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'T' => true, 'M' => true ], + ], + 'GPSLatitude' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ], + 'GPSLongitude' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ], + 'GPSMapDatum' => [ + 'map_group' 
=> 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'GPSMeasureMode' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '2' => true, '3' => true ] + ], + 'GPSProcessingMethod' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'GPSSatellites' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'GPSSpeedRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'K' => true, 'M' => true, + 'N' => true ], + ], + 'GPSStatus' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'A' => true, 'V' => true ] + ], + 'GPSTimeStamp' => [ + 'map_group' => 'exif', + // Note: in exif, GPSDateStamp does not include + // the time, where here it does. + 'map_name' => 'GPSDateStamp', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ], + 'GPSTrackRef' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ 'T' => true, 'M' => true ] + ], + 'GPSVersionID' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'ImageUniqueID' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'LightSource' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + /* can't use a range, as it skips... 
*/ + 'choices' => [ '0' => true, '1' => true, + '2' => true, '3' => true, '4' => true, + '9' => true, '10' => true, '11' => true, + '12' => true, '13' => true, + '14' => true, '15' => true, + '17' => true, '18' => true, + '19' => true, '20' => true, + '21' => true, '22' => true, + '23' => true, '24' => true, + '255' => true, + ], + ], + 'MeteringMode' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 6, + 'choices' => [ '255' => true ], + ], + /* Pixel(X|Y)Dimension are rather useless, but for + * completeness since we do it with exif. + */ + 'PixelXDimension' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'PixelYDimension' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'Saturation' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ], + 'SceneCaptureType' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 3, + ], + 'SceneType' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true ], + ], + // Note, 6 is not valid SensingMethod. + 'SensingMethod' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 1, + 'rangeHigh' => 5, + 'choices' => [ '7' => true, 8 => true ], + ], + 'Sharpness' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ], + 'SpectralSensitivity' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + // This tag should perhaps be displayed to user better. 
+ 'SubjectArea' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ], + 'SubjectDistanceRange' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 3, + ], + 'SubjectLocation' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ], + 'UserComment' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ], + 'WhiteBalance' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '0' => true, '1' => true ] + ], + ], + 'http://ns.adobe.com/tiff/1.0/' => [ + 'Artist' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'BitsPerSample' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ], + 'Compression' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true, '6' => true ], + ], + /* this prop should not be used in XMP. dc:rights is the correct prop */ + 'Copyright' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ], + 'DateTime' => [ /* proper prop is xmp:ModifyDate */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ], + 'ImageDescription' => [ /* proper one is dc:description */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ], + 'ImageLength' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'ImageWidth' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'Make' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Model' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + /**** Do not extract this property + * It interferes with auto exif rotation. 
+ * 'Orientation' => array( + * 'map_group' => 'exif', + * 'mode' => XMPReader::MODE_SIMPLE, + * 'validate' => 'validateClosed', + * 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true, 5 => true, + * '6' => true, '7' => true, '8' => true ), + *), + ******/ + 'PhotometricInterpretation' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '2' => true, '6' => true ], + ], + 'PlanerConfiguration' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true, '2' => true ], + ], + 'PrimaryChromaticities' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ], + 'ReferenceBlackWhite' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ], + 'ResolutionUnit' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '2' => true, '3' => true ], + ], + 'SamplesPerPixel' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ], + 'Software' => [ /* see xmp:CreatorTool */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + /* ignore TransferFunction */ + 'WhitePoint' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ], + 'XResolution' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ], + 'YResolution' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ], + 'YCbCrCoefficients' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ], + 'YCbCrPositioning' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => [ '1' => true, '2' => true ], + ], + /******** 
+ * Disable extracting this property (bug 31944) + * Several files have a string instead of a Seq + * for this property. XMPReader doesn't handle + * mismatched types very gracefully (it marks + * the entire file as invalid, instead of just + * the relavent prop). Since this prop + * doesn't communicate all that useful information + * just disable this prop for now, until such + * XMPReader is more graceful (bug 32172) + * 'YCbCrSubSampling' => array( + * 'map_group' => 'exif', + * 'mode' => XMPReader::MODE_SEQ, + * 'validate' => 'validateClosed', + * 'choices' => array( '1' => true, '2' => true ), + * ), + */ + ], + 'http://ns.adobe.com/exif/1.0/aux/' => [ + 'Lens' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'SerialNumber' => [ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'OwnerName' => [ + 'map_group' => 'exif', + 'map_name' => 'CameraOwnerName', + 'mode' => XMPReader::MODE_SIMPLE, + ], + ], + 'http://purl.org/dc/elements/1.1/' => [ + 'title' => [ + 'map_group' => 'general', + 'map_name' => 'ObjectName', + 'mode' => XMPReader::MODE_LANG + ], + 'description' => [ + 'map_group' => 'general', + 'map_name' => 'ImageDescription', + 'mode' => XMPReader::MODE_LANG + ], + 'contributor' => [ + 'map_group' => 'general', + 'map_name' => 'dc-contributor', + 'mode' => XMPReader::MODE_BAG + ], + 'coverage' => [ + 'map_group' => 'general', + 'map_name' => 'dc-coverage', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'creator' => [ + 'map_group' => 'general', + 'map_name' => 'Artist', // map with exif Artist, iptc byline (2:80) + 'mode' => XMPReader::MODE_SEQ, + ], + 'date' => [ + 'map_group' => 'general', + // Note, not mapped with other date properties, as this type of date is + // non-specific: "A point or period of time associated with an event in + // the lifecycle of the resource" + 'map_name' => 'dc-date', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateDate', + ], + /* Do not extract dc:format, as we've got better 
ways to determine MIME type */ + 'identifier' => [ + 'map_group' => 'deprecated', + 'map_name' => 'Identifier', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'language' => [ + 'map_group' => 'general', + 'map_name' => 'LanguageCode', /* mapped with iptc 2:135 */ + 'mode' => XMPReader::MODE_BAG, + 'validate' => 'validateLangCode', + ], + 'publisher' => [ + 'map_group' => 'general', + 'map_name' => 'dc-publisher', + 'mode' => XMPReader::MODE_BAG, + ], + // for related images/resources + 'relation' => [ + 'map_group' => 'general', + 'map_name' => 'dc-relation', + 'mode' => XMPReader::MODE_BAG, + ], + 'rights' => [ + 'map_group' => 'general', + 'map_name' => 'Copyright', + 'mode' => XMPReader::MODE_LANG, + ], + // Note: source is not mapped with iptc source, since iptc + // source describes the source of the image in terms of a person + // who provided the image, where this is to describe an image that the + // current one is based on. + 'source' => [ + 'map_group' => 'general', + 'map_name' => 'dc-source', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'subject' => [ + 'map_group' => 'general', + 'map_name' => 'Keywords', /* maps to iptc 2:25 */ + 'mode' => XMPReader::MODE_BAG, + ], + 'type' => [ + 'map_group' => 'general', + 'map_name' => 'dc-type', + 'mode' => XMPReader::MODE_BAG, + ], + ], + 'http://ns.adobe.com/xap/1.0/' => [ + 'CreateDate' => [ + 'map_group' => 'general', + 'map_name' => 'DateTimeDigitized', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ], + 'CreatorTool' => [ + 'map_group' => 'general', + 'map_name' => 'Software', + 'mode' => XMPReader::MODE_SIMPLE + ], + 'Identifier' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + ], + 'Label' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'ModifyDate' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'DateTime', + 'validate' => 'validateDate', + ], + 'MetadataDate' => [ + 'map_group' => 'general', + 'mode' => 
XMPReader::MODE_SIMPLE, + // map_name to be consistent with other date names. + 'map_name' => 'DateTimeMetadata', + 'validate' => 'validateDate', + ], + 'Nickname' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Rating' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRating', + ], + ], + 'http://ns.adobe.com/xap/1.0/rights/' => [ + 'Certificate' => [ + 'map_group' => 'general', + 'map_name' => 'RightsCertificate', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Marked' => [ + 'map_group' => 'general', + 'map_name' => 'Copyrighted', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateBoolean', + ], + 'Owner' => [ + 'map_group' => 'general', + 'map_name' => 'CopyrightOwner', + 'mode' => XMPReader::MODE_BAG, + ], + // this seems similar to dc:rights. + 'UsageTerms' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_LANG, + ], + 'WebStatement' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + ], + // XMP media management. + 'http://ns.adobe.com/xap/1.0/mm/' => [ + // if we extract the exif UniqueImageID, might + // as well do this too. + 'OriginalDocumentID' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + // It might also be useful to do xmpMM:LastURL + // and xmpMM:DerivedFrom as you can potentially, + // get the url of this document/source for this + // document. However whats more likely is you'd + // get a file:// url for the path of the doc, + // which is somewhat of a privacy issue. 
+ ], + 'http://creativecommons.org/ns#' => [ + 'license' => [ + 'map_name' => 'LicenseUrl', + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'morePermissions' => [ + 'map_name' => 'MorePermissionsUrl', + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'attributionURL' => [ + 'map_group' => 'general', + 'map_name' => 'AttributionUrl', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'attributionName' => [ + 'map_group' => 'general', + 'map_name' => 'PreferredAttributionName', + 'mode' => XMPReader::MODE_SIMPLE, + ], + ], + // Note, this property affects how jpeg metadata is extracted. + 'http://ns.adobe.com/xmp/note/' => [ + 'HasExtendedXMP' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + ], + ], + /* Note, in iptc schemas, the legacy properties are denoted + * as deprecated, since other properties should used instead, + * and properties marked as deprecated in the standard are + * are marked as general here as they don't have replacements + */ + 'http://ns.adobe.com/photoshop/1.0/' => [ + 'City' => [ + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CityDest', + ], + 'Country' => [ + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CountryDest', + ], + 'State' => [ + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'ProvinceOrStateDest', + ], + 'DateCreated' => [ + 'map_group' => 'deprecated', + // marking as deprecated as the xmp prop preferred + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'DateTimeOriginal', + 'validate' => 'validateDate', + // note this prop is an XMP, not IPTC date + ], + 'CaptionWriter' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'Writer', + ], + 'Instructions' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'SpecialInstructions', + ], + 'TransmissionReference' => [ + 'map_group' => 'general', + 'mode' => 
XMPReader::MODE_SIMPLE, + 'map_name' => 'OriginalTransmissionRef', + ], + 'AuthorsPosition' => [ + /* This corresponds with 2:85 + * By-line Title, which needs to be + * handled weirdly to correspond + * with iptc/exif. */ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE + ], + 'Credit' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Source' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Urgency' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'Category' => [ + // Note, this prop is deprecated, but in general + // group since it doesn't have a replacement. + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'iimCategory', + ], + 'SupplementalCategories' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'iimSupplementalCategory', + ], + 'Headline' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE + ], + ], + 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/' => [ + 'CountryCode' => [ + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CountryCodeDest', + ], + 'IntellectualGenre' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + // Note, this is a six digit code. + // See: http://cv.iptc.org/newscodes/scene/ + // Since these aren't really all that common, + // we just show the number. + 'Scene' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'validate' => 'validateInteger', + 'map_name' => 'SceneCode', + ], + /* Note: SubjectCode should be an 8 ascii digits. + * it is not really an integer (has leading 0's, + * cannot have a +/- sign), but validateInteger + * will let it through. 
+ */ + 'SubjectCode' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'SubjectNewsCode', + 'validate' => 'validateInteger' + ], + 'Location' => [ + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'SublocationDest', + ], + 'CreatorContactInfo' => [ + /* Note this maps to 2:118 in iim + * (Contact) field. However those field + * types are slightly different - 2:118 + * is free form text field, where this + * is more structured. + */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_STRUCT, + 'map_name' => 'Contact', + 'children' => [ + 'CiAdrExtadr' => true, + 'CiAdrCity' => true, + 'CiAdrCtry' => true, + 'CiEmailWork' => true, + 'CiTelWork' => true, + 'CiAdrPcode' => true, + 'CiAdrRegion' => true, + 'CiUrlWork' => true, + ], + ], + 'CiAdrExtadr' => [ /* address */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiAdrCity' => [ /* city */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiAdrCtry' => [ /* country */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiEmailWork' => [ /* email (possibly separated by ',') */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiTelWork' => [ /* telephone */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiAdrPcode' => [ /* postal code */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiAdrRegion' => [ /* province/state */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CiUrlWork' => [ /* url. Multiple may be separated by comma. 
*/ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + /* End contact info struct properties */ + ], + 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/' => [ + 'Event' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ], + 'OrganisationInImageName' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'OrganisationInImage' + ], + 'PersonInImage' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + ], + 'MaxAvailHeight' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + 'map_name' => 'OriginalImageHeight', + ], + 'MaxAvailWidth' => [ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + 'map_name' => 'OriginalImageWidth', + ], + // LocationShown and LocationCreated are handled + // specially because they are hierarchical, but we + // also want to merge with the old non-hierarchical. + 'LocationShown' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_BAGSTRUCT, + 'children' => [ + 'WorldRegion' => true, + 'CountryCode' => true, /* iso code */ + 'CountryName' => true, + 'ProvinceState' => true, + 'City' => true, + 'Sublocation' => true, + ], + ], + 'LocationCreated' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_BAGSTRUCT, + 'children' => [ + 'WorldRegion' => true, + 'CountryCode' => true, /* iso code */ + 'CountryName' => true, + 'ProvinceState' => true, + 'City' => true, + 'Sublocation' => true, + ], + ], + 'WorldRegion' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CountryCode' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + ], + 'CountryName' => [ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart' => true, + 'map_name' => 'Country', + ], + 'ProvinceState' => [ + 'map_group' => 'special', + 'mode' => 
XMPReader::MODE_SIMPLE,
+				'structPart' => true,
+				'map_name' => 'ProvinceOrState',
+			],
+			'City' => [
+				'map_group' => 'special',
+				'mode' => XMPReader::MODE_SIMPLE,
+				'structPart' => true,
+			],
+			'Sublocation' => [
+				'map_group' => 'special',
+				'mode' => XMPReader::MODE_SIMPLE,
+				'structPart' => true,
+			],
+
+			/* Other props that might be interesting but
+			 * Not currently extracted:
+			 * ArtworkOrObject, (info about objects in picture)
+			 * DigitalSourceType
+			 * RegistryId
+			 */
+		],
+
+		/* Plus props we might want to consider:
+		 * (Note: some of these have unclear/incomplete definitions
+		 * from the iptc4xmp standard).
+		 * ImageSupplier (kind of like iptc source field)
+		 * ImageSupplierId (id code for image from supplier)
+		 * CopyrightOwner
+		 * ImageCreator
+		 * Licensor
+		 * Various model release fields
+		 * Property release fields.
+		 */
+	];
+}
diff --git a/includes/libs/xmp/XMPValidate.php b/includes/libs/xmp/XMPValidate.php
new file mode 100644
index 0000000000..32a3340fa1
--- /dev/null
+++ b/includes/libs/xmp/XMPValidate.php
@@ -0,0 +1,400 @@
+setLogger( $logger );
+	}
+
+	public function setLogger( LoggerInterface $logger ) {
+		$this->logger = $logger;
+	}
+	/**
+	 * Function to validate boolean properties ( True or False )
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it is not 'True' or 'False'
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateBoolean( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		if ( $val !== 'True' && $val !== 'False' ) {
+			$this->logger->info( __METHOD__ . " Expected True or False but got $val" );
+			$val = null;
+		}
+	}
+
+	/**
+	 * Function to validate rational properties ( 12/10 )
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it is not "int/non-zero int"
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateRational( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		if ( !preg_match( '/^(?:-?\d+)\/(?:\d+[1-9]|[1-9]\d*)$/D', $val ) ) {
+			$this->logger->info( __METHOD__ . " Expected rational but got $val" );
+			$val = null;
+		}
+	}
+
+	/**
+	 * Function to validate rating properties: -1, 0-5
+	 *
+	 * If it's outside of that range, clamp it into range ('-1' or '5').
+	 *
+	 * @see MWG spec
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it is not numeric
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateRating( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		if ( !preg_match( '/^[-+]?\d*(?:\.?\d*)$/D', $val )
+			|| !is_numeric( $val )
+		) {
+			$this->logger->info( __METHOD__ . " Expected rating but got $val" );
+			$val = null;
+
+			return;
+		} else {
+			$nVal = (float)$val;
+			if ( $nVal < 0 ) {
+				// We do < 0 here instead of < -1 here, since
+				// the values between 0 and -1 are also illegal
+				// as -1 is meant as a special reject rating.
+				$this->logger->info( __METHOD__ . " Rating too low, setting to -1 (Rejected)" );
+				$val = '-1';
+
+				return;
+			}
+			if ( $nVal > 5 ) {
+				$this->logger->info( __METHOD__ . " Rating too high, setting to 5" );
+				$val = '5';
+
+				return;
+			}
+		}
+	}
+
+	/**
+	 * Function to validate integers (optional sign followed by digits only)
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it is not an integer string
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateInteger( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		if ( !preg_match( '/^[-+]?\d+$/D', $val ) ) {
+			$this->logger->info( __METHOD__ . " Expected integer but got $val" );
+			$val = null;
+		}
+	}
+
+	/**
+	 * Function to validate properties with a fixed number of allowed
+	 * choices (closed choice): a key of $info['choices'] or within rangeLow..rangeHigh.
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it is not an allowed choice
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateClosed( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+
+		// check if it's in a numeric range
+		$inRange = false;
+		if ( isset( $info['rangeLow'] )
+			&& isset( $info['rangeHigh'] )
+			&& is_numeric( $val )
+			&& ( intval( $val ) <= $info['rangeHigh'] )
+			&& ( intval( $val ) >= $info['rangeLow'] )
+		) {
+			$inRange = true;
+		}
+
+		if ( !isset( $info['choices'][$val] ) && !$inRange ) {
+			$this->logger->info( __METHOD__ . " Expected closed choice, but got $val" );
+			$val = null;
+		}
+	}
+
+	/**
+	 * Validate a flash struct and pack it into one integer: Fired is bit 0, Return is
+	 * shifted left by 1, Mode by 3, Function is bit 5 and RedEyeMode is bit 6.
+	 * @param array $info Information about current property
+	 * @param mixed &$val Flash struct; replaced by the packed int, or null if a member is missing
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateFlash( $info, &$val, $standalone ) {
+		if ( $standalone ) {
+			// this only validates flash structs, not individual properties
+			return;
+		}
+		if ( !( isset( $val['Fired'] )
+			&& isset( $val['Function'] )
+			&& isset( $val['Mode'] )
+			&& isset( $val['RedEyeMode'] )
+			&& isset( $val['Return'] )
+		) ) {
+			$this->logger->info( __METHOD__ . " Flash structure did not have all the required components" );
+			$val = null;
+		} else {
+			$val = ( (int)( $val['Fired'] === 'True' )
+				| ( intval( $val['Return'] ) << 1 )
+				| ( intval( $val['Mode'] ) << 3 )
+				| ( (int)( $val['Function'] === 'True' ) << 5 )
+				| ( (int)( $val['RedEyeMode'] === 'True' ) << 6 ) );
+		}
+	}
+
+	/**
+	 * Function to validate LangCode properties ( en-GB, etc )
+	 *
+	 * This is just a naive check to make sure it somewhat looks like a lang code.
+	 *
+	 * @see BCP 47
+	 * @see https://wwwimages2.adobe.com/content/dam/Adobe/en/devnet/xmp/pdfs/
+	 * XMP%20SDK%20Release%20cc-2014-12/XMPSpecificationPart1.pdf page 22 (section 8.2.2.4)
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate; set to null if it does not look like a lang code
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateLangCode( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		if ( !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $val ) ) {
+			// this is a rather naive check.
+			$this->logger->info( __METHOD__ . " Expected Lang code but got $val" );
+			$val = null;
+		}
+	}
+
+	/**
+	 * function to validate date properties, and convert to (partial) Exif format.
+	 *
+	 * Dates can be one of the following formats:
+	 *    YYYY
+	 *    YYYY-MM
+	 *    YYYY-MM-DD
+	 *    YYYY-MM-DDThh:mmTZD
+	 *    YYYY-MM-DDThh:mm:ssTZD
+	 *    YYYY-MM-DDThh:mm:ss.sTZD
+	 *
+	 * @param array $info Information about current property
+	 * @param mixed &$val Current value to validate. Converts to TS_EXIF as a side-effect.
+	 *    In cases where there's only a partial date, it will give things like
+	 *    2011:04. Set to null if the value does not match or the year is 0000.
+	 * @param bool $standalone If this is a simple property or array
+	 */
+	public function validateDate( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			// this only validates standalone properties, not arrays, etc
+			return;
+		}
+		$res = [];
+		// @codingStandardsIgnoreStart Long line that cannot be broken
+		if ( !preg_match(
+			/* year[-month[-day[Thour:minute[:second[.fraction]][Z or +/-hh:mm]]]]; only leading digits are range-checked */
+			'/^([0-3]\d{3})(?:-([01]\d)(?:-([0-3]\d)(?:T([0-2]\d):([0-6]\d)(?::([0-6]\d)(?:\.\d+)?)?([-+]\d{2}:\d{2}|Z)?)?)?)?$/D',
+			$val, $res )
+		) {
+			// @codingStandardsIgnoreEnd
+
+			$this->logger->info( __METHOD__ . " Expected date but got $val" );
+			$val = null;
+		} else {
+			/*
+			 * $res is formatted as follows:
+			 * 0 -> full date.
+			 * 1 -> year, 2-> month, 3-> day, 4-> hour, 5-> minute, 6->second
+			 * 7-> Timezone specifier (Z or something like +12:30 )
+			 * many parts are optional, some aren't. For example if you specify
+			 * minute, you must specify hour, day, month, and year but not second or TZ.
+			 */
+
+			/*
+			 * First of all, if year = 0000, Something is wrongish,
+			 * so don't extract. This seems to happen when
+			 * some programs convert between metadata formats.
+			 */
+			if ( $res[1] === '0000' ) {
+				$this->logger->info( __METHOD__ . " Invalid date (year 0): $val" );
+				$val = null;
+
+				return;
+			}
+
+			if ( !isset( $res[4] ) ) { // hour
+				// just have the year month day (if that)
+				$val = $res[1];
+				if ( isset( $res[2] ) ) {
+					$val .= ':' . $res[2];
+				}
+				if ( isset( $res[3] ) ) {
+					$val .= ':' . $res[3];
+				}
+
+				return;
+			}
+
+			if ( !isset( $res[7] ) || $res[7] === 'Z' ) {
+				// if hour is set, then minute must also be or regex above will fail.
+				$val = $res[1] . ':' . $res[2] . ':' . $res[3]
+					. ' ' . $res[4] . ':' . $res[5];
+				if ( isset( $res[6] ) && $res[6] !== '' ) {
+					$val .= ':' . $res[6];
+				}
+
+				return;
+			}
+
+			// Extra check for empty string necessary due to TZ but no second case.
+			$stripSeconds = false;
+			if ( !isset( $res[6] ) || $res[6] === '' ) {
+				$res[6] = '00';
+				$stripSeconds = true;
+			}
+
+			// Do timezone processing. We've already done the case that tz = Z.
+			// From here on $res[7] is a signed "+hh:mm" / "-hh:mm" offset.
+			// We know that if we got to this step, year, month day hour and min must be set
+			// by virtue of regex not failing.
+			// NOTE(review): the signed offset is added to the parsed time (15:23-07:00 => 08:23); confirm.
+			$unix = ( new ConvertableTimestamp(
+				$res[1] . $res[2] . $res[3] . $res[4] . $res[5] . $res[6]
+			) )->getTimestamp( TS_UNIX );
+			$offset = intval( substr( $res[7], 1, 2 ) ) * 60 * 60;
+			$offset += intval( substr( $res[7], 4, 2 ) ) * 60;
+			if ( substr( $res[7], 0, 1 ) === '-' ) {
+				$offset = -$offset;
+			}
+			$val = ( new ConvertableTimestamp( $unix + $offset ) )->getTimestamp( TS_EXIF );
+
+			if ( $stripSeconds ) {
+				// If seconds weren't specified, remove the trailing ':00'.
+				$val = substr( $val, 0, -3 );
+			}
+		}
+	}
+
+	/** function to validate, and more importantly
+	 * translate the XMP DMS form of gps coords to
+	 * the decimal form we use.
+	 *
+	 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf
+	 *      section 1.2.7.4 on page 23
+	 *
+	 * @param array $info Unused (info about prop)
+	 * @param string &$val GPS string in either DDD,MM,SSk or DDD,MM.mmk form (whole string
+	 *      must match); replaced by signed decimal degrees (negative for S/W), or null
+	 * @param bool $standalone If it's a simple prop (should always be true)
+	 */
+	public function validateGPS( $info, &$val, $standalone ) {
+		if ( !$standalone ) {
+			return;
+		}
+
+		$m = [];
+		if ( preg_match(
+			'/^(\d{1,3}),(\d{1,2}),(\d{1,2})([NWSE])$/D',
+			$val, $m )
+		) {
+			$coord = intval( $m[1] );
+			$coord += intval( $m[2] ) * ( 1 / 60 );
+			$coord += intval( $m[3] ) * ( 1 / 3600 );
+			if ( $m[4] === 'S' || $m[4] === 'W' ) {
+				$coord = -$coord;
+			}
+			$val = $coord;
+
+			return;
+		} elseif ( preg_match(
+			'/^(\d{1,3}),(\d{1,2}(?:\.\d*)?)([NWSE])$/D',
+			$val, $m )
+		) {
+			$coord = intval( $m[1] );
+			$coord += floatval( $m[2] ) * ( 1 / 60 );
+			if ( $m[3] === 'S' || $m[3] === 'W' ) {
+				$coord = -$coord;
+			}
+			$val = $coord;
+
+			return;
+		} else {
+			$this->logger->info( __METHOD__
+				. " Expected GPSCoordinate, but got $val." );
+			$val = null;
+
+			return;
+		}
+	}
+}
diff --git a/includes/media/XMP.php b/includes/media/XMP.php
deleted file mode 100644
index 70f67b7858..0000000000
--- a/includes/media/XMP.php
+++ /dev/null
@@ -1,1383 +0,0 @@
-setLogger( $logger );
-		} else {
-			$this->setLogger( new NullLogger() );
-		}
-
-		$this->items = XMPInfo::getItems();
-
-		$this->resetXMLParser();
-	}
-
-	public function setLogger( LoggerInterface $logger ) {
-		$this->logger = $logger;
-	}
-
-	/**
-	 * free the XML parser.
-	 *
-	 * @note It is unclear to me if we really need to do this ourselves
-	 * or if php garbage collection will automatically free the xmlParser
-	 * when it is no longer needed.
-	 */
-	private function destroyXMLParser() {
-		if ( $this->xmlParser ) {
-			xml_parser_free( $this->xmlParser );
-			$this->xmlParser = null;
-		}
-	}
-
-	/**
-	 * Main use is if a single item has multiple xmp documents describing it.
- * For example in jpeg's with extendedXMP - */ - private function resetXMLParser() { - - $this->destroyXMLParser(); - - $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' ); - xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 ); - xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 ); - - xml_set_element_handler( $this->xmlParser, - [ $this, 'startElement' ], - [ $this, 'endElement' ] ); - - xml_set_character_data_handler( $this->xmlParser, [ $this, 'char' ] ); - - $this->parsable = self::PARSABLE_UNKNOWN; - $this->xmlParsableBuffer = ''; - } - - /** - * Check if this instance supports using this class - */ - public static function isSupported() { - return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' ); - } - - /** Get the result array. Do some post-processing before returning - * the array, and transform any metadata that is special-cased. - * - * @return array Array of results as an array of arrays suitable for - * FormatMetadata::getFormattedData(). - */ - public function getResults() { - // xmp-special is for metadata that affects how stuff - // is extracted. For example xmpNote:HasExtendedXMP. - - // It is also used to handle photoshop:AuthorsPosition - // which is weird and really part of another property, - // see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard. - // The location fields also use it. - - $data = $this->results; - - if ( isset( $data['xmp-special']['AuthorsPosition'] ) - && is_string( $data['xmp-special']['AuthorsPosition'] ) - && isset( $data['xmp-general']['Artist'][0] ) - ) { - // Note, if there is more than one creator, - // this only applies to first. This also will - // only apply to the dc:Creator prop, not the - // exif:Artist prop. - - $data['xmp-general']['Artist'][0] = - $data['xmp-special']['AuthorsPosition'] . ', ' - . 
$data['xmp-general']['Artist'][0]; - } - - // Go through the LocationShown and LocationCreated - // changing it to the non-hierarchal form used by - // the other location fields. - - if ( isset( $data['xmp-special']['LocationShown'][0] ) - && is_array( $data['xmp-special']['LocationShown'][0] ) - ) { - // the is_array is just paranoia. It should always - // be an array. - foreach ( $data['xmp-special']['LocationShown'] as $loc ) { - if ( !is_array( $loc ) ) { - // To avoid copying over the _type meta-fields. - continue; - } - foreach ( $loc as $field => $val ) { - $data['xmp-general'][$field . 'Dest'][] = $val; - } - } - } - if ( isset( $data['xmp-special']['LocationCreated'][0] ) - && is_array( $data['xmp-special']['LocationCreated'][0] ) - ) { - // the is_array is just paranoia. It should always - // be an array. - foreach ( $data['xmp-special']['LocationCreated'] as $loc ) { - if ( !is_array( $loc ) ) { - // To avoid copying over the _type meta-fields. - continue; - } - foreach ( $loc as $field => $val ) { - $data['xmp-general'][$field . 'Created'][] = $val; - } - } - } - - // We don't want to return the special values, since they're - // special and not info to be stored about the file. - unset( $data['xmp-special'] ); - - // Convert GPSAltitude to negative if below sea level. - if ( isset( $data['xmp-exif']['GPSAltitudeRef'] ) - && isset( $data['xmp-exif']['GPSAltitude'] ) - ) { - - // Must convert to a real before multiplying by -1 - // XMPValidate guarantees there will always be a '/' in this value. - list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] ); - $data['xmp-exif']['GPSAltitude'] = $nom / $denom; - - if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) { - $data['xmp-exif']['GPSAltitude'] *= -1; - } - unset( $data['xmp-exif']['GPSAltitudeRef'] ); - } - - return $data; - } - - /** - * Main function to call to parse XMP. Use getResults to - * get results. 
- * - * Also catches any errors during processing, writes them to - * debug log, blanks result array and returns false. - * - * @param string $content XMP data - * @param bool $allOfIt If this is all the data (true) or if its split up (false). Default true - * @throws RuntimeException - * @return bool Success. - */ - public function parse( $content, $allOfIt = true ) { - if ( !$this->xmlParser ) { - $this->resetXMLParser(); - } - try { - - // detect encoding by looking for BOM which is supposed to be in processing instruction. - // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf - if ( !$this->charset ) { - $bom = []; - if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/', - $content, $bom ) - ) { - switch ( $bom[0] ) { - case "\xFE\xFF": - $this->charset = 'UTF-16BE'; - break; - case "\xFF\xFE": - $this->charset = 'UTF-16LE'; - break; - case "\x00\x00\xFE\xFF": - $this->charset = 'UTF-32BE'; - break; - case "\xFF\xFE\x00\x00": - $this->charset = 'UTF-32LE'; - break; - case "\xEF\xBB\xBF": - $this->charset = 'UTF-8'; - break; - default: - // this should be impossible to get to - throw new RuntimeException( "Invalid BOM" ); - } - } else { - // standard specifically says, if no bom assume utf-8 - $this->charset = 'UTF-8'; - } - } - if ( $this->charset !== 'UTF-8' ) { - // don't convert if already utf-8 - MediaWiki\suppressWarnings(); - $content = iconv( $this->charset, 'UTF-8//IGNORE', $content ); - MediaWiki\restoreWarnings(); - } - - // Ensure the XMP block does not have an xml doctype declaration, which - // could declare entities unsafe to parse with xml_parse (T85848/T71210). - if ( $this->parsable !== self::PARSABLE_OK ) { - if ( $this->parsable === self::PARSABLE_NO ) { - throw new RuntimeException( 'Unsafe doctype declaration in XML.' ); - } - - $content = $this->xmlParsableBuffer . 
$content; - if ( !$this->checkParseSafety( $content ) ) { - if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) { - // parse wasn't Unsuccessful yet, so return true - // in this case. - return true; - } - $msg = ( $this->parsable === self::PARSABLE_NO ) ? - 'Unsafe doctype declaration in XML.' : - 'No root element found in XML.'; - throw new RuntimeException( $msg ); - } - } - - $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); - if ( !$ok ) { - $code = xml_get_error_code( $this->xmlParser ); - $error = xml_error_string( $code ); - $line = xml_get_current_line_number( $this->xmlParser ); - $col = xml_get_current_column_number( $this->xmlParser ); - $offset = xml_get_current_byte_index( $this->xmlParser ); - - $this->logger->warning( - '{method} : Error reading XMP content: {error} ' . - '(line: {line} column: {column} byte offset: {offset})', - [ - 'method' => __METHOD__, - 'error_code' => $code, - 'error' => $error, - 'line' => $line, - 'column' => $col, - 'offset' => $offset, - 'content' => $content, - ] ); - $this->results = []; // blank if error. - $this->destroyXMLParser(); - return false; - } - } catch ( Exception $e ) { - $this->logger->warning( - '{method} Exception caught while parsing: ' . $e->getMessage(), - [ - 'method' => __METHOD__, - 'exception' => $e, - 'content' => $content, - ] - ); - $this->results = []; - return false; - } - if ( $allOfIt ) { - $this->destroyXMLParser(); - } - - return true; - } - - /** Entry point for XMPExtended blocks in jpeg files - * - * @todo In serious need of testing - * @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20 - * @param string $content XMPExtended block minus the namespace signature - * @return bool If it succeeded. - */ - public function parseExtended( $content ) { - // @todo FIXME: This is untested. Hard to find example files - // or programs that make such files.. 
- $guid = substr( $content, 0, 32 ); - if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] ) - || $this->results['xmp-special']['HasExtendedXMP'] !== $guid - ) { - $this->logger->info( __METHOD__ . - " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" ); - - return false; - } - $len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) ); - - if ( !$len || - $len['length'] < 4 || - $len['offset'] < 0 || - $len['offset'] > $len['length'] - ) { - $this->logger->info( - __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' - ); - - return false; - } - - // we're not very robust here. we should accept it in the wrong order. - // To quote the XMP standard: - // "A JPEG writer should write the ExtendedXMP marker segments in order, - // immediately following the StandardXMP. However, the JPEG standard - // does not require preservation of marker segment order. A robust JPEG - // reader should tolerate the marker segments in any order." - // On the other hand, the probability that an image will have more than - // 128k of metadata is rather low... so the probability that it will have - // > 128k, and be in the wrong order is very low... - - if ( $len['offset'] !== $this->extendedXMPOffset ) { - $this->logger->info( __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was ' - . $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' ); - - return false; - } - - if ( $len['offset'] === 0 ) { - // if we're starting the extended block, we've probably already - // done the XMPStandard block, so reset. - $this->resetXMLParser(); - } - - $this->extendedXMPOffset += $len['length']; - - $actualContent = substr( $content, 40 ); - - if ( $this->extendedXMPOffset === strlen( $actualContent ) ) { - $atEnd = true; - } else { - $atEnd = false; - } - - $this->logger->debug( __METHOD__ . 
'Parsing a XMPExtended block' ); - - return $this->parse( $actualContent, $atEnd ); - } - - /** - * Character data handler - * Called whenever character data is found in the xmp document. - * - * does nothing if we're in MODE_IGNORE or if the data is whitespace - * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character - * data in the other modes). - * - * As an example, this happens when we encounter XMP like: - * 0/10 - * and are processing the 0/10 bit. - * - * @param XMLParser $parser XMLParser reference to the xml parser - * @param string $data Character data - * @throws RuntimeException On invalid data - */ - function char( $parser, $data ) { - - $data = trim( $data ); - if ( trim( $data ) === "" ) { - return; - } - - if ( !isset( $this->mode[0] ) ) { - throw new RuntimeException( 'Unexpected character data before first rdf:Description element' ); - } - - if ( $this->mode[0] === self::MODE_IGNORE ) { - return; - } - - if ( $this->mode[0] !== self::MODE_SIMPLE - && $this->mode[0] !== self::MODE_QDESC - ) { - throw new RuntimeException( 'character data where not expected. (mode ' . $this->mode[0] . ')' ); - } - - // to check, how does this handle w.s. - if ( $this->charContent === false ) { - $this->charContent = $data; - } else { - $this->charContent .= $data; - } - } - - /** - * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't - * contain a doctype declaration which could contain a dos attack if we - * parse it and expand internal entities (T85848). - * - * @param string $content xml string to check for parse safety - * @return bool true if the xml is safe to parse, false otherwise - */ - private function checkParseSafety( $content ) { - $reader = new XMLReader(); - $result = null; - - // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed - // instead of using XML(). - $reader->open( - 'data://text/plain,' . 
urlencode( $content ), - null, - LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET - ); - - $oldDisable = libxml_disable_entity_loader( true ); - /** @noinspection PhpUnusedLocalVariableInspection */ - $reset = new ScopedCallback( - 'libxml_disable_entity_loader', - [ $oldDisable ] - ); - $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false ); - - // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning - // when parsing truncated XML, which causes unit tests to fail. - MediaWiki\suppressWarnings(); - while ( $reader->read() ) { - if ( $reader->nodeType === XMLReader::ELEMENT ) { - // Reached the first element without hitting a doctype declaration - $this->parsable = self::PARSABLE_OK; - $result = true; - break; - } - if ( $reader->nodeType === XMLReader::DOC_TYPE ) { - $this->parsable = self::PARSABLE_NO; - $result = false; - break; - } - } - MediaWiki\restoreWarnings(); - - if ( !is_null( $result ) ) { - return $result; - } - - // Reached the end of the parsable xml without finding an element - // or doctype. Buffer and try again. - $this->parsable = self::PARSABLE_BUFFERING; - $this->xmlParsableBuffer = $content; - return false; - } - - /** When we hit a closing element in MODE_IGNORE - * Check to see if this is the element we started to ignore, - * in which case we get out of MODE_IGNORE - * - * @param string $elm Namespace of element followed by a space and then tag name of element. - */ - private function endElementModeIgnore( $elm ) { - if ( $this->curItem[0] === $elm ) { - array_shift( $this->curItem ); - array_shift( $this->mode ); - } - } - - /** - * Hit a closing element when in MODE_SIMPLE. - * This generally means that we finished processing a - * property value, and now have to save the result to the - * results array - * - * For example, when processing: - * 0/10 - * this deals with when we hit . - * - * Or it could be if we hit the end element of a property - * of a compound data structure (like a member of an array). 
- * - * @param string $elm Namespace, space, and tag name. - */ - private function endElementModeSimple( $elm ) { - if ( $this->charContent !== false ) { - if ( $this->processingArray ) { - // if we're processing an array, use the original element - // name instead of rdf:li. - list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); - } else { - list( $ns, $tag ) = explode( ' ', $elm, 2 ); - } - $this->saveValue( $ns, $tag, $this->charContent ); - - $this->charContent = false; // reset - } - array_shift( $this->curItem ); - array_shift( $this->mode ); - } - - /** - * Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG - * generally means we've finished processing a nested structure. - * resets some internal variables to indicate that. - * - * Note this means we hit the closing element not the "". - * - * @par For example, when processing: - * @code{,xml} - * 64 - * - * @endcode - * - * This method is called when we hit the "" tag. - * - * @param string $elm Namespace . space . tag name. - * @throws RuntimeException - */ - private function endElementNested( $elm ) { - - /* cur item must be the same as $elm, unless if in MODE_STRUCT - in which case it could also be rdf:Description */ - if ( $this->curItem[0] !== $elm - && !( $elm === self::NS_RDF . ' Description' - && $this->mode[0] === self::MODE_STRUCT ) - ) { - throw new RuntimeException( "nesting mismatch. got a but expected a curItem[0] . '>' ); - } - - // Validate structures. - list( $ns, $tag ) = explode( ' ', $elm, 2 ); - if ( isset( $this->items[$ns][$tag]['validate'] ) ) { - $info =& $this->items[$ns][$tag]; - $finalName = isset( $info['map_name'] ) - ? $info['map_name'] : $tag; - - if ( is_array( $info['validate'] ) ) { - $validate = $info['validate']; - } else { - $validator = new XMPValidate( $this->logger ); - $validate = [ $validator, $info['validate'] ]; - } - - if ( !isset( $this->results['xmp-' . 
$info['map_group']][$finalName] ) ) { - // This can happen if all the members of the struct failed validation. - $this->logger->debug( __METHOD__ . " <$ns:$tag> has no valid members." ); - } elseif ( is_callable( $validate ) ) { - $val =& $this->results['xmp-' . $info['map_group']][$finalName]; - call_user_func_array( $validate, [ $info, &$val, false ] ); - if ( is_null( $val ) ) { - // the idea being the validation function will unset the variable if - // its invalid. - $this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." ); - unset( $this->results['xmp-' . $info['map_group']][$finalName] ); - } - } else { - $this->logger->warning( __METHOD__ . " Validation function for $finalName (" - . $validate[0] . '::' . $validate[1] . '()) is not callable.' ); - } - } - - array_shift( $this->curItem ); - array_shift( $this->mode ); - $this->ancestorStruct = false; - $this->processingArray = false; - $this->itemLang = false; - } - - /** - * Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) - * Add information about what type of element this is. - * - * Note we still have to hit the outer "" - * - * @par For example, when processing: - * @code{,xml} - * 64 - * - * @endcode - * - * This method is called when we hit the "". - * (For comparison, we call endElementModeSimple when we - * hit the "") - * - * @param string $elm Namespace . ' ' . element name - * @throws RuntimeException - */ - private function endElementModeLi( $elm ) { - list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); - $info = $this->items[$ns][$tag]; - $finalName = isset( $info['map_name'] ) - ? $info['map_name'] : $tag; - - array_shift( $this->mode ); - - if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) { - $this->logger->debug( __METHOD__ . " Empty compund element $finalName." ); - - return; - } - - if ( $elm === self::NS_RDF . ' Seq' ) { - $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol'; - } elseif ( $elm === self::NS_RDF . 
' Bag' ) { - $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul'; - } elseif ( $elm === self::NS_RDF . ' Alt' ) { - // extra if needed as you could theoretically have a non-language alt. - if ( $info['mode'] === self::MODE_LANG ) { - $this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang'; - } - } else { - throw new RuntimeException( - __METHOD__ . " expected or but instead got $elm." - ); - } - } - - /** - * End element while in MODE_QDESC - * mostly when ending an element when we have a simple value - * that has qualifiers. - * - * Qualifiers aren't all that common, and we don't do anything - * with them. - * - * @param string $elm Namespace and element - */ - private function endElementModeQDesc( $elm ) { - - if ( $elm === self::NS_RDF . ' value' ) { - list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); - $this->saveValue( $ns, $tag, $this->charContent ); - - return; - } else { - array_shift( $this->mode ); - array_shift( $this->curItem ); - } - } - - /** - * Handler for hitting a closing element. - * - * generally just calls a helper function depending on what - * mode we're in. - * - * Ignores the outer wrapping elements that are optional in - * xmp and have no meaning. - * - * @param XMLParser $parser - * @param string $elm Namespace . ' ' . element name - * @throws RuntimeException - */ - function endElement( $parser, $elm ) { - if ( $elm === ( self::NS_RDF . ' RDF' ) - || $elm === 'adobe:ns:meta/ xmpmeta' - || $elm === 'adobe:ns:meta/ xapmeta' - ) { - // ignore these. - return; - } - - if ( $elm === self::NS_RDF . ' type' ) { - // these aren't really supported properly yet. - // However, it appears they almost never used. - $this->logger->info( __METHOD__ . ' encountered ' ); - } - - if ( strpos( $elm, ' ' ) === false ) { - // This probably shouldn't happen. - // However, there is a bug in an adobe product - // that forgets the namespace on some things. - // (Luckily they are unimportant things). 
- $this->logger->info( __METHOD__ . " Encountered which has no namespace. Skipping." ); - - return; - } - - if ( count( $this->mode[0] ) === 0 ) { - // This should never ever happen and means - // there is a pretty major bug in this class. - throw new RuntimeException( 'Encountered end element with no mode' ); - } - - if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) { - // just to be paranoid. Should always have a curItem, except for initially - // (aka during MODE_INITAL). - throw new RuntimeException( "Hit end element but no curItem" ); - } - - switch ( $this->mode[0] ) { - case self::MODE_IGNORE: - $this->endElementModeIgnore( $elm ); - break; - case self::MODE_SIMPLE: - $this->endElementModeSimple( $elm ); - break; - case self::MODE_STRUCT: - case self::MODE_SEQ: - case self::MODE_BAG: - case self::MODE_LANG: - case self::MODE_BAGSTRUCT: - $this->endElementNested( $elm ); - break; - case self::MODE_INITIAL: - if ( $elm === self::NS_RDF . ' Description' ) { - array_shift( $this->mode ); - } else { - throw new RuntimeException( 'Element ended unexpectedly while in MODE_INITIAL' ); - } - break; - case self::MODE_LI: - case self::MODE_LI_LANG: - $this->endElementModeLi( $elm ); - break; - case self::MODE_QDESC: - $this->endElementModeQDesc( $elm ); - break; - default: - $this->logger->warning( __METHOD__ . " no mode (elm = $elm)" ); - break; - } - } - - /** - * Hit an opening element while in MODE_IGNORE - * - * XMP is extensible, so ignore any tag we don't understand. - * - * Mostly ignores, unless we encounter the element that we are ignoring. - * in which case we add it to the item stack, so we can ignore things - * that are nested, correctly. - * - * @param string $elm Namespace . ' ' . 
tag name - */ - private function startElementModeIgnore( $elm ) { - if ( $elm === $this->curItem[0] ) { - array_unshift( $this->curItem, $elm ); - array_unshift( $this->mode, self::MODE_IGNORE ); - } - } - - /** - * Start element in MODE_BAG (unordered array) - * this should always be - * - * @param string $elm Namespace . ' ' . tag - * @throws RuntimeException If we have an element that's not - */ - private function startElementModeBag( $elm ) { - if ( $elm === self::NS_RDF . ' Bag' ) { - array_unshift( $this->mode, self::MODE_LI ); - } else { - throw new RuntimeException( "Expected but got $elm." ); - } - } - - /** - * Start element in MODE_SEQ (ordered array) - * this should always be - * - * @param string $elm Namespace . ' ' . tag - * @throws RuntimeException If we have an element that's not - */ - private function startElementModeSeq( $elm ) { - if ( $elm === self::NS_RDF . ' Seq' ) { - array_unshift( $this->mode, self::MODE_LI ); - } elseif ( $elm === self::NS_RDF . ' Bag' ) { - # bug 27105 - $this->logger->info( __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending' - . ' it is a Seq, since some buggy software is known to screw this up.' ); - array_unshift( $this->mode, self::MODE_LI ); - } else { - throw new RuntimeException( "Expected but got $elm." ); - } - } - - /** - * Start element in MODE_LANG (language alternative) - * this should always be - * - * This tag tends to be used for metadata like describe this - * picture, which can be translated into multiple languages. - * - * XMP supports non-linguistic alternative selections, - * which are really only used for thumbnails, which - * we don't care about. - * - * @param string $elm Namespace . ' ' . tag - * @throws RuntimeException If we have an element that's not - */ - private function startElementModeLang( $elm ) { - if ( $elm === self::NS_RDF . ' Alt' ) { - array_unshift( $this->mode, self::MODE_LI_LANG ); - } else { - throw new RuntimeException( "Expected but got $elm." 
); - } - } - - /** - * Handle an opening element when in MODE_SIMPLE - * - * This should not happen often. This is for if a simple element - * already opened has a child element. Could happen for a - * qualified element. - * - * For example: - * 0/10 - * Bar - * - * - * This method is called when processing the element - * - * @param string $elm Namespace and tag names separated by space. - * @param array $attribs Attributes of the element. - * @throws RuntimeException - */ - private function startElementModeSimple( $elm, $attribs ) { - if ( $elm === self::NS_RDF . ' Description' ) { - // If this value has qualifiers - array_unshift( $this->mode, self::MODE_QDESC ); - array_unshift( $this->curItem, $this->curItem[0] ); - - if ( isset( $attribs[self::NS_RDF . ' value'] ) ) { - list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); - $this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] ); - } - } elseif ( $elm === self::NS_RDF . ' value' ) { - // This should not be here. - throw new RuntimeException( __METHOD__ . ' Encountered where it was unexpected.' ); - } else { - // something else we don't recognize, like a qualifier maybe. - $this->logger->info( __METHOD__ . - " Encountered element <$elm> where only expecting character data as value of " . - $this->curItem[0] ); - array_unshift( $this->mode, self::MODE_IGNORE ); - array_unshift( $this->curItem, $elm ); - } - } - - /** - * Start an element when in MODE_QDESC. - * This generally happens when a simple element has an inner - * rdf:Description to hold qualifier elements. - * - * For example in: - * 0/10 - * Bar - * - * Called when processing the or . - * - * @param string $elm Namespace and tag name separated by a space. - * - */ - private function startElementModeQDesc( $elm ) { - if ( $elm === self::NS_RDF . 
' value' ) { - return; // do nothing - } else { - // otherwise its a qualifier, which we ignore - array_unshift( $this->mode, self::MODE_IGNORE ); - array_unshift( $this->curItem, $elm ); - } - } - - /** - * Starting an element when in MODE_INITIAL - * This usually happens when we hit an element inside - * the outer rdf:Description - * - * This is generally where most properties start. - * - * @param string $ns Namespace - * @param string $tag Tag name (without namespace prefix) - * @param array $attribs Array of attributes - * @throws RuntimeException - */ - private function startElementModeInitial( $ns, $tag, $attribs ) { - if ( $ns !== self::NS_RDF ) { - - if ( isset( $this->items[$ns][$tag] ) ) { - if ( isset( $this->items[$ns][$tag]['structPart'] ) ) { - // If this element is supposed to appear only as - // a child of a structure, but appears here (not as - // a child of a struct), then something weird is - // happening, so ignore this element and its children. - - $this->logger->warning( "Encountered <$ns:$tag> outside" - . " of its expected parent. Ignoring." ); - - array_unshift( $this->mode, self::MODE_IGNORE ); - array_unshift( $this->curItem, $ns . ' ' . $tag ); - - return; - } - $mode = $this->items[$ns][$tag]['mode']; - array_unshift( $this->mode, $mode ); - array_unshift( $this->curItem, $ns . ' ' . $tag ); - if ( $mode === self::MODE_STRUCT ) { - $this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] ) - ? $this->items[$ns][$tag]['map_name'] : $tag; - } - if ( $this->charContent !== false ) { - // Something weird. - // Should not happen in valid XMP. - throw new RuntimeException( 'tag nested in non-whitespace characters.' ); - } - } else { - // This element is not on our list of allowed elements so ignore. - $this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." ); - array_unshift( $this->mode, self::MODE_IGNORE ); - array_unshift( $this->curItem, $ns . ' ' . 
$tag ); - - return; - } - } - // process attributes - $this->doAttribs( $attribs ); - } - - /** - * Hit an opening element when in a Struct (MODE_STRUCT) - * This is generally for fields of a compound property. - * - * Example of a struct (abbreviated; flash has more properties): - * - * True - * 1 - * - * or: - * - * True - * 1 - * - * @param string $ns Namespace - * @param string $tag Tag name (no ns) - * @param array $attribs Array of attribs w/ values. - * @throws RuntimeException - */ - private function startElementModeStruct( $ns, $tag, $attribs ) { - if ( $ns !== self::NS_RDF ) { - - if ( isset( $this->items[$ns][$tag] ) ) { - if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] ) - && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) - ) { - // This assumes that we don't have inter-namespace nesting - // which we don't in all the properties we're interested in. - throw new RuntimeException( " <$tag> appeared nested in <" . $this->ancestorStruct - . "> where it is not allowed." ); - } - array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] ); - array_unshift( $this->curItem, $ns . ' ' . $tag ); - if ( $this->charContent !== false ) { - // Something weird. - // Should not happen in valid XMP. - throw new RuntimeException( "tag <$tag> nested in non-whitespace characters (" . - $this->charContent . ")." ); - } - } else { - array_unshift( $this->mode, self::MODE_IGNORE ); - array_unshift( $this->curItem, $elm ); - - return; - } - } - - if ( $ns === self::NS_RDF && $tag === 'Description' ) { - $this->doAttribs( $attribs ); - array_unshift( $this->mode, self::MODE_STRUCT ); - array_unshift( $this->curItem, $this->curItem[0] ); - } - } - - /** - * opening element in MODE_LI - * process elements of arrays. - * - * Example: - * 64 - * - * This method is called when we hit the element. - * - * @param string $elm Namespace . ' ' . tagname - * @param array $attribs Attributes. 
(needed for BAGSTRUCTS) - * @throws RuntimeException If gets a tag other than - */ - private function startElementModeLi( $elm, $attribs ) { - if ( ( $elm ) !== self::NS_RDF . ' li' ) { - throw new RuntimeException( " expected but got $elm." ); - } - - if ( !isset( $this->mode[1] ) ) { - // This should never ever ever happen. Checking for it - // to be paranoid. - throw new RuntimeException( 'In mode Li, but no 2xPrevious mode!' ); - } - - if ( $this->mode[1] === self::MODE_BAGSTRUCT ) { - // This list item contains a compound (STRUCT) value. - array_unshift( $this->mode, self::MODE_STRUCT ); - array_unshift( $this->curItem, $elm ); - $this->processingArray = true; - - if ( !isset( $this->curItem[1] ) ) { - // be paranoid. - throw new RuntimeException( 'Can not find parent of BAGSTRUCT.' ); - } - list( $curNS, $curTag ) = explode( ' ', $this->curItem[1] ); - $this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] ) - ? $this->items[$curNS][$curTag]['map_name'] : $curTag; - - $this->doAttribs( $attribs ); - } else { - // Normal BAG or SEQ containing simple values. - array_unshift( $this->mode, self::MODE_SIMPLE ); - // need to add curItem[0] on again since one is for the specific item - // and one is for the entire group. - array_unshift( $this->curItem, $this->curItem[0] ); - $this->processingArray = true; - } - } - - /** - * Opening element in MODE_LI_LANG. - * process elements of language alternatives - * - * Example: - * My house - * - * - * This method is called when we hit the element. - * - * @param string $elm Namespace . ' ' . tag - * @param array $attribs Array of elements (most importantly xml:lang) - * @throws RuntimeException If gets a tag other than or if no xml:lang - */ - private function startElementModeLiLang( $elm, $attribs ) { - if ( $elm !== self::NS_RDF . ' li' ) { - throw new RuntimeException( __METHOD__ . " expected but got $elm." ); - } - if ( !isset( $attribs[self::NS_XML . 
' lang'] ) - || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] ) - ) { - throw new RuntimeException( __METHOD__ - . " did not contain, or has invalid xml:lang attribute in lang alternative" ); - } - - // Lang is case-insensitive. - $this->itemLang = strtolower( $attribs[self::NS_XML . ' lang'] ); - - // need to add curItem[0] on again since one is for the specific item - // and one is for the entire group. - array_unshift( $this->curItem, $this->curItem[0] ); - array_unshift( $this->mode, self::MODE_SIMPLE ); - $this->processingArray = true; - } - - /** - * Hits an opening element. - * Generally just calls a helper based on what MODE we're in. - * Also does some initial set up for the wrapper element - * - * @param XMLParser $parser - * @param string $elm Namespace "" element - * @param array $attribs Attribute name => value - * @throws RuntimeException - */ - function startElement( $parser, $elm, $attribs ) { - - if ( $elm === self::NS_RDF . ' RDF' - || $elm === 'adobe:ns:meta/ xmpmeta' - || $elm === 'adobe:ns:meta/ xapmeta' - ) { - /* ignore. */ - return; - } elseif ( $elm === self::NS_RDF . ' Description' ) { - if ( count( $this->mode ) === 0 ) { - // outer rdf:desc - array_unshift( $this->mode, self::MODE_INITIAL ); - } - } elseif ( $elm === self::NS_RDF . ' type' ) { - // This doesn't support rdf:type properly. - // In practise I have yet to see a file that - // uses this element, however it is mentioned - // on page 25 of part 1 of the xmp standard. - // Also it seems as if exiv2 and exiftool do not support - // this either (That or I misunderstand the standard) - $this->logger->info( __METHOD__ . ' Encountered which isn\'t currently supported' ); - } - - if ( strpos( $elm, ' ' ) === false ) { - // This probably shouldn't happen. - $this->logger->info( __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." 
); - - return; - } - - list( $ns, $tag ) = explode( ' ', $elm, 2 ); - - if ( count( $this->mode ) === 0 ) { - // This should not happen. - throw new RuntimeException( 'Error extracting XMP, ' - . "encountered <$elm> with no mode" ); - } - - switch ( $this->mode[0] ) { - case self::MODE_IGNORE: - $this->startElementModeIgnore( $elm ); - break; - case self::MODE_SIMPLE: - $this->startElementModeSimple( $elm, $attribs ); - break; - case self::MODE_INITIAL: - $this->startElementModeInitial( $ns, $tag, $attribs ); - break; - case self::MODE_STRUCT: - $this->startElementModeStruct( $ns, $tag, $attribs ); - break; - case self::MODE_BAG: - case self::MODE_BAGSTRUCT: - $this->startElementModeBag( $elm ); - break; - case self::MODE_SEQ: - $this->startElementModeSeq( $elm ); - break; - case self::MODE_LANG: - $this->startElementModeLang( $elm ); - break; - case self::MODE_LI_LANG: - $this->startElementModeLiLang( $elm, $attribs ); - break; - case self::MODE_LI: - $this->startElementModeLi( $elm, $attribs ); - break; - case self::MODE_QDESC: - $this->startElementModeQDesc( $elm ); - break; - default: - throw new RuntimeException( 'StartElement in unknown mode: ' . $this->mode[0] ); - } - } - - // @codingStandardsIgnoreStart Generic.Files.LineLength - /** - * Process attributes. - * Simple values can be stored as either a tag or attribute - * - * Often the initial "" tag just has all the simple - * properties as attributes. - * - * @par Example: - * @code - * - * @endcode - * - * @param array $attribs Array attribute=>value - * @throws RuntimeException - */ - // @codingStandardsIgnoreEnd - private function doAttribs( $attribs ) { - // first check for rdf:parseType attribute, as that can change - // how the attributes are interperted. - - if ( isset( $attribs[self::NS_RDF . ' parseType'] ) - && $attribs[self::NS_RDF . 
' parseType'] === 'Resource' - && $this->mode[0] === self::MODE_SIMPLE - ) { - // this is equivalent to having an inner rdf:Description - $this->mode[0] = self::MODE_QDESC; - } - foreach ( $attribs as $name => $val ) { - if ( strpos( $name, ' ' ) === false ) { - // This shouldn't happen, but so far some old software forgets namespace - // on rdf:about. - $this->logger->info( __METHOD__ . ' Encountered non-namespaced attribute: ' - . " $name=\"$val\". Skipping. " ); - continue; - } - list( $ns, $tag ) = explode( ' ', $name, 2 ); - if ( $ns === self::NS_RDF ) { - if ( $tag === 'value' || $tag === 'resource' ) { - // resource is for url. - // value attribute is a weird way of just putting the contents. - $this->char( $this->xmlParser, $val ); - } - } elseif ( isset( $this->items[$ns][$tag] ) ) { - if ( $this->mode[0] === self::MODE_SIMPLE ) { - throw new RuntimeException( __METHOD__ - . " $ns:$tag found as attribute where not allowed" ); - } - $this->saveValue( $ns, $tag, $val ); - } else { - $this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." ); - } - } - } - - /** - * Given an extracted value, save it to results array - * - * note also uses $this->ancestorStruct and - * $this->processingArray to determine what name to - * save the value under. (in addition to $tag). - * - * @param string $ns Namespace of tag this is for - * @param string $tag Tag name - * @param string $val Value to save - */ - private function saveValue( $ns, $tag, $val ) { - - $info =& $this->items[$ns][$tag]; - $finalName = isset( $info['map_name'] ) - ? 
$info['map_name'] : $tag; - if ( isset( $info['validate'] ) ) { - if ( is_array( $info['validate'] ) ) { - $validate = $info['validate']; - } else { - $validator = new XMPValidate( $this->logger ); - $validate = [ $validator, $info['validate'] ]; - } - - if ( is_callable( $validate ) ) { - call_user_func_array( $validate, [ $info, &$val, true ] ); - // the reasoning behind using &$val instead of using the return value - // is to be consistent between here and validating structures. - if ( is_null( $val ) ) { - $this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." ); - - return; - } - } else { - $this->logger->warning( __METHOD__ . " Validation function for $finalName (" - . $validate[0] . '::' . $validate[1] . '()) is not callable.' ); - } - } - - if ( $this->ancestorStruct && $this->processingArray ) { - // Aka both an array and a struct. ( self::MODE_BAGSTRUCT ) - $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val; - } elseif ( $this->ancestorStruct ) { - $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val; - } elseif ( $this->processingArray ) { - if ( $this->itemLang === false ) { - // normal array - $this->results['xmp-' . $info['map_group']][$finalName][] = $val; - } else { - // lang array. - $this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val; - } - } else { - $this->results['xmp-' . $info['map_group']][$finalName] = $val; - } - } -} diff --git a/includes/media/XMPInfo.php b/includes/media/XMPInfo.php deleted file mode 100644 index 052be33a4b..0000000000 --- a/includes/media/XMPInfo.php +++ /dev/null @@ -1,1168 +0,0 @@ - true ). - * Only used with validateClosed. - * * rangeLow and rangeHigh - Alternative to choices for numeric ranges. - * Again for validateClosed only. - * * children - For MODE_STRUCT items, allowed children. - * * structPart - Indicates that this element can only appear as a member - * of a structure. 
- * - * Currently this just has a bunch of EXIF values as this class is only half-done. - */ - static private $items = [ - 'http://ns.adobe.com/exif/1.0/' => [ - 'ApertureValue' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'BrightnessValue' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'CompressedBitsPerPixel' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'DigitalZoomRatio' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'ExposureBiasValue' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'ExposureIndex' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'ExposureTime' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'FlashEnergy' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational', - ], - 'FNumber' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'FocalLength' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'FocalPlaneXResolution' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'FocalPlaneYResolution' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSAltitude' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational', - ], - 'GPSDestBearing' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSDestDistance' => [ - 'map_group' => 'exif', - 'mode' => 
XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSDOP' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSImgDirection' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSSpeed' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'GPSTrack' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'MaxApertureValue' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'ShutterSpeedValue' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - 'SubjectDistance' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational' - ], - /* Flash */ - 'Flash' => [ - 'mode' => XMPReader::MODE_STRUCT, - 'children' => [ - 'Fired' => true, - 'Function' => true, - 'Mode' => true, - 'RedEyeMode' => true, - 'Return' => true, - ], - 'validate' => 'validateFlash', - 'map_group' => 'exif', - ], - 'Fired' => [ - 'map_group' => 'exif', - 'validate' => 'validateBoolean', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'Function' => [ - 'map_group' => 'exif', - 'validate' => 'validateBoolean', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'Mode' => [ - 'map_group' => 'exif', - 'validate' => 'validateClosed', - 'mode' => XMPReader::MODE_SIMPLE, - 'choices' => [ '0' => true, '1' => true, - '2' => true, '3' => true ], - 'structPart' => true, - ], - 'Return' => [ - 'map_group' => 'exif', - 'validate' => 'validateClosed', - 'mode' => XMPReader::MODE_SIMPLE, - 'choices' => [ '0' => true, - '2' => true, '3' => true ], - 'structPart' => true, - ], - 'RedEyeMode' => [ - 'map_group' => 'exif', - 'validate' => 'validateBoolean', - 'mode' => XMPReader::MODE_SIMPLE, - 
'structPart' => true, - ], - /* End Flash */ - 'ISOSpeedRatings' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateInteger' - ], - /* end rational things */ - 'ColorSpace' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true, '65535' => true ], - ], - 'ComponentsConfiguration' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true, '2' => true, '3' => true, '4' => true, - '5' => true, '6' => true ] - ], - 'Contrast' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '0' => true, '1' => true, '2' => true ] - ], - 'CustomRendered' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '0' => true, '1' => true ] - ], - 'DateTimeOriginal' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateDate', - ], - 'DateTimeDigitized' => [ /* xmp:CreateDate */ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateDate', - ], - /* todo: there might be interesting information in - * exif:DeviceSettingDescription, but need to find an - * example - */ - 'ExifVersion' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'ExposureMode' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 2, - ], - 'ExposureProgram' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 8, - ], - 'FileSource' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '3' => true ] - ], - 'FlashpixVersion' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 
'FocalLengthIn35mmFilm' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'FocalPlaneResolutionUnit' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '2' => true, '3' => true ], - ], - 'GainControl' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 4, - ], - /* this value is post-processed out later */ - 'GPSAltitudeRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '0' => true, '1' => true ], - ], - 'GPSAreaInformation' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'GPSDestBearingRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'T' => true, 'M' => true ], - ], - 'GPSDestDistanceRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'K' => true, 'M' => true, - 'N' => true ], - ], - 'GPSDestLatitude' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateGPS', - ], - 'GPSDestLongitude' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateGPS', - ], - 'GPSDifferential' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '0' => true, '1' => true ], - ], - 'GPSImgDirectionRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'T' => true, 'M' => true ], - ], - 'GPSLatitude' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateGPS', - ], - 'GPSLongitude' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateGPS', - ], - 'GPSMapDatum' => [ - 'map_group' 
=> 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'GPSMeasureMode' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '2' => true, '3' => true ] - ], - 'GPSProcessingMethod' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'GPSSatellites' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'GPSSpeedRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'K' => true, 'M' => true, - 'N' => true ], - ], - 'GPSStatus' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'A' => true, 'V' => true ] - ], - 'GPSTimeStamp' => [ - 'map_group' => 'exif', - // Note: in exif, GPSDateStamp does not include - // the time, where here it does. - 'map_name' => 'GPSDateStamp', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateDate', - ], - 'GPSTrackRef' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ 'T' => true, 'M' => true ] - ], - 'GPSVersionID' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'ImageUniqueID' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'LightSource' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - /* can't use a range, as it skips... 
*/ - 'choices' => [ '0' => true, '1' => true, - '2' => true, '3' => true, '4' => true, - '9' => true, '10' => true, '11' => true, - '12' => true, '13' => true, - '14' => true, '15' => true, - '17' => true, '18' => true, - '19' => true, '20' => true, - '21' => true, '22' => true, - '23' => true, '24' => true, - '255' => true, - ], - ], - 'MeteringMode' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 6, - 'choices' => [ '255' => true ], - ], - /* Pixel(X|Y)Dimension are rather useless, but for - * completeness since we do it with exif. - */ - 'PixelXDimension' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'PixelYDimension' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'Saturation' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 2, - ], - 'SceneCaptureType' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 3, - ], - 'SceneType' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true ], - ], - // Note, 6 is not valid SensingMethod. - 'SensingMethod' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 1, - 'rangeHigh' => 5, - 'choices' => [ '7' => true, 8 => true ], - ], - 'Sharpness' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 2, - ], - 'SpectralSensitivity' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - // This tag should perhaps be displayed to user better. 
- 'SubjectArea' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateInteger', - ], - 'SubjectDistanceRange' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'rangeLow' => 0, - 'rangeHigh' => 3, - ], - 'SubjectLocation' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateInteger', - ], - 'UserComment' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_LANG, - ], - 'WhiteBalance' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '0' => true, '1' => true ] - ], - ], - 'http://ns.adobe.com/tiff/1.0/' => [ - 'Artist' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'BitsPerSample' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateInteger', - ], - 'Compression' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true, '6' => true ], - ], - /* this prop should not be used in XMP. dc:rights is the correct prop */ - 'Copyright' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_LANG, - ], - 'DateTime' => [ /* proper prop is xmp:ModifyDate */ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateDate', - ], - 'ImageDescription' => [ /* proper one is dc:description */ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_LANG, - ], - 'ImageLength' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'ImageWidth' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'Make' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Model' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - /**** Do not extract this property - * It interferes with auto exif rotation. 
- * 'Orientation' => array( - * 'map_group' => 'exif', - * 'mode' => XMPReader::MODE_SIMPLE, - * 'validate' => 'validateClosed', - * 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true, 5 => true, - * '6' => true, '7' => true, '8' => true ), - *), - ******/ - 'PhotometricInterpretation' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '2' => true, '6' => true ], - ], - 'PlanerConfiguration' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true, '2' => true ], - ], - 'PrimaryChromaticities' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateRational', - ], - 'ReferenceBlackWhite' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateRational', - ], - 'ResolutionUnit' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '2' => true, '3' => true ], - ], - 'SamplesPerPixel' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - ], - 'Software' => [ /* see xmp:CreatorTool */ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - /* ignore TransferFunction */ - 'WhitePoint' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateRational', - ], - 'XResolution' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational', - ], - 'YResolution' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRational', - ], - 'YCbCrCoefficients' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateRational', - ], - 'YCbCrPositioning' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateClosed', - 'choices' => [ '1' => true, '2' => true ], - ], - /******** 
- * Disable extracting this property (bug 31944) - * Several files have a string instead of a Seq - * for this property. XMPReader doesn't handle - * mismatched types very gracefully (it marks - * the entire file as invalid, instead of just - * the relavent prop). Since this prop - * doesn't communicate all that useful information - * just disable this prop for now, until such - * XMPReader is more graceful (bug 32172) - * 'YCbCrSubSampling' => array( - * 'map_group' => 'exif', - * 'mode' => XMPReader::MODE_SEQ, - * 'validate' => 'validateClosed', - * 'choices' => array( '1' => true, '2' => true ), - * ), - */ - ], - 'http://ns.adobe.com/exif/1.0/aux/' => [ - 'Lens' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'SerialNumber' => [ - 'map_group' => 'exif', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'OwnerName' => [ - 'map_group' => 'exif', - 'map_name' => 'CameraOwnerName', - 'mode' => XMPReader::MODE_SIMPLE, - ], - ], - 'http://purl.org/dc/elements/1.1/' => [ - 'title' => [ - 'map_group' => 'general', - 'map_name' => 'ObjectName', - 'mode' => XMPReader::MODE_LANG - ], - 'description' => [ - 'map_group' => 'general', - 'map_name' => 'ImageDescription', - 'mode' => XMPReader::MODE_LANG - ], - 'contributor' => [ - 'map_group' => 'general', - 'map_name' => 'dc-contributor', - 'mode' => XMPReader::MODE_BAG - ], - 'coverage' => [ - 'map_group' => 'general', - 'map_name' => 'dc-coverage', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'creator' => [ - 'map_group' => 'general', - 'map_name' => 'Artist', // map with exif Artist, iptc byline (2:80) - 'mode' => XMPReader::MODE_SEQ, - ], - 'date' => [ - 'map_group' => 'general', - // Note, not mapped with other date properties, as this type of date is - // non-specific: "A point or period of time associated with an event in - // the lifecycle of the resource" - 'map_name' => 'dc-date', - 'mode' => XMPReader::MODE_SEQ, - 'validate' => 'validateDate', - ], - /* Do not extract dc:format, as we've got better 
ways to determine MIME type */ - 'identifier' => [ - 'map_group' => 'deprecated', - 'map_name' => 'Identifier', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'language' => [ - 'map_group' => 'general', - 'map_name' => 'LanguageCode', /* mapped with iptc 2:135 */ - 'mode' => XMPReader::MODE_BAG, - 'validate' => 'validateLangCode', - ], - 'publisher' => [ - 'map_group' => 'general', - 'map_name' => 'dc-publisher', - 'mode' => XMPReader::MODE_BAG, - ], - // for related images/resources - 'relation' => [ - 'map_group' => 'general', - 'map_name' => 'dc-relation', - 'mode' => XMPReader::MODE_BAG, - ], - 'rights' => [ - 'map_group' => 'general', - 'map_name' => 'Copyright', - 'mode' => XMPReader::MODE_LANG, - ], - // Note: source is not mapped with iptc source, since iptc - // source describes the source of the image in terms of a person - // who provided the image, where this is to describe an image that the - // current one is based on. - 'source' => [ - 'map_group' => 'general', - 'map_name' => 'dc-source', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'subject' => [ - 'map_group' => 'general', - 'map_name' => 'Keywords', /* maps to iptc 2:25 */ - 'mode' => XMPReader::MODE_BAG, - ], - 'type' => [ - 'map_group' => 'general', - 'map_name' => 'dc-type', - 'mode' => XMPReader::MODE_BAG, - ], - ], - 'http://ns.adobe.com/xap/1.0/' => [ - 'CreateDate' => [ - 'map_group' => 'general', - 'map_name' => 'DateTimeDigitized', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateDate', - ], - 'CreatorTool' => [ - 'map_group' => 'general', - 'map_name' => 'Software', - 'mode' => XMPReader::MODE_SIMPLE - ], - 'Identifier' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - ], - 'Label' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'ModifyDate' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'DateTime', - 'validate' => 'validateDate', - ], - 'MetadataDate' => [ - 'map_group' => 'general', - 'mode' => 
XMPReader::MODE_SIMPLE, - // map_name to be consistent with other date names. - 'map_name' => 'DateTimeMetadata', - 'validate' => 'validateDate', - ], - 'Nickname' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Rating' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateRating', - ], - ], - 'http://ns.adobe.com/xap/1.0/rights/' => [ - 'Certificate' => [ - 'map_group' => 'general', - 'map_name' => 'RightsCertificate', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Marked' => [ - 'map_group' => 'general', - 'map_name' => 'Copyrighted', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateBoolean', - ], - 'Owner' => [ - 'map_group' => 'general', - 'map_name' => 'CopyrightOwner', - 'mode' => XMPReader::MODE_BAG, - ], - // this seems similar to dc:rights. - 'UsageTerms' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_LANG, - ], - 'WebStatement' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - ], - // XMP media management. - 'http://ns.adobe.com/xap/1.0/mm/' => [ - // if we extract the exif UniqueImageID, might - // as well do this too. - 'OriginalDocumentID' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - // It might also be useful to do xmpMM:LastURL - // and xmpMM:DerivedFrom as you can potentially, - // get the url of this document/source for this - // document. However whats more likely is you'd - // get a file:// url for the path of the doc, - // which is somewhat of a privacy issue. 
- ], - 'http://creativecommons.org/ns#' => [ - 'license' => [ - 'map_name' => 'LicenseUrl', - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'morePermissions' => [ - 'map_name' => 'MorePermissionsUrl', - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'attributionURL' => [ - 'map_group' => 'general', - 'map_name' => 'AttributionUrl', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'attributionName' => [ - 'map_group' => 'general', - 'map_name' => 'PreferredAttributionName', - 'mode' => XMPReader::MODE_SIMPLE, - ], - ], - // Note, this property affects how jpeg metadata is extracted. - 'http://ns.adobe.com/xmp/note/' => [ - 'HasExtendedXMP' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - ], - ], - /* Note, in iptc schemas, the legacy properties are denoted - * as deprecated, since other properties should used instead, - * and properties marked as deprecated in the standard are - * are marked as general here as they don't have replacements - */ - 'http://ns.adobe.com/photoshop/1.0/' => [ - 'City' => [ - 'map_group' => 'deprecated', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'CityDest', - ], - 'Country' => [ - 'map_group' => 'deprecated', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'CountryDest', - ], - 'State' => [ - 'map_group' => 'deprecated', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'ProvinceOrStateDest', - ], - 'DateCreated' => [ - 'map_group' => 'deprecated', - // marking as deprecated as the xmp prop preferred - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'DateTimeOriginal', - 'validate' => 'validateDate', - // note this prop is an XMP, not IPTC date - ], - 'CaptionWriter' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'Writer', - ], - 'Instructions' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'SpecialInstructions', - ], - 'TransmissionReference' => [ - 'map_group' => 'general', - 'mode' => 
XMPReader::MODE_SIMPLE, - 'map_name' => 'OriginalTransmissionRef', - ], - 'AuthorsPosition' => [ - /* This corresponds with 2:85 - * By-line Title, which needs to be - * handled weirdly to correspond - * with iptc/exif. */ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE - ], - 'Credit' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Source' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Urgency' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'Category' => [ - // Note, this prop is deprecated, but in general - // group since it doesn't have a replacement. - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'iimCategory', - ], - 'SupplementalCategories' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - 'map_name' => 'iimSupplementalCategory', - ], - 'Headline' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE - ], - ], - 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/' => [ - 'CountryCode' => [ - 'map_group' => 'deprecated', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'CountryCodeDest', - ], - 'IntellectualGenre' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - // Note, this is a six digit code. - // See: http://cv.iptc.org/newscodes/scene/ - // Since these aren't really all that common, - // we just show the number. - 'Scene' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - 'validate' => 'validateInteger', - 'map_name' => 'SceneCode', - ], - /* Note: SubjectCode should be an 8 ascii digits. - * it is not really an integer (has leading 0's, - * cannot have a +/- sign), but validateInteger - * will let it through. 
- */ - 'SubjectCode' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - 'map_name' => 'SubjectNewsCode', - 'validate' => 'validateInteger' - ], - 'Location' => [ - 'map_group' => 'deprecated', - 'mode' => XMPReader::MODE_SIMPLE, - 'map_name' => 'SublocationDest', - ], - 'CreatorContactInfo' => [ - /* Note this maps to 2:118 in iim - * (Contact) field. However those field - * types are slightly different - 2:118 - * is free form text field, where this - * is more structured. - */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_STRUCT, - 'map_name' => 'Contact', - 'children' => [ - 'CiAdrExtadr' => true, - 'CiAdrCity' => true, - 'CiAdrCtry' => true, - 'CiEmailWork' => true, - 'CiTelWork' => true, - 'CiAdrPcode' => true, - 'CiAdrRegion' => true, - 'CiUrlWork' => true, - ], - ], - 'CiAdrExtadr' => [ /* address */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiAdrCity' => [ /* city */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiAdrCtry' => [ /* country */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiEmailWork' => [ /* email (possibly separated by ',') */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiTelWork' => [ /* telephone */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiAdrPcode' => [ /* postal code */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiAdrRegion' => [ /* province/state */ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CiUrlWork' => [ /* url. Multiple may be separated by comma. 
*/ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - /* End contact info struct properties */ - ], - 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/' => [ - 'Event' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - ], - 'OrganisationInImageName' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - 'map_name' => 'OrganisationInImage' - ], - 'PersonInImage' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_BAG, - ], - 'MaxAvailHeight' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - 'map_name' => 'OriginalImageHeight', - ], - 'MaxAvailWidth' => [ - 'map_group' => 'general', - 'mode' => XMPReader::MODE_SIMPLE, - 'validate' => 'validateInteger', - 'map_name' => 'OriginalImageWidth', - ], - // LocationShown and LocationCreated are handled - // specially because they are hierarchical, but we - // also want to merge with the old non-hierarchical. - 'LocationShown' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_BAGSTRUCT, - 'children' => [ - 'WorldRegion' => true, - 'CountryCode' => true, /* iso code */ - 'CountryName' => true, - 'ProvinceState' => true, - 'City' => true, - 'Sublocation' => true, - ], - ], - 'LocationCreated' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_BAGSTRUCT, - 'children' => [ - 'WorldRegion' => true, - 'CountryCode' => true, /* iso code */ - 'CountryName' => true, - 'ProvinceState' => true, - 'City' => true, - 'Sublocation' => true, - ], - ], - 'WorldRegion' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CountryCode' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'CountryName' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - 'map_name' => 'Country', - ], - 'ProvinceState' => [ - 'map_group' => 'special', - 'mode' => 
XMPReader::MODE_SIMPLE, - 'structPart' => true, - 'map_name' => 'ProvinceOrState', - ], - 'City' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - 'Sublocation' => [ - 'map_group' => 'special', - 'mode' => XMPReader::MODE_SIMPLE, - 'structPart' => true, - ], - - /* Other props that might be interesting but - * Not currently extracted: - * ArtworkOrObject, (info about objects in picture) - * DigitalSourceType - * RegistryId - */ - ], - - /* Plus props we might want to consider: - * (Note: some of these have unclear/incomplete definitions - * from the iptc4xmp standard). - * ImageSupplier (kind of like iptc source field) - * ImageSupplierId (id code for image from supplier) - * CopyrightOwner - * ImageCreator - * Licensor - * Various model release fields - * Property release fields. - */ - ]; -} diff --git a/includes/media/XMPValidate.php b/includes/media/XMPValidate.php deleted file mode 100644 index 32a3340fa1..0000000000 --- a/includes/media/XMPValidate.php +++ /dev/null @@ -1,400 +0,0 @@ -setLogger( $logger ); - } - - public function setLogger( LoggerInterface $logger ) { - $this->logger = $logger; - } - /** - * Function to validate boolean properties ( True or False ) - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateBoolean( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - if ( $val !== 'True' && $val !== 'False' ) { - $this->logger->info( __METHOD__ . 
" Expected True or False but got $val" ); - $val = null; - } - } - - /** - * function to validate rational properties ( 12/10 ) - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateRational( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - if ( !preg_match( '/^(?:-?\d+)\/(?:\d+[1-9]|[1-9]\d*)$/D', $val ) ) { - $this->logger->info( __METHOD__ . " Expected rational but got $val" ); - $val = null; - } - } - - /** - * function to validate rating properties -1, 0-5 - * - * if its outside of range put it into range. - * - * @see MWG spec - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateRating( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - if ( !preg_match( '/^[-+]?\d*(?:\.?\d*)$/D', $val ) - || !is_numeric( $val ) - ) { - $this->logger->info( __METHOD__ . " Expected rating but got $val" ); - $val = null; - - return; - } else { - $nVal = (float)$val; - if ( $nVal < 0 ) { - // We do < 0 here instead of < -1 here, since - // the values between 0 and -1 are also illegal - // as -1 is meant as a special reject rating. - $this->logger->info( __METHOD__ . " Rating too low, setting to -1 (Rejected)" ); - $val = '-1'; - - return; - } - if ( $nVal > 5 ) { - $this->logger->info( __METHOD__ . 
" Rating too high, setting to 5" ); - $val = '5'; - - return; - } - } - } - - /** - * function to validate integers - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateInteger( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - if ( !preg_match( '/^[-+]?\d+$/D', $val ) ) { - $this->logger->info( __METHOD__ . " Expected integer but got $val" ); - $val = null; - } - } - - /** - * function to validate properties with a fixed number of allowed - * choices. (closed choice) - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateClosed( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - - // check if its in a numeric range - $inRange = false; - if ( isset( $info['rangeLow'] ) - && isset( $info['rangeHigh'] ) - && is_numeric( $val ) - && ( intval( $val ) <= $info['rangeHigh'] ) - && ( intval( $val ) >= $info['rangeLow'] ) - ) { - $inRange = true; - } - - if ( !isset( $info['choices'][$val] ) && !$inRange ) { - $this->logger->info( __METHOD__ . 
" Expected closed choice, but got $val" ); - $val = null; - } - } - - /** - * function to validate and modify flash structure - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateFlash( $info, &$val, $standalone ) { - if ( $standalone ) { - // this only validates flash structs, not individual properties - return; - } - if ( !( isset( $val['Fired'] ) - && isset( $val['Function'] ) - && isset( $val['Mode'] ) - && isset( $val['RedEyeMode'] ) - && isset( $val['Return'] ) - ) ) { - $this->logger->info( __METHOD__ . " Flash structure did not have all the required components" ); - $val = null; - } else { - $val = ( "\0" | ( $val['Fired'] === 'True' ) - | ( intval( $val['Return'] ) << 1 ) - | ( intval( $val['Mode'] ) << 3 ) - | ( ( $val['Function'] === 'True' ) << 5 ) - | ( ( $val['RedEyeMode'] === 'True' ) << 6 ) ); - } - } - - /** - * function to validate LangCode properties ( en-GB, etc ) - * - * This is just a naive check to make sure it somewhat looks like a lang code. - * - * @see BCP 47 - * @see https://wwwimages2.adobe.com/content/dam/Adobe/en/devnet/xmp/pdfs/ - * XMP%20SDK%20Release%20cc-2014-12/XMPSpecificationPart1.pdf page 22 (section 8.2.2.4) - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate - * @param bool $standalone If this is a simple property or array - */ - public function validateLangCode( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - if ( !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $val ) ) { - // this is a rather naive check. - $this->logger->info( __METHOD__ . " Expected Lang code but got $val" ); - $val = null; - } - } - - /** - * function to validate date properties, and convert to (partial) Exif format. 
- * - * Dates can be one of the following formats: - * YYYY - * YYYY-MM - * YYYY-MM-DD - * YYYY-MM-DDThh:mmTZD - * YYYY-MM-DDThh:mm:ssTZD - * YYYY-MM-DDThh:mm:ss.sTZD - * - * @param array $info Information about current property - * @param mixed &$val Current value to validate. Converts to TS_EXIF as a side-effect. - * in cases where there's only a partial date, it will give things like - * 2011:04. - * @param bool $standalone If this is a simple property or array - */ - public function validateDate( $info, &$val, $standalone ) { - if ( !$standalone ) { - // this only validates standalone properties, not arrays, etc - return; - } - $res = []; - // @codingStandardsIgnoreStart Long line that cannot be broken - if ( !preg_match( - /* ahh! scary regex... */ - '/^([0-3]\d{3})(?:-([01]\d)(?:-([0-3]\d)(?:T([0-2]\d):([0-6]\d)(?::([0-6]\d)(?:\.\d+)?)?([-+]\d{2}:\d{2}|Z)?)?)?)?$/D', - $val, $res ) - ) { - // @codingStandardsIgnoreEnd - - $this->logger->info( __METHOD__ . " Expected date but got $val" ); - $val = null; - } else { - /* - * $res is formatted as follows: - * 0 -> full date. - * 1 -> year, 2-> month, 3-> day, 4-> hour, 5-> minute, 6->second - * 7-> Timezone specifier (Z or something like +12:30 ) - * many parts are optional, some aren't. For example if you specify - * minute, you must specify hour, day, month, and year but not second or TZ. - */ - - /* - * First of all, if year = 0000, Something is wrongish, - * so don't extract. This seems to happen when - * some programs convert between metadata formats. - */ - if ( $res[1] === '0000' ) { - $this->logger->info( __METHOD__ . " Invalid date (year 0): $val" ); - $val = null; - - return; - } - - if ( !isset( $res[4] ) ) { // hour - // just have the year month day (if that) - $val = $res[1]; - if ( isset( $res[2] ) ) { - $val .= ':' . $res[2]; - } - if ( isset( $res[3] ) ) { - $val .= ':' . 
$res[3]; - } - - return; - } - - if ( !isset( $res[7] ) || $res[7] === 'Z' ) { - // if hour is set, then minute must also be or regex above will fail. - $val = $res[1] . ':' . $res[2] . ':' . $res[3] - . ' ' . $res[4] . ':' . $res[5]; - if ( isset( $res[6] ) && $res[6] !== '' ) { - $val .= ':' . $res[6]; - } - - return; - } - - // Extra check for empty string necessary due to TZ but no second case. - $stripSeconds = false; - if ( !isset( $res[6] ) || $res[6] === '' ) { - $res[6] = '00'; - $stripSeconds = true; - } - - // Do timezone processing. We've already done the case that tz = Z. - - // We know that if we got to this step, year, month day hour and min must be set - // by virtue of regex not failing. - - $unix = ( new ConvertableTimestamp( - $res[1] . $res[2] . $res[3] . $res[4] . $res[5] . $res[6] - ) )->getTimestamp( TS_UNIX ); - $offset = intval( substr( $res[7], 1, 2 ) ) * 60 * 60; - $offset += intval( substr( $res[7], 4, 2 ) ) * 60; - if ( substr( $res[7], 0, 1 ) === '-' ) { - $offset = -$offset; - } - $val = ( new ConvertableTimestamp( $unix + $offset ) )->getTimestamp( TS_EXIF ); - - if ( $stripSeconds ) { - // If seconds weren't specified, remove the trailing ':00'. - $val = substr( $val, 0, -3 ); - } - } - } - - /** function to validate, and more importantly - * translate the XMP DMS form of gps coords to - * the decimal form we use. 
- * - * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf - * section 1.2.7.4 on page 23 - * - * @param array $info Unused (info about prop) - * @param string &$val GPS string in either DDD,MM,SSk or - * or DDD,MM.mmk form - * @param bool $standalone If its a simple prop (should always be true) - */ - public function validateGPS( $info, &$val, $standalone ) { - if ( !$standalone ) { - return; - } - - $m = []; - if ( preg_match( - '/(\d{1,3}),(\d{1,2}),(\d{1,2})([NWSE])/D', - $val, $m ) - ) { - $coord = intval( $m[1] ); - $coord += intval( $m[2] ) * ( 1 / 60 ); - $coord += intval( $m[3] ) * ( 1 / 3600 ); - if ( $m[4] === 'S' || $m[4] === 'W' ) { - $coord = -$coord; - } - $val = $coord; - - return; - } elseif ( preg_match( - '/(\d{1,3}),(\d{1,2}(?:.\d*)?)([NWSE])/D', - $val, $m ) - ) { - $coord = intval( $m[1] ); - $coord += floatval( $m[2] ) * ( 1 / 60 ); - if ( $m[3] === 'S' || $m[3] === 'W' ) { - $coord = -$coord; - } - $val = $coord; - - return; - } else { - $this->logger->info( __METHOD__ - . " Expected GPSCoordinate, but got $val." ); - $val = null; - - return; - } - } -} diff --git a/tests/phpunit/includes/libs/xmp/XMPTest.php b/tests/phpunit/includes/libs/xmp/XMPTest.php new file mode 100644 index 0000000000..ac52a39ffe --- /dev/null +++ b/tests/phpunit/includes/libs/xmp/XMPTest.php @@ -0,0 +1,226 @@ +markTestSkipped( "PHP extension 'exif' is not loaded, skipping." ); + } + } + + /** + * Put XMP in, compare what comes out... + * + * @param string $xmp The actual xml data. + * @param array $expected Expected result of parsing the xmp. + * @param string $info Short sentence on what's being tested. + * + * @throws Exception + * @dataProvider provideXMPParse + * + * @covers XMPReader::parse + */ + public function testXMPParse( $xmp, $expected, $info ) { + if ( !is_string( $xmp ) || !is_array( $expected ) ) { + throw new Exception( "Invalid data provided to " . 
__METHOD__ ); + } + $reader = new XMPReader; + $reader->parse( $xmp ); + $this->assertEquals( $expected, $reader->getResults(), $info, 0.0000000001 ); + } + + public static function provideXMPParse() { + $xmpPath = __DIR__ . '/../../../data/xmp/'; + $data = []; + + // $xmpFiles format: array of arrays with first arg file base name, + // with the actual file having .xmp on the end for the xmp + // and .result.php on the end for a php file containing the result + // array. Second argument is some info on what's being tested. + $xmpFiles = [ + [ '1', 'parseType=Resource test' ], + [ '2', 'Structure with mixed attribute and element props' ], + [ '3', 'Extra qualifiers (that should be ignored)' ], + [ '3-invalid', 'Test ignoring qualifiers that look like normal props' ], + [ '4', 'Flash as qualifier' ], + [ '5', 'Flash as qualifier 2' ], + [ '6', 'Multiple rdf:Description' ], + [ '7', 'Generic test of several property types' ], + [ 'flash', 'Test of Flash property' ], + [ 'invalid-child-not-struct', 'Test child props not in struct or ignored' ], + [ 'no-recognized-props', 'Test namespace and no recognized props' ], + [ 'no-namespace', 'Test non-namespaced attributes are ignored' ], + [ 'bag-for-seq', "Allow bag's instead of seq's. (bug 27105)" ], + [ 'utf16BE', 'UTF-16BE encoding' ], + [ 'utf16LE', 'UTF-16LE encoding' ], + [ 'utf32BE', 'UTF-32BE encoding' ], + [ 'utf32LE', 'UTF-32LE encoding' ], + [ 'xmpExt', 'Extended XMP missing second part' ], + [ 'gps', 'Handling of exif GPS parameters in XMP' ], + ]; + + $xmpFiles[] = [ 'doctype-included', 'XMP includes doctype' ]; + + foreach ( $xmpFiles as $file ) { + $xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' ); + // I'm not sure if this is the best way to handle getting the + // result array, but it seems kind of big to put directly in the test + // file. + $result = null; + include $xmpPath . $file[0] . '.result.php'; + $data[] = [ $xmp, $result, '[' . $file[0] . '.xmp] ' . 
$file[1] ]; + } + + return $data; + } + + /** Test ExtendedXMP block support. (Used when the XMP has to be split + * over multiple jpeg segments, due to 64k size limit on jpeg segments. + * + * @todo This is based on what the standard says. Need to find a real + * world example file to double check the support for this is right. + * + * @covers XMPReader::parseExtended + */ + public function testExtendedXMP() { + $xmpPath = __DIR__ . '/../../../data/xmp/'; + $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); + $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); + + $md5sum = '28C74E0AC2D796886759006FBE2E57B7'; // of xmpExt2.xmp + $length = pack( 'N', strlen( $extendedXMP ) ); + $offset = pack( 'N', 0 ); + $extendedPacket = $md5sum . $length . $offset . $extendedXMP; + + $reader = new XMPReader(); + $reader->parse( $standardXMP ); + $reader->parseExtended( $extendedPacket ); + $actual = $reader->getResults(); + + $expected = [ + 'xmp-exif' => [ + 'DigitalZoomRatio' => '0/10', + 'Flash' => 9, + 'FNumber' => '2/10', + ] + ]; + + $this->assertEquals( $expected, $actual ); + } + + /** + * This test has an extended XMP block with a wrong guid (md5sum) + * and thus should only return the StandardXMP, not the ExtendedXMP. + * + * @covers XMPReader::parseExtended + */ + public function testExtendedXMPWithWrongGUID() { + $xmpPath = __DIR__ . '/../../../data/xmp/'; + $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); + $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); + + $md5sum = '28C74E0AC2D796886759006FBE2E57B9'; // Note last digit. + $length = pack( 'N', strlen( $extendedXMP ) ); + $offset = pack( 'N', 0 ); + $extendedPacket = $md5sum . $length . $offset . 
$extendedXMP; + + $reader = new XMPReader(); + $reader->parse( $standardXMP ); + $reader->parseExtended( $extendedPacket ); + $actual = $reader->getResults(); + + $expected = [ + 'xmp-exif' => [ + 'DigitalZoomRatio' => '0/10', + 'Flash' => 9, + ] + ]; + + $this->assertEquals( $expected, $actual ); + } + + /** + * Have a high offset to simulate a missing packet, + * which should cause it to ignore the ExtendedXMP packet. + * + * @covers XMPReader::parseExtended + */ + public function testExtendedXMPMissingPacket() { + $xmpPath = __DIR__ . '/../../../data/xmp/'; + $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); + $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); + + $md5sum = '28C74E0AC2D796886759006FBE2E57B7'; // of xmpExt2.xmp + $length = pack( 'N', strlen( $extendedXMP ) ); + $offset = pack( 'N', 2048 ); + $extendedPacket = $md5sum . $length . $offset . $extendedXMP; + + $reader = new XMPReader(); + $reader->parse( $standardXMP ); + $reader->parseExtended( $extendedPacket ); + $actual = $reader->getResults(); + + $expected = [ + 'xmp-exif' => [ + 'DigitalZoomRatio' => '0/10', + 'Flash' => 9, + ] + ]; + + $this->assertEquals( $expected, $actual ); + } + + /** + * Test for multi-section, hostile XML + * @covers XMPReader::checkParseSafety + */ + public function testCheckParseSafety() { + + // Test for detection + $xmpPath = __DIR__ . '/../../../data/xmp/'; + $file = fopen( $xmpPath . 'doctype-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertFalse( $valid, 'Check that doctype is detected in fragmented XML' ); + $this->assertEquals( + [], + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + fclose( $file ); + unset( $reader ); + + // Test for false positives + $file = fopen( $xmpPath . 
'doctype-not-included.xmp', 'rb' ); + $valid = false; + $reader = new XMPReader(); + do { + $chunk = fread( $file, 10 ); + $valid = $reader->parse( $chunk, feof( $file ) ); + } while ( !feof( $file ) ); + $this->assertTrue( + $valid, + 'Check for false-positive detecting doctype in fragmented XML' + ); + $this->assertEquals( + [ + 'xmp-exif' => [ + 'DigitalZoomRatio' => '0/10', + 'Flash' => '9' + ] + ], + $reader->getResults(), + 'Check that doctype is detected in fragmented XML' + ); + } +} diff --git a/tests/phpunit/includes/libs/xmp/XMPValidateTest.php b/tests/phpunit/includes/libs/xmp/XMPValidateTest.php new file mode 100644 index 0000000000..7f7ea930e2 --- /dev/null +++ b/tests/phpunit/includes/libs/xmp/XMPValidateTest.php @@ -0,0 +1,53 @@ +validateDate( [], $value, true ); + $this->assertEquals( $expected, $value ); + } + + public static function provideDates() { + /* For reference valid date formats are: + * YYYY + * YYYY-MM + * YYYY-MM-DD + * YYYY-MM-DDThh:mmTZD + * YYYY-MM-DDThh:mm:ssTZD + * YYYY-MM-DDThh:mm:ss.sTZD + * (Time zone is optional) + */ + return [ + [ '1992', '1992' ], + [ '1992-04', '1992:04' ], + [ '1992-02-01', '1992:02:01' ], + [ '2011-09-29', '2011:09:29' ], + [ '1982-12-15T20:12', '1982:12:15 20:12' ], + [ '1982-12-15T20:12Z', '1982:12:15 20:12' ], + [ '1982-12-15T20:12+02:30', '1982:12:15 22:42' ], + [ '1982-12-15T01:12-02:30', '1982:12:14 22:42' ], + [ '1982-12-15T20:12:11', '1982:12:15 20:12:11' ], + [ '1982-12-15T20:12:11Z', '1982:12:15 20:12:11' ], + [ '1982-12-15T20:12:11+01:10', '1982:12:15 21:22:11' ], + [ '2045-12-15T20:12:11', '2045:12:15 20:12:11' ], + [ '1867-06-01T15:00:00', '1867:06:01 15:00:00' ], + /* some invalid ones */ + [ '2001--12', null ], + [ '2001-5-12', null ], + [ '2001-5-12TZ', null ], + [ '2001-05-12T15', null ], + [ '2001-12T15:13', null ], + ]; + } +} diff --git a/tests/phpunit/includes/media/XMPTest.php b/tests/phpunit/includes/media/XMPTest.php deleted file mode 100644 index bffe415c5d..0000000000 --- 
a/tests/phpunit/includes/media/XMPTest.php +++ /dev/null @@ -1,223 +0,0 @@ -checkPHPExtension( 'exif' ); # Requires libxml to do XMP parsing - } - - /** - * Put XMP in, compare what comes out... - * - * @param string $xmp The actual xml data. - * @param array $expected Expected result of parsing the xmp. - * @param string $info Short sentence on what's being tested. - * - * @throws Exception - * @dataProvider provideXMPParse - * - * @covers XMPReader::parse - */ - public function testXMPParse( $xmp, $expected, $info ) { - if ( !is_string( $xmp ) || !is_array( $expected ) ) { - throw new Exception( "Invalid data provided to " . __METHOD__ ); - } - $reader = new XMPReader; - $reader->parse( $xmp ); - $this->assertEquals( $expected, $reader->getResults(), $info, 0.0000000001 ); - } - - public static function provideXMPParse() { - $xmpPath = __DIR__ . '/../../data/xmp/'; - $data = []; - - // $xmpFiles format: array of arrays with first arg file base name, - // with the actual file having .xmp on the end for the xmp - // and .result.php on the end for a php file containing the result - // array. Second argument is some info on what's being tested. - $xmpFiles = [ - [ '1', 'parseType=Resource test' ], - [ '2', 'Structure with mixed attribute and element props' ], - [ '3', 'Extra qualifiers (that should be ignored)' ], - [ '3-invalid', 'Test ignoring qualifiers that look like normal props' ], - [ '4', 'Flash as qualifier' ], - [ '5', 'Flash as qualifier 2' ], - [ '6', 'Multiple rdf:Description' ], - [ '7', 'Generic test of several property types' ], - [ 'flash', 'Test of Flash property' ], - [ 'invalid-child-not-struct', 'Test child props not in struct or ignored' ], - [ 'no-recognized-props', 'Test namespace and no recognized props' ], - [ 'no-namespace', 'Test non-namespaced attributes are ignored' ], - [ 'bag-for-seq', "Allow bag's instead of seq's. 
(bug 27105)" ], - [ 'utf16BE', 'UTF-16BE encoding' ], - [ 'utf16LE', 'UTF-16LE encoding' ], - [ 'utf32BE', 'UTF-32BE encoding' ], - [ 'utf32LE', 'UTF-32LE encoding' ], - [ 'xmpExt', 'Extended XMP missing second part' ], - [ 'gps', 'Handling of exif GPS parameters in XMP' ], - ]; - - $xmpFiles[] = [ 'doctype-included', 'XMP includes doctype' ]; - - foreach ( $xmpFiles as $file ) { - $xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' ); - // I'm not sure if this is the best way to handle getting the - // result array, but it seems kind of big to put directly in the test - // file. - $result = null; - include $xmpPath . $file[0] . '.result.php'; - $data[] = [ $xmp, $result, '[' . $file[0] . '.xmp] ' . $file[1] ]; - } - - return $data; - } - - /** Test ExtendedXMP block support. (Used when the XMP has to be split - * over multiple jpeg segments, due to 64k size limit on jpeg segments. - * - * @todo This is based on what the standard says. Need to find a real - * world example file to double check the support for this is right. - * - * @covers XMPReader::parseExtended - */ - public function testExtendedXMP() { - $xmpPath = __DIR__ . '/../../data/xmp/'; - $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); - $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); - - $md5sum = '28C74E0AC2D796886759006FBE2E57B7'; // of xmpExt2.xmp - $length = pack( 'N', strlen( $extendedXMP ) ); - $offset = pack( 'N', 0 ); - $extendedPacket = $md5sum . $length . $offset . $extendedXMP; - - $reader = new XMPReader(); - $reader->parse( $standardXMP ); - $reader->parseExtended( $extendedPacket ); - $actual = $reader->getResults(); - - $expected = [ - 'xmp-exif' => [ - 'DigitalZoomRatio' => '0/10', - 'Flash' => 9, - 'FNumber' => '2/10', - ] - ]; - - $this->assertEquals( $expected, $actual ); - } - - /** - * This test has an extended XMP block with a wrong guid (md5sum) - * and thus should only return the StandardXMP, not the ExtendedXMP. 
- * - * @covers XMPReader::parseExtended - */ - public function testExtendedXMPWithWrongGUID() { - $xmpPath = __DIR__ . '/../../data/xmp/'; - $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); - $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); - - $md5sum = '28C74E0AC2D796886759006FBE2E57B9'; // Note last digit. - $length = pack( 'N', strlen( $extendedXMP ) ); - $offset = pack( 'N', 0 ); - $extendedPacket = $md5sum . $length . $offset . $extendedXMP; - - $reader = new XMPReader(); - $reader->parse( $standardXMP ); - $reader->parseExtended( $extendedPacket ); - $actual = $reader->getResults(); - - $expected = [ - 'xmp-exif' => [ - 'DigitalZoomRatio' => '0/10', - 'Flash' => 9, - ] - ]; - - $this->assertEquals( $expected, $actual ); - } - - /** - * Have a high offset to simulate a missing packet, - * which should cause it to ignore the ExtendedXMP packet. - * - * @covers XMPReader::parseExtended - */ - public function testExtendedXMPMissingPacket() { - $xmpPath = __DIR__ . '/../../data/xmp/'; - $standardXMP = file_get_contents( $xmpPath . 'xmpExt.xmp' ); - $extendedXMP = file_get_contents( $xmpPath . 'xmpExt2.xmp' ); - - $md5sum = '28C74E0AC2D796886759006FBE2E57B7'; // of xmpExt2.xmp - $length = pack( 'N', strlen( $extendedXMP ) ); - $offset = pack( 'N', 2048 ); - $extendedPacket = $md5sum . $length . $offset . $extendedXMP; - - $reader = new XMPReader(); - $reader->parse( $standardXMP ); - $reader->parseExtended( $extendedPacket ); - $actual = $reader->getResults(); - - $expected = [ - 'xmp-exif' => [ - 'DigitalZoomRatio' => '0/10', - 'Flash' => 9, - ] - ]; - - $this->assertEquals( $expected, $actual ); - } - - /** - * Test for multi-section, hostile XML - * @covers XMPReader::checkParseSafety - */ - public function testCheckParseSafety() { - - // Test for detection - $xmpPath = __DIR__ . '/../../data/xmp/'; - $file = fopen( $xmpPath . 
'doctype-included.xmp', 'rb' ); - $valid = false; - $reader = new XMPReader(); - do { - $chunk = fread( $file, 10 ); - $valid = $reader->parse( $chunk, feof( $file ) ); - } while ( !feof( $file ) ); - $this->assertFalse( $valid, 'Check that doctype is detected in fragmented XML' ); - $this->assertEquals( - [], - $reader->getResults(), - 'Check that doctype is detected in fragmented XML' - ); - fclose( $file ); - unset( $reader ); - - // Test for false positives - $file = fopen( $xmpPath . 'doctype-not-included.xmp', 'rb' ); - $valid = false; - $reader = new XMPReader(); - do { - $chunk = fread( $file, 10 ); - $valid = $reader->parse( $chunk, feof( $file ) ); - } while ( !feof( $file ) ); - $this->assertTrue( - $valid, - 'Check for false-positive detecting doctype in fragmented XML' - ); - $this->assertEquals( - [ - 'xmp-exif' => [ - 'DigitalZoomRatio' => '0/10', - 'Flash' => '9' - ] - ], - $reader->getResults(), - 'Check that doctype is detected in fragmented XML' - ); - } -} diff --git a/tests/phpunit/includes/media/XMPValidateTest.php b/tests/phpunit/includes/media/XMPValidateTest.php deleted file mode 100644 index 6a006295df..0000000000 --- a/tests/phpunit/includes/media/XMPValidateTest.php +++ /dev/null @@ -1,53 +0,0 @@ -validateDate( [], $value, true ); - $this->assertEquals( $expected, $value ); - } - - public static function provideDates() { - /* For reference valid date formats are: - * YYYY - * YYYY-MM - * YYYY-MM-DD - * YYYY-MM-DDThh:mmTZD - * YYYY-MM-DDThh:mm:ssTZD - * YYYY-MM-DDThh:mm:ss.sTZD - * (Time zone is optional) - */ - return [ - [ '1992', '1992' ], - [ '1992-04', '1992:04' ], - [ '1992-02-01', '1992:02:01' ], - [ '2011-09-29', '2011:09:29' ], - [ '1982-12-15T20:12', '1982:12:15 20:12' ], - [ '1982-12-15T20:12Z', '1982:12:15 20:12' ], - [ '1982-12-15T20:12+02:30', '1982:12:15 22:42' ], - [ '1982-12-15T01:12-02:30', '1982:12:14 22:42' ], - [ '1982-12-15T20:12:11', '1982:12:15 20:12:11' ], - [ '1982-12-15T20:12:11Z', '1982:12:15 20:12:11' ], - [ 
'1982-12-15T20:12:11+01:10', '1982:12:15 21:22:11' ], - [ '2045-12-15T20:12:11', '2045:12:15 20:12:11' ], - [ '1867-06-01T15:00:00', '1867:06:01 15:00:00' ], - /* some invalid ones */ - [ '2001--12', null ], - [ '2001-5-12', null ], - [ '2001-5-12TZ', null ], - [ '2001-05-12T15', null ], - [ '2001-12T15:13', null ], - ]; - } -}