*
* The public methods one would call in this class are
* - parse( $content )
- * Reads in xmp content.
- * Can potentially be called multiple times with partial data each time.
+ * Reads in xmp content.
+ * Can potentially be called multiple times with partial data each time.
* - parseExtended( $content )
- * Reads XMPExtended blocks (jpeg files only).
+ * Reads XMPExtended blocks (jpeg files only).
* - getResults
- * Outputs a results array.
+ * Outputs a results array.
*
* Note XMP kind of looks like rdf. They are not the same thing - XMP is
* encoded as a specific subset of rdf. This class can read XMP. It cannot
*
*/
class XMPReader {
+ private $curItem = array(); // array to hold the current element (and previous element, and so on)
- private $curItem = array(); // array to hold the current element (and previous element, and so on)
- private $ancestorStruct = false; // the structure name when processing nested structures.
- private $charContent = false; // temporary holder for character data that appears in xmp doc.
- private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
- private $results = array(); // array to hold results
- private $processingArray = false; // if we're doing a seq or bag.
- private $itemLang = false; // used for lang alts only
+ private $ancestorStruct = false; // the structure name when processing nested structures.
+
+ private $charContent = false; // temporary holder for character data that appears in xmp doc.
+
+ private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
+
+ private $results = array(); // array to hold results
+
+ private $processingArray = false; // if we're doing a seq or bag.
+
+ private $itemLang = false; // used for lang alts only
private $xmlParser;
+
private $charset = false;
+
private $extendedXMPOffset = 0;
protected $items;
$this->items = XMPInfo::getItems();
$this->resetXMLParser();
-
}
+
/**
* Main use is if a single item has multiple xmp documents describing it.
* For example in jpeg's with extendedXMP
* the array, and transform any metadata that is special-cased.
*
* @return Array array of results as an array of arrays suitable for
- * FormatMetadata::getFormattedData().
+ * FormatMetadata::getFormattedData().
*/
public function getResults() {
// xmp-special is for metadata that affects how stuff
$data = $this->results;
- wfRunHooks( 'XMPGetResults', Array( &$data ) );
+ wfRunHooks( 'XMPGetResults', array( &$data ) );
if ( isset( $data['xmp-special']['AuthorsPosition'] )
&& is_string( $data['xmp-special']['AuthorsPosition'] )
} catch ( MWException $e ) {
wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
$this->results = array();
+
return false;
}
+
return true;
}
// or programs that make such files..
$guid = substr( $content, 0, 32 );
if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
- || $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) {
- wfDebugLog( 'XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
+ || $this->results['xmp-special']['HasExtendedXMP'] !== $guid
+ ) {
+ wfDebugLog( 'XMP', __METHOD__ .
+ " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
+
return false;
}
$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );
if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
wfDebugLog( 'XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' );
+
return false;
}
- // we're not very robust here. we should accept it in the wrong order. To quote
- // the xmp standard:
- // "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
- // StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
- // robust JPEG reader should tolerate the marker segments in any order."
+ // we're not very robust here. we should accept it in the wrong order.
+ // To quote the XMP standard:
+ // "A JPEG writer should write the ExtendedXMP marker segments in order,
+ // immediately following the StandardXMP. However, the JPEG standard
+ // does not require preservation of marker segment order. A robust JPEG
+ // reader should tolerate the marker segments in any order."
//
- // otoh the probability that an image will have more than 128k of metadata is rather low...
- // so the probability that it will have > 128k, and be in the wrong order is very low...
+ // otoh the probability that an image will have more than 128k of
+ // metadata is rather low... so the probability that it will have
+ // > 128k, and be in the wrong order is very low...
if ( $len['offset'] !== $this->extendedXMPOffset ) {
wfDebugLog( 'XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
+
return false;
}
}
wfDebugLog( 'XMP', __METHOD__ . 'Parsing a XMPExtended block' );
+
return $this->parse( $actualContent, $atEnd );
}
} else {
$this->charContent .= $data;
}
-
}
/** When we hit a closing element in MODE_IGNORE
}
array_shift( $this->curItem );
array_shift( $this->mode );
-
}
/**
&& !( $elm === self::NS_RDF . ' Description'
&& $this->mode[0] === self::MODE_STRUCT )
) {
- throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
+ throw new MWException( "nesting mismatch. got a </$elm> but expected a </" .
+ $this->curItem[0] . '>' );
}
// Validate structures.
if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
// This can happen if all the members of the struct failed validation.
wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
-
} elseif ( is_callable( $validate ) ) {
$val =& $this->results['xmp-' . $info['map_group']][$finalName];
call_user_func_array( $validate, array( $info, &$val, false ) );
if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
+
return;
}
if ( $info['mode'] === self::MODE_LANG ) {
$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
}
-
} else {
throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
}
if ( $elm === self::NS_RDF . ' value' ) {
list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
$this->saveValue( $ns, $tag, $this->charContent );
+
return;
} else {
array_shift( $this->mode );
function endElement( $parser, $elm ) {
if ( $elm === ( self::NS_RDF . ' RDF' )
|| $elm === 'adobe:ns:meta/ xmpmeta'
- || $elm === 'adobe:ns:meta/ xapmeta' )
- {
+ || $elm === 'adobe:ns:meta/ xapmeta'
+ ) {
// ignore these.
return;
}
// that forgets the namespace on some things.
// (Luckily they are unimportant things).
wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
+
return;
}
throw new MWException( "Hit end element </$elm> but no curItem" );
}
- switch( $this->mode[0] ) {
+ switch ( $this->mode[0] ) {
case self::MODE_IGNORE:
$this->endElementModeIgnore( $elm );
break;
} else {
throw new MWException( "Expected <rdf:Bag> but got $elm." );
}
-
}
/**
} else {
throw new MWException( "Expected <rdf:Seq> but got $elm." );
}
-
}
/**
} else {
throw new MWException( "Expected <rdf:Seq> but got $elm." );
}
-
}
/**
} elseif ( $elm === self::NS_RDF . ' value' ) {
// This should not be here.
throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
-
} else {
// something else we don't recognize, like a qualifier maybe.
- wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
+ wfDebugLog( 'XMP', __METHOD__ .
+ " Encountered element <$elm> where only expecting character data as value of " .
+ $this->curItem[0] );
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $elm );
-
}
-
}
/**
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $ns . ' ' . $tag );
+
return;
}
$mode = $this->items[$ns][$tag]['mode'];
wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $ns . ' ' . $tag );
+
return;
}
-
}
// process attributes
$this->doAttribs( $attribs );
if ( isset( $this->items[$ns][$tag] ) ) {
if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
- && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
- {
+ && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
+ ) {
// This assumes that we don't have inter-namespace nesting
// which we don't in all the properties we're interested in.
throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
if ( $this->charContent !== false ) {
// Something weird.
// Should not happen in valid XMP.
- throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
+ throw new MWException( "tag <$tag> nested in non-whitespace characters (" .
+ $this->charContent . ")." );
}
} else {
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $elm );
+
return;
}
-
}
if ( $ns === self::NS_RDF && $tag === 'Description' ) {
? $this->items[$curNS][$curTag]['map_name'] : $curTag;
$this->doAttribs( $attribs );
-
} else {
// Normal BAG or SEQ containing simple values.
array_unshift( $this->mode, self::MODE_SIMPLE );
array_unshift( $this->curItem, $this->curItem[0] );
$this->processingArray = true;
}
-
}
/**
throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
}
if ( !isset( $attribs[self::NS_XML . ' lang'] )
- || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] ) )
- {
+ || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] )
+ ) {
throw new MWException( __METHOD__
. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
}
if ( $elm === self::NS_RDF . ' RDF'
|| $elm === 'adobe:ns:meta/ xmpmeta'
- || $elm === 'adobe:ns:meta/ xapmeta' )
- {
+ || $elm === 'adobe:ns:meta/ xapmeta'
+ ) {
/* ignore. */
return;
} elseif ( $elm === self::NS_RDF . ' Description' ) {
if ( strpos( $elm, ' ' ) === false ) {
// This probably shouldn't happen.
wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
+
return;
}
. "encountered <$elm> with no mode" );
}
- switch( $this->mode[0] ) {
+ switch ( $this->mode[0] ) {
case self::MODE_IGNORE:
$this->startElementModeIgnore( $elm );
break;
* @throws MWException
*/
private function doAttribs( $attribs ) {
-
// first check for rdf:parseType attribute, as that can change
// how the attributes are interpreted.
if ( isset( $attribs[self::NS_RDF . ' parseType'] )
&& $attribs[self::NS_RDF . ' parseType'] === 'Resource'
- && $this->mode[0] === self::MODE_SIMPLE )
- {
+ && $this->mode[0] === self::MODE_SIMPLE
+ ) {
// this is equivalent to having an inner rdf:Description
$this->mode[0] = self::MODE_QDESC;
}
// is to be consistent between here and validating structures.
if ( is_null( $val ) ) {
wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+
return;
}
} else {