Make partial dates in XMP not have the omitted fields filled out to 1's (reported...
[lhc/web/wiklou.git] / includes / media / XMPValidate.php
1 <?php
2 /**
3 * This contains some static methods for
4 * validating XMP properties. See XMPInfo and XMPReader classes.
5 *
6 * Each of these functions takes the same parameters:
7 * * an info array which is a subset of the XMPInfo::items array
8 * * A value (passed as reference) to validate. This can be either a
9 * simple value or an array
10 * * A boolean to determine if this is validating a simple or complex values
11 *
12 * It should be noted that when an array is being validated, typically the validation
13 * function is called once for each value, and then once at the end for the entire array.
14 *
15 * These validation functions can also be used to modify the data. See the GPS and flash ones
16 * for example.
17 *
18 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf starting at pg 28
19 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf starting at pg 11
20 */
class XMPValidate {
	/**
	 * Validate a boolean property, which must be exactly 'True' or 'False'.
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate; set to null if invalid
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateBoolean( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}
		if ( $val !== 'True' && $val !== 'False' ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected True or False but got $val" );
			$val = null;
		}
	}

	/**
	 * Validate a rational property ( 12/10 ).
	 *
	 * The numerator is an optionally negative integer; the denominator is a
	 * string of digits containing at least one non-zero digit (so 0, 00, ...
	 * are rejected, while 10, 01 and 010 are accepted).
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate; set to null if invalid
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateRational( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}
		if ( !preg_match( '/^-?\d+\/\d*[1-9]\d*$/D', $val ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected rational but got $val" );
			$val = null;
		}
	}

	/**
	 * Validate a rating property: -1 (rejected) or 0 to 5.
	 *
	 * A numeric value outside of that range is clamped into it rather
	 * than discarded; a non-numeric value is replaced by null.
	 *
	 * @see MWG spec
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateRating( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}

		// The regex alone would accept '', '+' or '.', hence the extra is_numeric().
		if ( !preg_match( '/^[-+]?\d*(?:\.?\d*)$/D', $val ) || !is_numeric( $val ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected rating but got $val" );
			$val = null;
			return;
		}

		$rating = (float)$val;
		if ( $rating < 0 ) {
			// We compare against 0 instead of -1, since the values
			// between 0 and -1 are also illegal: -1 is meant as a
			// special "rejected" rating.
			wfDebugLog( 'XMP', __METHOD__ . " Rating too low, setting to -1 (Rejected)" );
			$val = '-1';
		} elseif ( $rating > 5 ) {
			wfDebugLog( 'XMP', __METHOD__ . " Rating too high, setting to 5" );
			$val = '5';
		}
	}

	/**
	 * Validate an integer property (optional sign followed by digits).
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate; set to null if invalid
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateInteger( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}
		if ( !preg_match( '/^[-+]?\d+$/D', $val ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected integer but got $val" );
			$val = null;
		}
	}

	/**
	 * Validate a property with a fixed number of allowed choices
	 * (closed choice).
	 *
	 * The value is accepted if it is a key of $info['choices'], or if
	 * $info has both 'rangeLow' and 'rangeHigh' and the numeric value
	 * (truncated to an integer) lies inside that inclusive range.
	 *
	 * @param $info Array information about current property
	 * @param &$val Mixed current value to validate; set to null if invalid
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateClosed( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}

		// check if it is in a numeric range
		$inRange = isset( $info['rangeLow'] )
			&& isset( $info['rangeHigh'] )
			&& is_numeric( $val )
			&& ( intval( $val ) <= $info['rangeHigh'] )
			&& ( intval( $val ) >= $info['rangeLow'] );

		if ( !isset( $info['choices'][$val] ) && !$inRange ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected closed choice, but got $val" );
			$val = null;
		}
	}

	/**
	 * Validate a flash structure and collapse it into a single integer.
	 *
	 * All of Fired, Function, Mode, RedEyeMode and Return must be present,
	 * otherwise the value is replaced by null. The resulting integer is
	 * built as: Fired in bit 0, Return shifted left by 1, Mode shifted left
	 * by 3, Function in bit 5 and RedEyeMode in bit 6. Return and Mode are
	 * not masked, so out-of-range values can spill into higher bits.
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateFlash( $info, &$val, $standalone ) {
		if ( $standalone ) {
			// this only validates flash structs, not individual properties
			return;
		}
		if ( !( isset( $val['Fired'] )
			&& isset( $val['Function'] )
			&& isset( $val['Mode'] )
			&& isset( $val['RedEyeMode'] )
			&& isset( $val['Return'] )
		) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Flash structure did not have all the required components" );
			$val = null;
		} else {
			// Explicit integer casts: OR-ing a "\0" string with a boolean
			// relies on non-numeric string coercion, which newer PHP
			// versions warn about or reject outright.
			$val = ( (int)( $val['Fired'] === 'True' )
				| ( intval( $val['Return'] ) << 1 )
				| ( intval( $val['Mode'] ) << 3 )
				| ( (int)( $val['Function'] === 'True' ) << 5 )
				| ( (int)( $val['RedEyeMode'] === 'True' ) << 6 ) );
		}
	}

	/**
	 * Validate a LangCode property ( en-GB, etc ).
	 *
	 * This is just a naive check to make sure it somewhat looks like a
	 * lang code: two or more ASCII letters, digits or hyphens.
	 *
	 * @see rfc 3066
	 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf page 30 (section 8.2.2.5)
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate; set to null if invalid
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateLangCode( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}
		if ( !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $val ) ) {
			// this is a rather naive check.
			wfDebugLog( 'XMP', __METHOD__ . " Expected Lang code but got $val" );
			$val = null;
		}
	}

	/**
	 * Validate a date property and convert it to (partial) Exif format.
	 *
	 * Dates can be one of the following formats:
	 *   YYYY
	 *   YYYY-MM
	 *   YYYY-MM-DD
	 *   YYYY-MM-DDThh:mmTZD
	 *   YYYY-MM-DDThh:mm:ssTZD
	 *   YYYY-MM-DDThh:mm:ss.sTZD
	 *
	 * @param $info Array information about current property (not read here)
	 * @param &$val Mixed current value to validate. Converts to TS_EXIF as a side-effect.
	 *    In cases where there's only a partial date, it will give things like
	 *    2011:04. Omitted fields are left out rather than filled in.
	 *    Set to null if the value is not a valid date.
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateDate( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}

		/*
		 * On success $res is formatted as follows:
		 * 0 -> full date.
		 * 1 -> year, 2 -> month, 3 -> day, 4 -> hour, 5 -> minute, 6 -> second
		 * 7 -> Timezone specifier (Z or something like +12:30 )
		 * Many parts are optional, some aren't. For example if you specify
		 * minute, you must specify hour, day, month, and year but not second or TZ.
		 * Trailing groups that did not take part in the match are absent from
		 * $res; a skipped group in the middle is present as an empty string.
		 */
		$res = array();
		$pattern = '/^([0-3]\d{3})(?:-([01]\d)(?:-([0-3]\d)(?:T([0-2]\d):([0-6]\d)(?::([0-6]\d)(?:\.\d+)?)?([-+]\d{2}:\d{2}|Z)?)?)?)?$/D';
		if ( !preg_match( $pattern, $val, $res ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected date but got $val" );
			$val = null;
			return;
		}

		/*
		 * First of all, if year = 0000, something is wrongish,
		 * so don't extract. This seems to happen when
		 * some programs convert between metadata formats.
		 */
		if ( $res[1] === '0000' ) {
			wfDebugLog( 'XMP', __METHOD__ . " Invalid date (year 0): $val" );
			$val = null;
			return;
		}

		if ( !isset( $res[4] ) ) {
			// No hour: we just have the year, month, day (if that).
			$val = $res[1];
			if ( isset( $res[2] ) ) {
				$val .= ':' . $res[2];
			}
			if ( isset( $res[3] ) ) {
				$val .= ':' . $res[3];
			}
			return;
		}

		// Extra check for empty string necessary due to the
		// "timezone given but no seconds" case.
		$hasSeconds = isset( $res[6] ) && $res[6] !== '';

		if ( !isset( $res[7] ) || $res[7] === 'Z' ) {
			// No offset to apply. If hour is set, then minute must
			// also be, or the regex above would have failed.
			$val = $res[1] . ':' . $res[2] . ':' . $res[3]
				. ' ' . $res[4] . ':' . $res[5];
			if ( $hasSeconds ) {
				$val .= ':' . $res[6];
			}
			return;
		}

		if ( !$hasSeconds ) {
			// wfTimestamp is handed a 14 digit timestamp, so pad the seconds
			// and strip them again afterwards.
			$res[6] = '00';
		}

		// Do timezone processing. We've already done the case that tz = Z.
		// We know that if we got to this step, year, month, day, hour and
		// minute must be set by virtue of the regex not failing.
		$unix = wfTimestamp( TS_UNIX, $res[1] . $res[2] . $res[3] . $res[4] . $res[5] . $res[6] );
		$offset = intval( substr( $res[7], 1, 2 ) ) * 60 * 60;
		$offset += intval( substr( $res[7], 4, 2 ) ) * 60;
		if ( substr( $res[7], 0, 1 ) === '-' ) {
			$offset = -$offset;
		}
		// NOTE(review): the offset is added to, not subtracted from, the
		// given time. If the result is meant to be UTC this looks inverted;
		// kept as-is since callers may rely on it -- confirm intent.
		$val = wfTimestamp( TS_EXIF, $unix + $offset );

		if ( !$hasSeconds ) {
			// If seconds weren't specified, remove the trailing ':00'.
			$val = substr( $val, 0, -3 );
		}
	}

	/**
	 * Validate, and more importantly translate, the XMP DMS form of
	 * gps coords to the decimal form we use.
	 *
	 * The whole string has to be a coordinate; values with leading or
	 * trailing junk are rejected. South and west become negative numbers.
	 *
	 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf
	 *      section 1.2.7.4 on page 23
	 *
	 * @param $info Array unused (info about prop)
	 * @param &$val String GPS string in either DDD,MM,SSk or
	 *      DDD,MM.mmk form. Replaced by a float on success, null on failure.
	 * @param $standalone Boolean if its a simple prop (should always be true)
	 */
	public static function validateGPS( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			return;
		}

		$m = array();
		if ( preg_match( '/^(\d{1,3}),(\d{1,2}),(\d{1,2})([NWSE])$/D', $val, $m ) ) {
			// Degrees, minutes, seconds.
			$coord = intval( $m[1] );
			$coord += intval( $m[2] ) * ( 1 / 60 );
			$coord += intval( $m[3] ) * ( 1 / 3600 );
			$direction = $m[4];
		} elseif ( preg_match( '/^(\d{1,3}),(\d{1,2}(?:\.\d*)?)([NWSE])$/D', $val, $m ) ) {
			// Degrees, decimal minutes.
			$coord = intval( $m[1] );
			$coord += floatval( $m[2] ) * ( 1 / 60 );
			$direction = $m[3];
		} else {
			wfDebugLog( 'XMP', __METHOD__
				. " Expected GPSCoordinate, but got $val." );
			$val = null;
			return;
		}

		if ( $direction === 'S' || $direction === 'W' ) {
			$coord = -$coord;
		}
		$val = $coord;
	}
}