From f9173cb902cac38f75bf6525e0f8d861d50de8cc Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Thu, 5 Jan 2012 23:25:39 +0000 Subject: [PATCH] Make sure that if we fail to read the App13 (iptc) block of a JPG file, that that doesn't block other metadata from being read. Also makes sure if more then one app13 block is in the file, they are all read, not just the last one that appears in the file (This required some changes to tests since before the intermediate value was just one value, now its an array of all such blocks) --- RELEASE-NOTES-1.19 | 2 ++ includes/media/BitmapMetadataHandler.php | 16 +++++++++++++--- includes/media/JpegMetadataExtractor.php | 10 ++++++---- tests/phpunit/data/media/iptc-invalid-psir.jpg | Bin 0 -> 9574 bytes .../media/BitmapMetadataHandlerTest.php | 10 ++++++++++ .../media/JpegMetadataExtractorTest.php | 8 ++++---- 6 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 tests/phpunit/data/media/iptc-invalid-psir.jpg diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index 2cb77f7d99..e073ce5f15 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -223,6 +223,8 @@ production. * (bug 33321) Adding a line to MediaWiki:Sidebar that contains a pipe, but doesn't have any pipes after being transformed by MessageCache, causes exception on all pages. +* Files with IPTC blocks we can't read no longer prevent extraction of exif + or other metadata. === API changes in 1.19 === * (bug 19838) siprop=interwikimap can now use the interwiki cache. diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php index d1caa67a38..8aab9bd5f2 100644 --- a/includes/media/BitmapMetadataHandler.php +++ b/includes/media/BitmapMetadataHandler.php @@ -32,7 +32,15 @@ class BitmapMetadataHandler { * @param String $app13 String containing app13 block from jpeg file */ private function doApp13 ( $app13 ) { - $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 ); + try { + $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 ); + } catch ( MWException $e ) { + // Error reading the iptc hash information. + // This probably means the App13 segment is something other than what we expect. + // However, still try to read it, and treat it as if the hash didn't exist. + wfDebug( "Error parsing iptc data of file: " . $e->getMessage() . "\n" ); + $this->iptcType = 'iptc-no-hash'; + } $iptc = IPTC::parse( $app13 ); $this->addMetadata( $iptc, $this->iptcType ); @@ -122,8 +130,10 @@ class BitmapMetadataHandler { if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) { $meta->addMetadata( Array( 'JPEGFileComment' => $seg['COM'] ), 'native' ); } - if ( isset( $seg['PSIR'] ) ) { - $meta->doApp13( $seg['PSIR'] ); + if ( isset( $seg['PSIR'] ) && count( $seg['PSIR'] ) > 0 ) { + foreach( $seg['PSIR'] as $curPSIRValue ) { + $meta->doApp13( $curPSIRValue ); + } } if ( isset( $seg['XMP'] ) && $showXMP ) { $xmp = new XMPReader(); diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php index 4769bf8e56..224b4a2bed 100644 --- a/includes/media/JpegMetadataExtractor.php +++ b/includes/media/JpegMetadataExtractor.php @@ -31,6 +31,7 @@ class JpegMetadataExtractor { $segments = array( 'XMP_ext' => array(), 'COM' => array(), + 'PSIR' => array(), ); if ( !$filename ) { @@ -122,7 +123,7 @@ class JpegMetadataExtractor { // APP13 - PSIR. IPTC and some photoshop stuff $temp = self::jpegExtractMarker( $fh ); if ( substr( $temp, 0, 14 ) === "Photoshop 3.0\x00" ) { - $segments["PSIR"] = $temp; + $segments["PSIR"][] = $temp; } } elseif ( $buffer === "\xD9" || $buffer === "\xDA" ) { // EOI - end of image or SOS - start of scan. either way we're past any interesting segments @@ -162,11 +163,12 @@ class JpegMetadataExtractor { * This should generally be called by BitmapMetadataHandler::doApp13() * * @param String $app13 photoshop psir app13 block from jpg. + * @throws MWException (It gets caught next level up though) * @return String if the iptc hash is good or not. */ public static function doPSIR ( $app13 ) { if ( !$app13 ) { - return; + throw new MWException( "No App13 segment given" ); } // First compare hash with real thing // 0x404 contains IPTC, 0x425 has hash @@ -218,8 +220,8 @@ class JpegMetadataExtractor { // this should not happen, but check. if ( $lenData['len'] + $offset > $appLen ) { - wfDebug( __METHOD__ . " PSIR data too long.\n" ); - return 'iptc-no-hash'; + throw new MWException( "PSIR data too long. (item length=" . $lenData['len'] + . "; offset=$offset; total length=$appLen)" ); } if ( $valid ) { diff --git a/tests/phpunit/data/media/iptc-invalid-psir.jpg b/tests/phpunit/data/media/iptc-invalid-psir.jpg new file mode 100644 index 0000000000000000000000000000000000000000..01b9acf39ae22f46c142b4f532351c6bd55b2c6d GIT binary patch literal 9574 zcmeHrcT`i`_HF`*ARxU8D1iW>1QM#$LklD{6PmPxltgMER8c?_rS~4HLg-!TVxvik zs5C*a&;=<{r3nx3J;!m*{oV7s@4oWKd*iO`F|yYBzWL3$R_5Gm?;MXEe*!R}5J&_7 z2m}K3PCkI+6~M2ToIQO#eVjeLF33Qo0g9SPLny!=%KZ zlCsMBzsM@;UX+AtT59SVT{1N@l~K2Kw81#&o0y`1hyc$rGO{tViLkSapyi?R=zqH% zKLaq+040DVN+1Y8!3?Bi1|D|;&H;b`%9AYu0)941s#DabffOeoiU|NXML|V%iV}D- z$M}N-q&R_@Syb_Nrw?qu7GCk4^{A=*7;q=py?%dJZNhqG?sb6Uz%jru|2w_2EZi@DX%9s{2E^K} zH0=8v0tOA@FO;lPU6&}IFfqEalm9^zW$-m2_ktzx{ye42bS zdAB5}Qw$CESHi90wwJmwj$!=P) zX~tB%jguv3M81#(D>PH4z2t4U43W*lyn2My=E@A&PkMEdS++Bc;Q*D(!LWu<<0vn@BD=#YC}H9W1QuAx~sXeI3>fuEOFu)W0$ zmB%9S%1#!-lkm?4bs5UJgT=+0brjDbN z!nKA?v&igB)^eaH(uDi7wtrn#KXVB!GvC@tTrs7jY84WjHqs+%ps=EIg8J&Y+^3Zb zPFrfzj?sHZgDA_|`JA$gPCJy(6O?udzK`1;(SK$F3x@A@6=?@Tdoo}K_a@*9EYACe z;CV!_1m*RzPUbQ)!Vgw#I7}KHw10r-CRJW*lq!8%kFYU8ecc<48%f2eP^TD=>OZcv zU%Pn>sJ!U1GMXLrORUFec4(H53qiKMeZvj}g4^!1SG^@zv2#?$b0uDfh^L%wsnlmV z`z4rG=xF;%#pb-NyFYN``Sh-gtJmVYi|cc!ZWp}PMO-q9V6z%bAGzYTr53g>9tR=o z1vVC#f*af8r1au=M~85>_=-LHyTczU2y17R%AGPrJ>c$`WIcl2$mV`fOi{wXJZ!9R zprEJ+-XA@3*R|gkGHoJ`%w99-;rH!wO8PcF`~1z6;)k4eoC3t_|eK7G~~kIG5cZ zivs)9qD+aIcBWb)3rMm=?=MGv#_ z&InxHbwj9)d>Bb}4-~yqEG{*tIO@vM)?{+mx#ibBDM2YX%Wol8P~T^{UplBvED1!v z-#4gj&MP9fY(udNmsQf&?Zb$-gAlR zTt-n6C;S*Ng7l<}sw4VHK{F1y0rixyz5yHB2gd+|(hAi2!_;)>%qU#KBkhQ^`S^f2 zPWp$}zivSC&4K(Qx?{ixC4$yg=`o<@=waMTM23F&To|>4MMKSvRm!~SdH%GnN$Cz2 z3vRa&4p*8wp$9`Z>;~S}?VWi;GQPzc3vny$P1*Mi>e=A4eQzY6TJxwz5X5KNY8vS0 z8jS2}Hr62bp$uyCWT#YY9&w6u({OgIb8KUW2+L-;=7_vOmQN;gnJi)$=)3Wh?sLYeu3rf-veZBpQtAY~RU6aLJ1y@-r_kO+OmHM^L}^xD}z`GMZR!sz@> zGMRi@oI~YEq{1x6zd@dksYpy*Ldz@rz++oHLwS-RI?gR}4*kB09Nh&AN9soxc=Xa? z@NP0IwIiipQ@I;BMdZYMIX{3z;?K>GD|=3hQ>`+WpiBdJLYzd#uoB?ink-%nO{I3> z!p(JH|A?5Mo{d81)|_cx;F4ofFEI*T)PWjKsVi;kC!U{myWbjjF1AcU*&7|x;9PUv2F^*4;G+2o7An$#w zL0hR)np`&3x|qi4>v?~k_U$_BHKW;N@yjWLM?o=}XS{{`A&1_{sq4bBh}7q-5xCef zG_hpY=y2~0{ZW}wO#8OqXtAE@x55{J5uE;UP?4mG-N+bXF8NjFm%mPq8xhm0#zmSY z39P;^to%~I?86>uDKQ!FH8;S_7yU|&N^*vI_T?Cw5A`n6Imj0-)c!`YH{*^0ZP!`X z6nK;q+8$^b3XL|L*93_ZV`+o9jj~~$>diHvd)Rw?`skx4w*5H0weK!k;Afx%T|Q1wU0O%RXw&AIdMI1f_n4*eMP z4jByX(e(4Cwr`$9WQy2z8cr!E>s2`z>+fwq9d&}#nLW*I75KZ%T_n}9?8(|;8uoKN z%OL^4gQV4NAvb3GbE9#>tWk<97Y2H}6V{)8i$9`5d@`BQ zbRI6AsZURKRl^}cpeKjnyie#$3^SYZGcK8&Rq})i^s3wy9hkp%Q#o`Xj{hc`nb^QZ zt6X3vCXGE^pe1ajX}J1ZGdZN{a*9T1yQ`RXw_9&X4#`_Yuf`G0iJ7;6M%WJxUqetW zuD-;ZewI?vHs)2>=>518-7Wm}F-#irm8c*4kOBpk*F)Rt8y?NTl|LLl^RcnkFuhhU zZ*jR?twii4X_9bIrr7t^7>dYIH+Dy@3QhBP;92g=@YuTAN~Wh}H4o~W;(20M3*oKN z-P$^y0wbfJ*WJ9LC5n`8qk4xG2WzY7pwWVBdv z`R;`~!d40JxP8MqqoYnp4z?Iqnd%a_0rKi+-Y{$HFFIV77Szq}Vn;oUH!t}G$5y0+ zjl^I1QQwbOmCCVda$-4T8fRJQUQ3~?K3{y)=W#f=tBN3n?>Ai2NonW(DD6F`wF4bT zVCO6C);25G`4M5r=WUV+gL})ZGaxx`mEZM5J%_Jxv$EMbx*B3F6M%zVCL2y zibkcW?_|zNK$JJcxi_FQn9AX$i1TT%^2%m{QAXC3^ds&(m$d$#w4vM@huWbVcQ2?S z%Z5z1l5vHib4mpm0vJas@15(ZPF_Mtg`}O|J_d{uT1p2Oy(Vj~bVctptOY;wT5ht; z=-AGfoTe?cWnUO~nlNYeo!GCQ+BWO!I&6CpfJki_)??*sbX=eCGhZboykRg| zH}-XNfgIc7&v`nzN&n8rL-|s~) zRj*KW3S1q`*VB?=47;~&p4H)@xUDI!cP+{g;wcp66wDxe_0=AuvPULEO_*(kI<<{& zu?Zu}(!s^Wu=jzCC3{ao(cv0?Is}w^#ti!zZU1%ODuJCOuyl)<5=V4E!u2*l7OSCK z3bQW~cxTlPJm5M$1UDg4%tlbS7JC$a!ui?%h@87@oeNOe41cD%d2&X)Vzf=WA|tr91&0mc7?j2Zjg>rKu^^= zUODuk%59XzWY`o*EmcBEfMIW@G`=`!zESSpG2q(b;!E`9?*$en7fU7EngpQ&Dyb}S zxa`t5H$TX6pNHbEC>;ZaueixxAKS0dl~erQ)ciojp~=MHozJ%78|TEgWeS+otoZ9@ z#BY49K5P9=i93=8!R9Hdj}i3bN^HWZmFV{#Ar?UMIga2uNco3@BPovi z)m1&w6Y>7)?CVs-jZY8TFO#&N=OheUOK?RmF1Yda*DmYAG>!pAyER9P$AJ1P!p2N$ zZ<(7YZV;wm`&Kw5yi~Rk&(|!&fOaRl_=4}4>q%PP9!gkM5vOd=K5w!Qn)$u<&1A`C zX_296)0-}zUEO`v-a)u}&$16*&B2me&!cZi(mL+=brGy3BwkcV7AMW7rc{kZo-Ra{ znM^6B&ke-!xgB7>lxgIDitS2btdERA(uJMv|A;+%{CmA@Q(N+R+YAf)njsy!D`RVQCCOtH?|=kX$XrOe4f zs5Buhw&rc^_$SVBZ$r!QnW0{H2uaLLmCGuqvJh$uwOPY|l33SeJ0i+U%2RHwxGkAG z6g|G%NRy1(Uk(uem_ew1Z^e5J&5163=CY85iXB{Mb(4YOVsc+9EY>dU#%BYMl&8p_ zVL8LDK6UjeFWcYPclGDC)x5?H@k}+k$&EJOWy(&DqE1JPCL;~JLH$C^8&KOu;D<+0 zaZwk1uSY5$B=J3juT2=(j6EITa13 zU>}M;vXKfYazrF=RHRFQ@jG0YXx(Tx9fgkZ?!8an-S{PHCZ8WPHsJ%B8I3~R72iY~ zL>bgSe>b2w1r`a(d6n2(xo&XAQFbA5E0Dn;H650EC;3v!LC#a<W{p78r2Qj!{pBcq zkXj^~1#inp!>DPduNJJJrIa&!k-K?yWio))yRU9+IiPFo&XNoZ z+O%X47Ir^9GM_3@ylW^%QTo|LS9lz^H^IKvztUtI9zpS0#jx{ZvHhFrgp7!l1B)9T zHzxW-sAlK6O8x%$CZZG z&BGW4-fX*i9<4g7RJ1D(mM`aqu`fKJA>GMeaC0sM@rzP~pswFeJ|TQ`Q=yNbCy0TX ztBA~bf3c@8w^eexGus+ud6rX0sm~mHNWLRl+2!4wg`#y0MEf~SSpy3>_?sTDHEAGM z#Y0~Tmtaib3|YP@(Mw23XAw(sc1dn*b-|JorR6RYsoUV{RP7hq11my(8F*JvoUwNZ zGCy;cm!`Xs1&OspK60|96~Sv;(+4O%q4RAS0c+)Gsw`fUo+fOIl{O0LRK6MkIu(Qhg z`hiFc56U3v#*`pvF~pw>)BGW`Gvf86%LXyNP=CGBPWzGWQXp9@Us5-emi3xjH>Q$ zS#E=&T@IbWMqPu``gfn{7AKUC%qo;V&&Up~`}Fwh$EuovcCg76$%sr-u0HH`c6_6U z(BSl{Zj_E2_hbxlhXc|zohdNtGJMsd&9XE@f^KPa%)CNU?o*)pic zw#DA*I7xkRa>?^NB&4>mSLi!w6KVUD^8(Q*O#N&lTcG0gtFmbm2YQ#AZBAK0d>0&w z#S;agN=Az=2Bg<^w;LjsKXoWxmx+Ad-SEKh!E?`@>$Iz!K+AG>YS}$$M}?BElv|{9 zbE!v+P>anb^nrnBq{~hUduo#a%A`lQZ-D2?3_6gw_pvl<%8Scy-)0f!2-C-<7F!A; zyK}mQt9nwx-%C0ky`G9bx0TwTB)9V+8|GqC+~?JpjI@Lx{XyscT)zLLrh)tyIt9A9 zo*72MXb9obtPb>=y!NqFV_{4YT#i8b-@(AUXM#4r=M^XLM~G?8-5(tfu1{*fLbP#3 zA&W^LYaIAfV$lUdnFWKy5`=XOHs`WohambrqVREK8~L=#3~TfC#|BxKz^2?f5ZUb< zQwNNB8uMg--8b%68fA~4qxvZtkmjc4%)kY=7bwy?yLy^#h$c zFR%s#1Ohon#vHw&NNp;YTPgx^zj108Wk~{mdyizrh9)d(#;gh+1HO8WmXXcNj7>;Y zo$wuA#CVcuWdYi1HpEHF5!f{-Y})LaJjXC_J{vYVItAYp1v+y`AB4wN<}v5h5O;uq>M z9(6^YybZK)GiO*Qrc+T*+AJEq*^Rz;x2xC66)%)pKQHu^)H>Nx+Oih_`#3{?Mezft zX^$E023Oc+*Z4^jjH7{s5oAz#yq>t1)?XvTN3!IJtbCe2V;KpaYt0ci?At*Hat9_J zo|p=Ea@|qv;G(Ug7C0;2P2sK)sTB(g&1Q07`b#jMY42Z?{zBmwfo_1}Fi( zqfGAG>G-MaX1bPJnGjq!{uA^El*Rq%?~pTYm16Y@?u_OA-zlrVLw~Sx{=4;0=%>}A z@PCBxQ(6C7@TbsE_J1SvU!kAw{}yy81h)#1+5ATTy&G5PHH@b;R`+@}iE~w8Kzx$@ z&-6dplO`asbQ0j?8o*#Q!)ey++5~JUo0LjdgH^&vU{M=ll(0VUc)yANq5`-CUgt(kC(?z-R)zweCRT-jLS=R3$)xNOS@-tIWkSQ&vVKDTS5>1T1esl zl+Yg|qBxgH&5$z3g>{$RDjAb488Z{K<0GckV$a)`6sbKwxddL|uv~v0QlsNkAv+9^ z9ono7PxFZF;r z^?)jZkC+-9Cffby;Q_-RqBx+wG&Goh<>X47c?b^^{Ua!Tr4ROp)wpx(j{Pa*-i{IqwDKw2_QdIYBjnMl3KqKaABxn^6p0lGgapg+d*2t^Wv+cR`xL` zP?*tMZ_Xe&)9u0=BkPR6(KMA>MVc1%PauX92e!Y9s{BTyS^Av!u}Jc-d<#|-{B(V& z4c&Du`-1pZG0o3Ye*MkkrQJJzzQ&$|>ABUjJpPXoA5E2)RGs@!6M5}78uQ2-)u$+{ z6UgP=s^(gFd0xHe^(O+q({wGgcfLbh-W`GdWoS2<>7M-NGvap{K3mp_t)ddQ7U~oK z|Cd5r)i}}rkL$nxZvXQe{xtG`IZfA@{za|;e-_B!a<{|=KfilePath . + 'iptc-invalid-psir.jpg' ); + $this->assertEquals( 'Created with GIMP', $meta['JPEGFileComment'][0] ); + } + public function testIPTCDates() { $meta = BitmapMetadataHandler::Jpeg( $this->filePath . 'iptc-timetest.jpg' ); diff --git a/tests/phpunit/includes/media/JpegMetadataExtractorTest.php b/tests/phpunit/includes/media/JpegMetadataExtractorTest.php index 5bf9bfe7c7..f48382a431 100644 --- a/tests/phpunit/includes/media/JpegMetadataExtractorTest.php +++ b/tests/phpunit/includes/media/JpegMetadataExtractorTest.php @@ -59,7 +59,7 @@ class JpegMetadataExtractorTest extends MediaWikiTestCase { public function testPSIRExtraction() { $res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' ); $expected = '50686f746f73686f7020332e30003842494d04040000000000181c02190004746573741c02190003666f6f1c020000020004'; - $this->assertEquals( $expected, bin2hex( $res['PSIR'] ) ); + $this->assertEquals( $expected, bin2hex( $res['PSIR'][0] ) ); } public function testXMPExtractionAltAppId() { $res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-alt.jpg' ); @@ -70,19 +70,19 @@ class JpegMetadataExtractorTest extends MediaWikiTestCase { public function testIPTCHashComparisionNoHash() { $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' ); - $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] ); + $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] ); $this->assertEquals( 'iptc-no-hash', $res ); } public function testIPTCHashComparisionBadHash() { $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-bad-hash.jpg' ); - $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] ); + $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] ); $this->assertEquals( 'iptc-bad-hash', $res ); } public function testIPTCHashComparisionGoodHash() { $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-good-hash.jpg' ); - $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] ); + $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] ); $this->assertEquals( 'iptc-good-hash', $res ); } -- 2.20.1