* The SVGMetadataExtractor is now based on XmlReader
[lhc/web/wiklou.git] / includes / media / DjVu.php
index c2973f9..cc3f1db 100644 (file)
@@ -1,10 +1,14 @@
 <?php
 /**
+ * Handler for DjVu images
+ *
  * @file
  * @ingroup Media
  */
  
 /**
+ * Handler for DjVu images
+ *
  * @ingroup Media
  */
 class DjVuHandler extends ImageHandler {
@@ -52,8 +56,6 @@ class DjVuHandler extends ImageHandler {
                $m = false;
                if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
                        return array( 'width' => $m[2], 'page' => $m[1] );
-               } else if ( preg_match( '/^page(\d+)-djvutxt$/', $str, $m ) ) {
-                       return array( 'djvutxt' => 1, 'page' => $m[1] );
                } else {
                        return false;
                }
@@ -66,21 +68,8 @@ class DjVuHandler extends ImageHandler {
                );
        }
 
-       function normaliseParams( $image, &$params ) {
-               global $wgDjvuTxt;
-               if( $params['djvutxt'] && $wgDjvuTxt) {
-                       if ( !isset( $params['page'] ) ) {
-                               $params['page'] = 1;
-                       }
-                       $params['width'] = 0;
-                       $params['height'] = 0;
-                       return true;
-               } 
-               else return parent::normaliseParams( $image, $params );
-       }
-
        function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
-               global $wgDjvuRenderer, $wgDjvuPostProcessor, $wgDjvuTxt;
+               global $wgDjvuRenderer, $wgDjvuPostProcessor;
 
                // Fetch XML and check it, to give a more informative error message than the one which
                // normaliseParams will inevitably give.
@@ -109,36 +98,19 @@ class DjVuHandler extends ImageHandler {
                        return new MediaTransformError( 'thumbnail_error', $width, $height, wfMsg( 'thumbnail_dest_directory' ) );
                }
 
-               if( $params['djvutxt'] && $wgDjvuTxt ) {
-                       # Extract djvu text
-                       $cmd = wfEscapeShellArg( $wgDjvuTxt ) . " --page={$page} " . wfEscapeShellArg( $srcPath ) ;
-                       wfProfileIn( 'djvutxt' );
-                       wfDebug( __METHOD__.": $cmd\n" );
-                       $err = wfShellExec( $cmd, $retval );
-                       wfProfileOut( 'djvutxt' );
-                       # Escape html characters
-                       $txt = htmlspecialchars( $err );
-                       # Write result to file
-                       if($retval == 0) {
-                               $f = fopen($dstPath, 'w');
-                               fwrite($f, $txt);
-                               fclose($f);
-                       }
-               }
-               else {
-                       # Use a subshell (brackets) to aggregate stderr from both pipeline commands
-                       # before redirecting it to the overall stdout. This works in both Linux and Windows XP.
-                       $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
-                               wfEscapeShellArg( $srcPath );
-                       if ( $wgDjvuPostProcessor ) {
-                               $cmd .= " | {$wgDjvuPostProcessor}";
-                       }
-                       $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
-                       wfProfileIn( 'ddjvu' );
-                       wfDebug( __METHOD__.": $cmd\n" );
-                       $err = wfShellExec( $cmd, $retval );
-                       wfProfileOut( 'ddjvu' );
+               # Use a subshell (brackets) to aggregate stderr from both pipeline commands
+               # before redirecting it to the overall stdout. This works in both Linux and Windows XP.
+               $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
+                       wfEscapeShellArg( $srcPath );
+               if ( $wgDjvuPostProcessor ) {
+                       $cmd .= " | {$wgDjvuPostProcessor}";
                }
+               $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
+               wfProfileIn( 'ddjvu' );
+               wfDebug( __METHOD__.": $cmd\n" );
+               $retval = '';
+               $err = wfShellExec( $cmd, $retval );
+               wfProfileOut( 'ddjvu' );
 
                $removed = $this->removeBadFile( $dstPath, $retval );
                if ( $retval != 0 || $removed ) {
@@ -168,7 +140,7 @@ class DjVuHandler extends ImageHandler {
        /**
         * Cache a document tree for the DjVu XML metadata
         */
-       function getMetaTree( $image ) {
+       function getMetaTree( $image , $gettext = false ) {
                if ( isset( $image->dejaMetaTree ) ) {
                        return $image->dejaMetaTree;
                }
@@ -182,22 +154,39 @@ class DjVuHandler extends ImageHandler {
 
                wfSuppressWarnings();
                try {
-                       $image->dejaMetaTree = new SimpleXMLElement( $metadata );
-               } catch( Exception $e ) {
-                       wfDebug( "Bogus multipage XML metadata on '$image->name'\n" );
                        // Set to false rather than null to avoid further attempts
                        $image->dejaMetaTree = false;
+                       $image->djvuTextTree = false;
+                       $tree = new SimpleXMLElement( $metadata );
+                       if( $tree->getName() == 'mw-djvu' ) {
+                               foreach($tree->children() as $b){ 
+                                       if( $b->getName() == 'DjVuTxt' ) {
+                                               $image->djvuTextTree = $b;
+                                       }
+                                       else if ( $b->getName() == 'DjVuXML' ) {
+                                               $image->dejaMetaTree = $b;
+                                       }
+                               }
+                       } else {
+                               $image->dejaMetaTree = $tree;
+                       }
+               } catch( Exception $e ) {
+                       wfDebug( "Bogus multipage XML metadata on '$image->name'\n" );
                }
                wfRestoreWarnings();
                wfProfileOut( __METHOD__ );
-               return $image->dejaMetaTree;
+               if( $gettext ) {
+                       return $image->djvuTextTree;
+               } else {
+                       return $image->dejaMetaTree;
+               }
        }
 
        function getImageSize( $image, $path ) { // Image size lookup for a DjVu file.
                return $this->getDjVuImage( $image, $path )->getImageSize(); // delegates to the object returned by getDjVuImage()
        }
 
-       function getThumbType( $ext, $mime ) {
+       function getThumbType( $ext, $mime, $params = null ) {
                global $wgDjvuOutputExtension;
                static $mime;
                if ( !isset( $mime ) ) {
@@ -244,4 +233,21 @@ class DjVuHandler extends ImageHandler {
                        return false;
                }
        }
+
+       function getPageText( $image, $page ){ // Returns the 'value' attribute of the PAGE entry for $page in the text tree, or false if unavailable.
+               $tree = $this->getMetaTree( $image, true ); // second argument true => getMetaTree() returns $image->djvuTextTree instead of the metadata tree
+               if ( !$tree ) { // no text tree was found (or the metadata failed to parse)
+                       return false;
+               }
+
+               $o = $tree->BODY[0]->PAGE[$page-1]; // page N maps to PAGE index N-1. NOTE(review): a tree without a BODY child presumably makes BODY[0] null, so ->PAGE would fail -- confirm and guard if needed
+               if ( $o ) {
+                       $txt = $o['value']; // NOTE(review): attribute access on a SimpleXMLElement yields a SimpleXMLElement, not a string -- confirm whether callers expect a (string) cast
+                       return $txt;
+               } else { // no PAGE element at that index
+                       return false;
+               }
+
+       }
+
 }