From: ThomasV Date: Thu, 4 Jun 2009 09:16:25 +0000 (+0000) Subject: store djvu text layer in img_metadata. fetch it in proofreadpage X-Git-Tag: 1.31.0-rc.0~41506 X-Git-Url: http://git.cyclocoop.org/%22%20.%20generer_url_aide%28?a=commitdiff_plain;h=22baa5d0906c96f3be7d9ab21138cdff8330f500;p=lhc%2Fweb%2Fwiklou.git store djvu text layer in img_metadata. fetch it in proofreadpage --- diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php index 8e7caf63d0..fbb2586529 100644 --- a/includes/DjVuImage.php +++ b/includes/DjVuImage.php @@ -224,7 +224,7 @@ class DjVuImage { * @return string */ function retrieveMetaData() { - global $wgDjvuToXML, $wgDjvuDump; + global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; if ( isset( $wgDjvuDump ) ) { # djvudump is faster as of version 3.5 # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 @@ -242,6 +242,22 @@ class DjVuImage { } else { $xml = null; } + # Text layer + if ( isset( $wgDjvuTxt ) ) { + wfProfileIn( 'djvutxt' ); + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; + wfDebug( __METHOD__.": $cmd\n" ); + $txt = wfShellExec( $cmd, $retval ); + wfProfileOut( 'djvutxt' ); + if( $retval == 0) { + $txt = htmlspecialchars($txt); + $txt = preg_replace( "/\(page\s\d*\s\d*\s\d*\s\d*\s*\"(.*?)\"\s*\)/s", "", $txt ); + $txt = preg_replace( "/\(\)/", "", $txt ); + $txt = "\n\n\n" . $txt . "\n\n"; + $xml = preg_replace( "//", "", $xml ); + $xml = $xml . $txt. '' ; + } + } return $xml; } diff --git a/includes/media/DjVu.php b/includes/media/DjVu.php index 66e954d42c..38c16c21fb 100644 --- a/includes/media/DjVu.php +++ b/includes/media/DjVu.php @@ -135,7 +135,7 @@ class DjVuHandler extends ImageHandler { /** * Cache a document tree for the DjVu XML metadata */ - function getMetaTree( $image ) { + function getMetaTree( $image , $gettext = false ) { if ( isset( $image->dejaMetaTree ) ) { return $image->dejaMetaTree; } @@ -149,15 +149,32 @@ class DjVuHandler extends ImageHandler { wfSuppressWarnings(); try { - $image->dejaMetaTree = new SimpleXMLElement( $metadata ); - } catch( Exception $e ) { - wfDebug( "Bogus multipage XML metadata on '$image->name'\n" ); // Set to false rather than null to avoid further attempts $image->dejaMetaTree = false; + $image->djvuTextTree = false; + $tree = new SimpleXMLElement( $metadata ); + if( $tree->getName() == 'mw-djvu' ) { + foreach($tree->children() as $b){ + if( $b->getName() == 'DjVuTxt' ) { + $image->djvuTextTree = $b; + } + else if ( $b->getName() == 'DjVuXML' ) { + $image->dejaMetaTree = $b; + } + } + } else { + $image->dejaMetaTree = $tree; + } + } catch( Exception $e ) { + wfDebug( "Bogus multipage XML metadata on '$image->name'\n" ); } wfRestoreWarnings(); wfProfileOut( __METHOD__ ); - return $image->dejaMetaTree; + if( $gettext ) { + return $image->djvuTextTree; + } else { + return $image->dejaMetaTree; + } } function getImageSize( $image, $path ) { @@ -211,4 +228,21 @@ class DjVuHandler extends ImageHandler { return false; } } + + function getPageText( $image, $page ){ + $tree = $this->getMetaTree( $image, true ); + if ( !$tree ) { + return false; + } + + $o = $tree->BODY[0]->PAGE[$page-1]; + if ( $o ) { + $txt = $o['value']; + return $txt; + } else { + return false; + } + + } + }