*/
$wgDiff = '/usr/bin/diff';
+/**
+ * Path to the GNU sed utility.
+ * Used to escape special characters in text extracted from DjVu files.
+ */
+$wgSed = '/bin/sed';
+
/**
* We can also compress text stored in the 'text' table. If this is set on, new
* revisions will be compressed on page save if zlib support is available. Any
# $wgDjvuRenderer = 'ddjvu';
$wgDjvuRenderer = null;
+/**
+ * Path to the djvutxt utility, which extracts the text layer from DjVu files.
+ * Set this together with $wgDjvuDump to enable text layer extraction.
+ */
+# $wgDjvuTxt = 'djvutxt';
+$wgDjvuTxt = null;
+
/**
* Path of the djvutoxml executable
* This works like djvudump except much, much slower as of version 3.5.
$m = false;
if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
return array( 'width' => $m[2], 'page' => $m[1] );
+ } else if ( preg_match( '/^page(\d+)-djvutxt$/', $str, $m ) ) {
+ return array( 'djvutxt' => 1, 'page' => $m[1] );
} else {
return false;
}
);
}
+
+ /**
+  * Normalise transform parameters, with a special case for text extraction.
+  *
+  * When the 'djvutxt' parameter is set and $wgDjvuTxt is configured, the
+  * page defaults to 1 and width/height are forced to 0 (presumably because
+  * no image is rendered for a text request -- confirm against doTransform).
+  * Every other request is delegated to the parent implementation.
+  *
+  * @param $image  Passed through unchanged to the parent implementation
+  * @param $params Array of transform parameters, modified in place
+  * @return true for a text extraction request, otherwise the parent's result
+  */
+ function normaliseParams( $image, &$params ) {
+ 	global $wgDjvuTxt;
+
+ 	// 'djvutxt' is absent for ordinary thumbnail requests; empty() tests it
+ 	// without raising an undefined-index notice.
+ 	if ( empty( $params['djvutxt'] ) || !$wgDjvuTxt ) {
+ 		return parent::normaliseParams( $image, $params );
+ 	}
+
+ 	if ( !isset( $params['page'] ) ) {
+ 		$params['page'] = 1;
+ 	}
+ 	$params['width'] = 0;
+ 	$params['height'] = 0;
+ 	return true;
+ }
+
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
- global $wgDjvuRenderer, $wgDjvuPostProcessor;
+ global $wgDjvuRenderer, $wgDjvuPostProcessor, $wgDjvuTxt, $wgSed;
// Fetch XML and check it, to give a more informative error message than the one which
// normaliseParams will inevitably give.
# Use a subshell (brackets) to aggregate stderr from both pipeline commands
# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
- $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
- wfEscapeShellArg( $srcPath );
- if ( $wgDjvuPostProcessor ) {
- $cmd .= " | {$wgDjvuPostProcessor}";
+
+ if( $params['djvutxt'] && $wgDjvuTxt && $wgSed ) {
+ #Read text from djvu
+ $cmd = '(' . wfEscapeShellArg( $wgDjvuTxt ) . " --page={$page} " . wfEscapeShellArg( $srcPath );
+ # Escape the & < > and " characters in the extracted text
+ $cmd .= ' | ' . wfEscapeShellArg( $wgSed ) . ' "s/\&/\&/g ; s/</\</g ; s/>/\>/g ; s/\"/\"/g "';
+ $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
+ }
+ else {
+ $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
+ wfEscapeShellArg( $srcPath );
+ if ( $wgDjvuPostProcessor ) {
+ $cmd .= " | {$wgDjvuPostProcessor}";
+ }
+ $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
}
- $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
wfProfileIn( 'ddjvu' );
wfDebug( __METHOD__.": $cmd\n" );
$err = wfShellExec( $cmd, $retval );