From: Brian Wolff
Date: Fri, 6 Dec 2013 19:34:49 +0000 (-0400)
Subject: Add method to get entire text layer. This could be useful for search
X-Git-Tag: 1.31.0-rc.0~17643
X-Git-Url: http://git.cyclocoop.org/%22.%20%20%20generer_url_action%28%22logout%22%2C%22logout=prive%22%29%20.%20%20%20%22?a=commitdiff_plain;h=751b9ebab8c6bea9cf125d6c122613a5a8c89df4;p=lhc%2Fweb%2Fwiklou.git

Add method to get entire text layer. This could be useful for search

Change-Id: I22f5fc47aef3cf362cdf630980deea48fe531d45
---

diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 1dc74ce9fe..e9d0039f06 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -426,12 +426,37 @@ abstract class MediaHandler {
 	 * Currently overloaded by PDF and DjVu handlers
 	 * @param File $image
 	 * @param int $page Page number to get information for
-	 * @return bool|string Page text or false when no text found.
+	 * @return bool|string Page text or false when no text found or if
+	 * unsupported.
 	 */
 	function getPageText( $image, $page ) {
 		return false;
 	}
 
+	/**
+	 * Get the text of the entire document.
+	 * @param File $file
+	 * @return bool|string The text of the document or false if unsupported.
+	 */
+	public function getEntireText( File $file ) {
+		$numPages = $file->pageCount();
+		if ( !$numPages ) {
+			// Not a multipage document
+			return $this->getPageText( $file, 1 );
+		}
+		$document = '';
+		for( $i = 1; $i <= $numPages; $i++ ) {
+			$curPage = $this->getPageText( $file, $i );
+			if ( is_string( $curPage ) ) {
+				$document .= $curPage . "\n";
+			}
+		}
+		if ( $document !== '' ) {
+			return $document;
+		}
+		return false;
+	}
+
 	/**
 	 * Get an array structure that looks like this:
 	 *