From: Brian Wolff <bawolff+wn@gmail.com>
Date: Fri, 6 Dec 2013 19:34:49 +0000 (-0400)
Subject: Add method to get entire text layer. This could be useful for search
X-Git-Tag: 1.31.0-rc.0~17643
X-Git-Url: http://git.cyclocoop.org/%22.%20%20%20generer_url_action%28%22logout%22%2C%22logout=prive%22%29%20.%20%20%20%22?a=commitdiff_plain;h=751b9ebab8c6bea9cf125d6c122613a5a8c89df4;p=lhc%2Fweb%2Fwiklou.git

Add a method to get the entire text layer of a document. This could be useful for search.

Change-Id: I22f5fc47aef3cf362cdf630980deea48fe531d45
---

diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 1dc74ce9fe..e9d0039f06 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -426,12 +426,37 @@ abstract class MediaHandler {
 	 * Currently overloaded by PDF and DjVu handlers
 	 * @param File $image
 	 * @param int $page Page number to get information for
-	 * @return bool|string Page text or false when no text found.
+	 * @return bool|string Page text or false when no text found or if
+	 *   unsupported.
 	 */
 	function getPageText( $image, $page ) {
 		return false;
 	}
 
+	/**
+	 * Collect the text layer of every page of a file into a single string.
+	 *
+	 * @param File $file
+	 * @return bool|string Text of the document, or false if unsupported or no page returned any text.
+	 */
+	public function getEntireText( File $file ) {
+		$pageTotal = $file->pageCount();
+		if ( !$pageTotal ) {
+			// Not a multipage document: fall back to the text of page 1
+			return $this->getPageText( $file, 1 );
+		}
+		// Every page that yields a string contributes its text plus a trailing newline
+		$pieces = array();
+		for ( $pageNo = 1; $pageNo <= $pageTotal; $pageNo++ ) {
+			$text = $this->getPageText( $file, $pageNo );
+			if ( is_string( $text ) ) {
+				$pieces[] = $text . "\n";
+			}
+		}
+		$combined = implode( '', $pieces );
+		return $combined !== '' ? $combined : false;
+	}
+
 	/**
 	 * Get an array structure that looks like this:
 	 *