From 9b48c297fb7d078d2b79597d72612e885aadd707 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Fri, 6 Dec 2013 16:29:30 -0400 Subject: [PATCH] Normalize newlines in DjVu text-layer metadata. Currently, newlines in the DjVu text layer are stored as the literal string '\n'. It's up to the consumer to unescape that into a real newline. Other formats like PDFs return newlines as an actual \n character when getPageText() is called. I think getPageText() should not require callers to do this. Change-Id: Ie1a438bbce5444c53ff6b7b3aaf2b5267ba3c8b4 --- includes/media/DjVuImage.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/includes/media/DjVuImage.php b/includes/media/DjVuImage.php index 9bfc378d08..971c865236 100644 --- a/includes/media/DjVuImage.php +++ b/includes/media/DjVuImage.php @@ -306,7 +306,9 @@ EOR; function pageTextCallback( $matches ) { # Get rid of invalid UTF-8, strip control characters - return ''; + $val = htmlspecialchars( UtfNormal::cleanUp( stripcslashes( $matches[1] ) ) ); + $val = str_replace( array( "\n", '�' ), array( ' ', '' ), $val ); + return ''; } /** -- 2.20.1