From 29c1e061becfe6db5f000321a6cca5c7142e97a3 Mon Sep 17 00:00:00 2001 From: Pascal Bach Date: Thu, 23 Jul 2020 21:45:14 +0200 Subject: [PATCH] ocrmypdf: 10.2.0 -> 10.3.0 Add patch to make it compatible with pdfminer 20200720 Submitted upstream as https://github.com/jbarlow83/OCRmyPDF/pull/596 --- ...e-with-pdfminer.six-version-20200720.patch | 52 +++++++++++++++++++ pkgs/tools/text/ocrmypdf/default.nix | 6 ++- 2 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 pkgs/tools/text/ocrmypdf/0001-Make-compatible-with-pdfminer.six-version-20200720.patch diff --git a/pkgs/tools/text/ocrmypdf/0001-Make-compatible-with-pdfminer.six-version-20200720.patch b/pkgs/tools/text/ocrmypdf/0001-Make-compatible-with-pdfminer.six-version-20200720.patch new file mode 100644 index 000000000000..967bcd3948c2 --- /dev/null +++ b/pkgs/tools/text/ocrmypdf/0001-Make-compatible-with-pdfminer.six-version-20200720.patch @@ -0,0 +1,52 @@ +From 4315b58e0bffedd145cec61f96062292cd98278e Mon Sep 17 00:00:00 2001 +From: Pascal Bach +Date: Thu, 23 Jul 2020 21:37:33 +0200 +Subject: [PATCH] Make compatible with pdfminer.six version 20200720 + +--- + setup.py | 2 +- + src/ocrmypdf/pdfinfo/layout.py | 8 ++++++-- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/setup.py b/setup.py +index bd95ed9..d1f4ab1 100644 +--- a/setup.py ++++ b/setup.py +@@ -83,7 +83,7 @@ setup( + 'cffi >= 1.9.1', # must be a setup and install requirement + 'coloredlogs >= 14.0', # strictly optional + 'img2pdf >= 0.3.0, < 0.4', # pure Python, so track HEAD closely +- 'pdfminer.six >= 20191110, <= 20200517', ++ 'pdfminer.six >= 20191110, <= 20200720', + 'pikepdf >= 1.14.0, < 2', + 'Pillow >= 7.0.0', + 'pluggy >= 0.13.0', +diff --git a/src/ocrmypdf/pdfinfo/layout.py b/src/ocrmypdf/pdfinfo/layout.py +index 98bd82e..8b41e14 100644 +--- a/src/ocrmypdf/pdfinfo/layout.py ++++ b/src/ocrmypdf/pdfinfo/layout.py +@@ -26,7 +26,11 @@ import pdfminer.pdfdevice + import pdfminer.pdfinterp + from pdfminer.converter import PDFLayoutAnalyzer + from pdfminer.layout import LAParams, LTChar, LTPage, LTTextBox +-from pdfminer.pdfdocument import PDFTextExtractionNotAllowed ++try: ++ from pdfminer.pdfdocument import PDFTextExtractionNotAllowedError ++except ImportError: ++ # Fallback for pdfminer < 20200720 ++ from pdfminer.pdfdocument import PDFTextExtractionNotAllowed as PDFTextExtractionNotAllowedError + from pdfminer.pdffont import PDFSimpleFont, PDFUnicodeNotDefined + from pdfminer.pdfpage import PDFPage + from pdfminer.utils import bbox2str, matrix2str +@@ -239,7 +243,7 @@ def get_page_analysis(infile, pageno, pscript5_mode): + with Path(infile).open('rb') as f: + page = PDFPage.get_pages(f, pagenos=[pageno], maxpages=0) + interp.process_page(next(page)) +- except PDFTextExtractionNotAllowed: ++ except PDFTextExtractionNotAllowedError: + raise EncryptedPdfError() + finally: + if pscript5_mode: +-- +2.27.0 + diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/tools/text/ocrmypdf/default.nix index 84e0bfb78d17..b7864b05b6e1 100644 --- a/pkgs/tools/text/ocrmypdf/default.nix +++ b/pkgs/tools/text/ocrmypdf/default.nix @@ -29,14 +29,14 @@ let in buildPythonApplication rec { pname = "ocrmypdf"; - version = "10.2.0"; + version = "10.3.0"; disabled = ! python3Packages.isPy3k; src = fetchFromGitHub { owner = "jbarlow83"; repo = "OCRmyPDF"; rev = "v${version}"; - sha256 = "1dkxhy3bjl48948jj2k6d684sd76xw1q427qc4hmxncr0wxj0ljp"; + sha256 = "0c6v7846lmkmbyfla07s35mpba4h09h0fx6pxqf0yvdjxmj46q8c"; }; nativeBuildInputs = with python3Packages; [ @@ -76,6 +76,8 @@ buildPythonApplication rec { src = ./liblept.patch; liblept = "${stdenv.lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}"; }) + # https://github.com/jbarlow83/OCRmyPDF/pull/596 + ./0001-Make-compatible-with-pdfminer.six-version-20200720.patch ]; makeWrapperArgs = [ "--prefix PATH : ${stdenv.lib.makeBinPath [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ]}" ];