From b350ec48b7f5ef9d1c250961e362aef0a5a4de5c Mon Sep 17 00:00:00 2001 From: David Martin Date: Mon, 8 Oct 2018 19:37:05 +1100 Subject: [PATCH 1/2] Mention FORGIVING_OCR config option when language detection fails. It is not obvious that the PAPERLESS_FORGIVING_OCR option allows document consumption to proceed even if no language can be detected. Mentioning it in the actual error message in the log seems like the best way to make it clear. --- src/paperless_tesseract/parsers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index dc5dbd637..ffa2727e5 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -153,7 +153,10 @@ def _get_ocr(self, imgs): ) raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) return raw_text - raise OCRError("Language detection failed") + error_msg = ("Language detection failed. Set " + "PAPERLESS_FORGIVING_OCR in config file to continue " + "anyway.") + raise OCRError(error_msg) if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) From 818780a1918543100bf4b25c3d558fb6f28e8956 Mon Sep 17 00:00:00 2001 From: David Martin Date: Mon, 8 Oct 2018 19:38:38 +1100 Subject: [PATCH 2/2] Add PAPERLESS_FORGIVING_OCR option to example config. Having it in the example config makes it clearer that the option exists. --- paperless.conf.example | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paperless.conf.example b/paperless.conf.example index 3604505cb..11e6d905b 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false" #PAPERLESS_CONSUMER_LOOP_TIME=10 +# By default Paperless stops consuming a document if no language can be detected. +# Set to true to consume documents even if the language detection fails. 
+#PAPERLESS_FORGIVING_OCR="false" + + ############################################################################### #### Interface #### ###############################################################################