diff --git a/paperless.conf.example b/paperless.conf.example index 3604505cb..11e6d905b 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false" #PAPERLESS_CONSUMER_LOOP_TIME=10 +# By default Paperless stops consuming a document if no language can be detected. +# Set to true to consume documents even if the language detection fails. +#PAPERLESS_FORGIVING_OCR="false" + + ############################################################################### #### Interface #### ############################################################################### diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 0139738be..fb8c1c3ec 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -153,7 +153,10 @@ def _get_ocr(self, imgs): ) raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) return raw_text - raise OCRError("Language detection failed") + error_msg = ("Language detection failed. Set " + "PAPERLESS_FORGIVING_OCR in config file to continue " + "anyway.") + raise OCRError(error_msg) if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)