Skip to content
This repository has been archived by the owner on Feb 16, 2023. It is now read-only.

Commit

Permalink
updated consumer: now using watchdog
Browse files: browse the repository at this point in the history
  • Loading branch information
Jonas Winkler committed Nov 1, 2020
1 parent 8f4ddb3 commit 9f29dc2
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 226 deletions.
19 changes: 9 additions & 10 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,28 @@ verify_ssl = true
name = "pypi"

[packages]
django = "*"
django = "~=3.1"
pillow = "*"
dateparser = "*"
dateparser = "~=0.7"
django-cors-headers = "*"
djangorestframework = "*"
inotify-simple = "*"
djangorestframework = "~=3.12"
python-gnupg = "*"
python-dotenv = "*"
filemagic = "*"
pyocr = "*"
pyocr = "~=0.7"
langdetect = "*"
pdftotext = "*"
django-filter = "*"
django-filter = "~=2.4"
python-dateutil = "*"
psycopg2-binary = "*"
scikit-learn="*"
whoosh="*"
scikit-learn="~=0.23"
whoosh="~=2.7"
gunicorn = "*"
whitenoise = "*"
fuzzywuzzy = "*"
python-Levenshtein = "*"

django-extensions = "*"
django-extensions = ""
watchdog = "*"

[dev-packages]
coveralls = "*"
Expand Down
162 changes: 89 additions & 73 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 0 additions & 44 deletions src/documents/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
import logging
import os
import re
import time
import uuid

from operator import itemgetter
from django.conf import settings
from django.utils import timezone
from paperless.db import GnuPG
Expand Down Expand Up @@ -36,17 +34,12 @@ class Consumer:
5. Delete the document and image(s)
"""

# Files are considered ready for consumption if they have been unmodified
# for this duration
FILES_MIN_UNMODIFIED_DURATION = 0.5

def __init__(self, consume=settings.CONSUMPTION_DIR,
scratch=settings.SCRATCH_DIR):

self.logger = logging.getLogger(__name__)
self.logging_group = None

self._ignore = []
self.consume = consume
self.scratch = scratch

Expand Down Expand Up @@ -83,43 +76,6 @@ def log(self, level, message):
"group": self.logging_group
})

def consume_new_files(self):
    """
    Find non-ignored files in consumption dir and consume them if they have
    been unmodified for FILES_MIN_UNMODIFIED_DURATION.

    Each regular file in ``self.consume`` is identified by a
    ``(path, mtime)`` pair. Pairs already present in ``self._ignore``
    (files whose consumption failed earlier and that have not changed
    since) are skipped. The remaining files are processed oldest first:
    after sleeping for ``FILES_MIN_UNMODIFIED_DURATION`` seconds, every
    file whose modification time is still the same is handed to
    ``self.try_consume_file``; if that returns a falsy value the pair is
    added to ``self._ignore``.

    Files that disappear while this method runs are skipped instead of
    aborting the whole pass with an ``OSError``.
    """
    # Snapshot as a set so the membership test below is O(1) per entry.
    previously_ignored = set(self._ignore)
    ignored_files = []
    files = []

    # The context manager closes the directory handle even if the loop
    # is left early by an exception.
    with os.scandir(self.consume) as entries:
        for entry in entries:
            if not entry.is_file():
                self.logger.warning(
                    "Skipping %s as it is not a file",
                    entry.path
                )
                continue

            try:
                file = (entry.path, entry.stat().st_mtime)
            except OSError:
                # The file vanished between listing and stat; nothing
                # left to consume.
                continue

            if file in previously_ignored:
                ignored_files.append(file)
            else:
                files.append(file)

    # Set _ignore to only include files that still exist (and are
    # unchanged). Done before the early return so the list is pruned on
    # every pass, which keeps it from growing indefinitely.
    self._ignore[:] = ignored_files

    if not files:
        return

    files_old_to_new = sorted(files, key=itemgetter(1))

    # Give writers a chance to finish; files still being written will show
    # a different mtime afterwards and are left for a later pass.
    time.sleep(self.FILES_MIN_UNMODIFIED_DURATION)

    for file, mtime in files_old_to_new:
        try:
            current_mtime = os.path.getmtime(file)
        except OSError:
            # Removed during the delay or while an earlier file was being
            # consumed; skip it.
            continue

        if mtime == current_mtime:
            # File has not been modified and can be consumed
            if not self.try_consume_file(file):
                self._ignore.append((file, mtime))

@transaction.atomic
def try_consume_file(self, file):
"""
Expand Down
Loading

0 comments on commit 9f29dc2

Please sign in to comment.