From 9d22d9c9b1835c7f755278338721c4dd764dc162 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Mon, 9 Nov 2020 20:29:02 +0100 Subject: [PATCH] added a task scheduler for recurring tasks --- .gitignore | 1 - Dockerfile | 8 +- Pipfile | 4 +- Pipfile.lock | 177 +++++++++++------- docker-compose.env.example | 4 - ...-compose.yml.example => docker-compose.yml | 12 +- paperless.conf.example | 10 + scripts/paperless-cron | 5 - scripts/supervisord.conf | 5 +- .../commands/document_create_classifier.py | 31 +-- .../management/commands/document_index.py | 15 +- .../management/commands/document_rerun_ocr.py | 60 ------ .../migrations/1001_auto_20201109_1636.py | 28 +++ src/documents/tasks.py | 57 ++++++ src/paperless/settings.py | 12 ++ 15 files changed, 240 insertions(+), 189 deletions(-) rename docker-compose.yml.example => docker-compose.yml (79%) delete mode 100644 scripts/paperless-cron delete mode 100644 src/documents/management/commands/document_rerun_ocr.py create mode 100644 src/documents/migrations/1001_auto_20201109_1636.py create mode 100644 src/documents/tasks.py diff --git a/.gitignore b/.gitignore index 871a7bd08..25c7c421a 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,6 @@ target/ .virtualenv virtualenv /venv -docker-compose.yml docker-compose.env # Used for development diff --git a/Dockerfile b/Dockerfile index 05fd430aa..bb96305f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,6 @@ COPY Pipfile* ./ #Dependencies RUN apt-get update \ && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ - anacron \ build-essential \ curl \ ghostscript \ @@ -60,7 +59,6 @@ RUN apt-get update \ COPY scripts/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml COPY scripts/gunicorn.conf.py ./ COPY scripts/supervisord.conf /etc/supervisord.conf -COPY scripts/paperless-cron /etc/cron.daily/ COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh # copy app @@ -71,9 +69,7 @@ COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/document RUN addgroup --gid 1000 paperless \ && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ && chown -R paperless:paperless . \ - && chmod 755 /sbin/docker-entrypoint.sh \ - && chmod +x /etc/cron.daily/paperless-cron \ - && rm /etc/cron.daily/apt-compat /etc/cron.daily/dpkg + && chmod 755 /sbin/docker-entrypoint.sh WORKDIR /usr/src/paperless/src/ @@ -81,6 +77,6 @@ RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/consume", "/usr/src/paperless/export"] ENTRYPOINT ["/sbin/docker-entrypoint.sh"] -CMD ["python3", "manage.py", "--help"] +CMD ["supervisord", "-c", "/etc/supervisord.conf"] LABEL maintainer="Jonas Winkler " diff --git a/Pipfile b/Pipfile index beb252591..7cd663ac4 100644 --- a/Pipfile +++ b/Pipfile @@ -24,9 +24,11 @@ gunicorn = "*" whitenoise = "*" fuzzywuzzy = "*" python-Levenshtein = "*" -django-extensions = "" +django-extensions = "*" watchdog = "*" pathvalidate = "*" +django-q = "*" +redis = "*" [dev-packages] coveralls = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 40d92fa59..122750db1 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9b05b0a30fedd4192cd81df4fe96e7ae6e55facd557607cc1f9f66c173b4cdb1" + "sha256": "135aa8778c31854db426652dfa7abf813cdfab1b08bfc16c8cd82e627db7565e" }, "pipfile-spec": 6, "requires": {}, @@ -14,13 +14,28 @@ ] }, "default": { + "arrow": { + "hashes": [ + "sha256:e098abbd9af3665aea81bdd6c869e93af4feb078e98468dd351c383af187aac5", + "sha256:ff08d10cda1d36c68657d6ad20d74fbea493d980f8b2d45344e00d6ed2bf6ed4" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.17.0" + }, "asgiref": { "hashes": [ - "sha256:a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e", - "sha256:cd88907ecaec59d78e4ac00ea665b03e571cb37e3a0e37b3702af1a9e86c365a" + "sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17", + "sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0" ], "markers": "python_version >= '3.5'", - "version": "==3.3.0" + "version": "==3.3.1" + }, + "blessed": { + "hashes": [ + "sha256:7d4914079a6e8e14fbe080dcaf14dee596a088057cdc598561080e3266123b48", + "sha256:81125aa5b84cb9dfc09ff451886f64b4b923b75c5eaf51fde9d1c48a135eb797" + ], + "version": "==1.17.11" }, "dateparser": { "hashes": [ @@ -52,7 +67,6 @@ "sha256:dc663652ac9460fd06580a973576820430c6d428720e874ae46b041fa63e0efa" ], "index": "pypi", - "markers": "python_version >= '3.5'", "version": "==3.0.9" }, "django-filter": { @@ -63,6 +77,22 @@ "index": "pypi", "version": "==2.4.0" }, + "django-picklefield": { + "hashes": [ + "sha256:15ccba592ca953b9edf9532e64640329cd47b136b7f8f10f2939caa5f9ce4287", + "sha256:3c702a54fde2d322fe5b2f39b8f78d9f655b8f77944ab26f703be6c0ed335a35" + ], + "markers": "python_version >= '3'", + "version": "==3.0.1" + }, + "django-q": { + "hashes": [ + "sha256:523d54dcf1b66152c1b658f914f00ed3b518a3432a9decd4898738ca8dbbe10f", + "sha256:7e5c5c021a15cff6807044a3aa48f5757789ccfef839d71c575f5512931a3e33" + ], + "index": "pypi", + "version": "==1.3.4" + }, "djangorestframework": { "hashes": [ "sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7" @@ -206,41 +236,41 @@ }, "psycopg2-binary": { "hashes": [ - "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", - "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", - "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", - "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", - "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", - "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", - "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", - "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", - "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", - "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", - "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", - "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", - "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", + "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", + "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", + "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", - "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5", - "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", - "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", - "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", - "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", - "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", - "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", - "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", - "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", - "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", - "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056" + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", + "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" ], "index": "pypi", "version": "==2.8.6" @@ -290,51 +320,59 @@ ], "version": "==2020.4" }, + "redis": { + "hashes": [ + "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", + "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" + ], + "index": "pypi", + "version": "==3.5.3" + }, "regex": { "hashes": [ - "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", - "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", - "sha256:297116e79074ec2a2f885d22db00ce6e88b15f75162c5e8b38f66ea734e73c64", - "sha256:2564def9ce0710d510b1fc7e5178ce2d20f75571f788b5197b3c8134c366f50c", + "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", "sha256:06b52815d4ad38d6524666e0d50fe9173533c9cc145a5779b89733284e6f688f", - "sha256:b45bab9f224de276b7bc916f6306b86283f6aa8afe7ed4133423efb42015a898", - "sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", - "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", - "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", - "sha256:52e83a5f28acd621ba8e71c2b816f6541af7144b69cc5859d17da76c436a5427", - "sha256:b8a686a6c98872007aa41fdbb2e86dc03b287d951ff4a7f1da77fb7f14113e4d", - "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", - "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", - "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", - "sha256:3dfca201fa6b326239e1bccb00b915e058707028809b8ecc0cf6819ad233a740", + "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", "sha256:127a9e0c0d91af572fbb9e56d00a504dbd4c65e574ddda3d45b55722462210de", "sha256:1ec66700a10e3c75f1f92cbde36cca0d3aaee4c73dfa26699495a3a30b09093c", - "sha256:bf4f896c42c63d1f22039ad57de2644c72587756c0cfb3cc3b7530cfe228277f", - "sha256:bd904c0dec29bbd0769887a816657491721d5f545c29e30fd9d7a1a275dc80ab", - "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", - "sha256:9b6305295b6591e45f069d3553c54d50cc47629eb5c218aac99e0f7fafbf90a1", - "sha256:c32c91a0f1ac779cbd73e62430de3d3502bbc45ffe5bb6c376015acfa848144b", - "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", "sha256:227a8d2e5282c2b8346e7f68aa759e0331a0b4a890b55a5cfbb28bd0261b84c0", - "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", - "sha256:c454ad88e56e80e44f824ef8366bb7e4c3def12999151fd5c0ea76a18fe9aa3e", - "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:2564def9ce0710d510b1fc7e5178ce2d20f75571f788b5197b3c8134c366f50c", + "sha256:297116e79074ec2a2f885d22db00ce6e88b15f75162c5e8b38f66ea734e73c64", "sha256:2dc522e25e57e88b4980d2bdd334825dbf6fa55f28a922fc3bfa60cc09e5ef53", - "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", + "sha256:3dfca201fa6b326239e1bccb00b915e058707028809b8ecc0cf6819ad233a740", "sha256:49461446b783945597c4076aea3f49aee4b4ce922bd241e4fcf62a3e7c61794c", - "sha256:de7fd57765398d141949946c84f3590a68cf5887dac3fc52388df0639b01eda4", + "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", "sha256:4b5a9bcb56cc146c3932c648603b24514447eafa6ce9295234767bf92f69b504", - "sha256:cb905f3d2e290a8b8f1579d3984f2cfa7c3a29cc7cba608540ceeed18513f520", - "sha256:cfcf28ed4ce9ced47b9b9670a4f0d3d3c0e4d4779ad4dadb1ad468b097f808aa", - "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", - "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", - "sha256:aacc8623ffe7999a97935eeabbd24b1ae701d08ea8f874a6ff050e93c3e658cf", + "sha256:52e83a5f28acd621ba8e71c2b816f6541af7144b69cc5859d17da76c436a5427", + "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", + "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582", + "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", - "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", - "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", "sha256:96f99219dddb33e235a37283306834700b63170d7bb2a1ee17e41c6d589c8eb9", - "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26", - "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582" + "sha256:9b6305295b6591e45f069d3553c54d50cc47629eb5c218aac99e0f7fafbf90a1", + "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", + "sha256:aacc8623ffe7999a97935eeabbd24b1ae701d08ea8f874a6ff050e93c3e658cf", + "sha256:b45bab9f224de276b7bc916f6306b86283f6aa8afe7ed4133423efb42015a898", + "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", + "sha256:b8a686a6c98872007aa41fdbb2e86dc03b287d951ff4a7f1da77fb7f14113e4d", + "sha256:bd904c0dec29bbd0769887a816657491721d5f545c29e30fd9d7a1a275dc80ab", + "sha256:bf4f896c42c63d1f22039ad57de2644c72587756c0cfb3cc3b7530cfe228277f", + "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", + "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", + "sha256:c32c91a0f1ac779cbd73e62430de3d3502bbc45ffe5bb6c376015acfa848144b", + "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", + "sha256:c454ad88e56e80e44f824ef8366bb7e4c3def12999151fd5c0ea76a18fe9aa3e", + "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", + "sha256:cb905f3d2e290a8b8f1579d3984f2cfa7c3a29cc7cba608540ceeed18513f520", + "sha256:cfcf28ed4ce9ced47b9b9670a4f0d3d3c0e4d4779ad4dadb1ad468b097f808aa", + "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", + "sha256:de7fd57765398d141949946c84f3590a68cf5887dac3fc52388df0639b01eda4", + "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", + "sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", + "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26" ], "version": "==2020.10.28" }, @@ -429,6 +467,13 @@ "index": "pypi", "version": "==0.10.3" }, + "wcwidth": { + "hashes": [ + "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", + "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" + ], + "version": "==0.2.5" + }, "whitenoise": { "hashes": [ "sha256:05ce0be39ad85740a78750c86a93485c40f08ad8c62a6006de0233765996e5c7", @@ -488,10 +533,10 @@ }, "certifi": { "hashes": [ - "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", - "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" + "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", + "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" ], - "version": "==2020.6.20" + "version": "==2020.11.8" }, "chardet": { "hashes": [ diff --git a/docker-compose.env.example b/docker-compose.env.example index cc2a1d3ec..fb529898a 100644 --- a/docker-compose.env.example +++ b/docker-compose.env.example @@ -1,7 +1,3 @@ -# Database settings for paperless -# If you want to use sqlite instead, remove this setting. -PAPERLESS_DBHOST="db" - # The UID and GID of the user used to run paperless in the container. Set this # to your UID and GID on the host so that you have write access to the # consumption directory. diff --git a/docker-compose.yml.example b/docker-compose.yml similarity index 79% rename from docker-compose.yml.example rename to docker-compose.yml index 1130e26a3..f9b4d6c33 100644 --- a/docker-compose.yml.example +++ b/docker-compose.yml @@ -1,5 +1,9 @@ version: "3.4" services: + broker: + image: redis:latest + #restart: always + db: image: postgres:13 #restart: always @@ -11,13 +15,12 @@ services: POSTGRES_PASSWORD: paperless webserver: - build: . - image: paperless-ng + image: paperless-ng:latest #restart: always depends_on: - db ports: - - "8000:8000" + - 8000:8000 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000"] interval: 30s @@ -29,6 +32,9 @@ services: - ./export:/usr/src/paperless/export - ./consume:/usr/src/paperless/consume env_file: docker-compose.env + environment: + PAPERLESS_REDIS: redis://broker:6379 + PAPERLESS_DBHOST: db command: ["supervisord", "-c", "/etc/supervisord.conf"] diff --git a/paperless.conf.example b/paperless.conf.example index 9c0b57250..48df40ab2 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -3,6 +3,16 @@ # As this file contains passwords it should only be readable by the user # running paperless. +############################################################################### +#### Message Broker #### +############################################################################### + +# This is required for processing scheduled tasks such as email fetching, index +# optimization and for training the automatic document matcher. +# Defaults to localhost:6379. +#PAPERLESS_REDIS="redis://localhost:6379" + + ############################################################################### #### Database Settings #### ############################################################################### diff --git a/scripts/paperless-cron b/scripts/paperless-cron deleted file mode 100644 index 238857227..000000000 --- a/scripts/paperless-cron +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -cd /usr/src/paperless/src - -sudo -HEu paperless python3 manage.py document_create_classifier diff --git a/scripts/supervisord.conf b/scripts/supervisord.conf index d3ff288de..0ac416d3e 100644 --- a/scripts/supervisord.conf +++ b/scripts/supervisord.conf @@ -24,8 +24,9 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 -[program:anacron] -command=anacron -d +[program:scheduler] +command=python3 manage.py qcluster +user=paperless stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 diff --git a/src/documents/management/commands/document_create_classifier.py b/src/documents/management/commands/document_create_classifier.py index 85cb3b446..839044700 100755 --- a/src/documents/management/commands/document_create_classifier.py +++ b/src/documents/management/commands/document_create_classifier.py @@ -1,10 +1,6 @@ -import logging - from django.core.management.base import BaseCommand -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError -from paperless import settings from ...mixins import Renderable +from ...tasks import train_classifier class Command(Renderable, BaseCommand): @@ -18,27 +14,4 @@ def __init__(self, *args, **kwargs): BaseCommand.__init__(self, *args, **kwargs) def handle(self, *args, **options): - classifier = DocumentClassifier() - - try: - # load the classifier, since we might not have to train it again. - classifier.reload() - except (FileNotFoundError, IncompatibleClassifierVersionError): - # This is what we're going to fix here. - pass - - try: - if classifier.train(): - logging.getLogger(__name__).info( - "Saving updated classifier model to {}...".format(settings.MODEL_FILE) - ) - classifier.save_classifier() - else: - logging.getLogger(__name__).debug( - "Training data unchanged." - ) - - except Exception as e: - logging.getLogger(__name__).error( - "Classifier error: " + str(e) - ) + train_classifier() diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 5a136d6a5..7dfdbaa42 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -1,9 +1,7 @@ from django.core.management import BaseCommand -from whoosh.writing import AsyncWriter -import documents.index as index from documents.mixins import Renderable -from documents.models import Document +from documents.tasks import index_reindex, index_optimize class Command(Renderable, BaseCommand): @@ -22,13 +20,6 @@ def handle(self, *args, **options): self.verbosity = options["verbosity"] if options['command'] == 'reindex': - documents = Document.objects.all() - - ix = index.open_index(recreate=True) - - with AsyncWriter(ix) as writer: - for document in documents: - index.update_document(writer, document) - + index_reindex() elif options['command'] == 'optimize': - index.open_index().optimize() + index_optimize() diff --git a/src/documents/management/commands/document_rerun_ocr.py b/src/documents/management/commands/document_rerun_ocr.py deleted file mode 100644 index 794357420..000000000 --- a/src/documents/management/commands/document_rerun_ocr.py +++ /dev/null @@ -1,60 +0,0 @@ -import argparse -import threading -from multiprocessing import Pool -from multiprocessing.pool import ThreadPool - -from django.core.management.base import BaseCommand - -from documents.consumer import Consumer -from documents.models import Log, Document -from documents.parsers import get_parser_class - - -def process_document(doc): - parser_class = get_parser_class(doc.file_name) - if not parser_class: - print("no parser available") - else: - print("Parser: {}".format(parser_class.__name__)) - parser = parser_class(doc.source_path, None) - try: - text = parser.get_text() - doc.content = text - doc.save() - finally: - parser.cleanup() - - -def document_index(value): - ivalue = int(value) - if not (1 <= ivalue <= Document.objects.count()): - raise argparse.ArgumentTypeError( - "{} is not a valid document index (out of range)".format(value)) - - return ivalue - - -class Command(BaseCommand): - - help = "Performs OCR on all documents again!" - - - def add_arguments(self, parser): - parser.add_argument( - "-s", "--start_index", - default=None, - type=document_index - ) - - def handle(self, *args, **options): - - docs = Document.objects.all().order_by("added") - - indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs)) - - for i in indices: - doc = docs[i] - print("==================================") - print("{} out of {}: {}".format(i+1, len(docs), doc.file_name)) - print("==================================") - process_document(doc) diff --git a/src/documents/migrations/1001_auto_20201109_1636.py b/src/documents/migrations/1001_auto_20201109_1636.py new file mode 100644 index 000000000..8d6a0f584 --- /dev/null +++ b/src/documents/migrations/1001_auto_20201109_1636.py @@ -0,0 +1,28 @@ +# Generated by Django 3.1.3 on 2020-11-09 16:36 + +from django.db import migrations +from django.db.migrations import RunPython +from django_q.models import Schedule +from django_q.tasks import schedule + + +def add_schedules(apps, schema_editor): + schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY) + schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY) + schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10) + + +def remove_schedules(apps, schema_editor): + Schedule.objects.all().delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1000_update_paperless_all'), + ('django_q', '0013_task_attempt_count'), + ] + + operations = [ + RunPython(add_schedules, remove_schedules) + ] diff --git a/src/documents/tasks.py b/src/documents/tasks.py new file mode 100644 index 000000000..aaf466bd2 --- /dev/null +++ b/src/documents/tasks.py @@ -0,0 +1,57 @@ +import logging + +from django.conf import settings +from django_q.tasks import async_task, result +from whoosh.writing import AsyncWriter + +from documents import index +from documents.classifier import DocumentClassifier, \ + IncompatibleClassifierVersionError +from documents.mail import MailFetcher +from documents.models import Document + + +def consume_mail(): + MailFetcher().pull() + + +def index_optimize(): + index.open_index().optimize() + + +def index_reindex(): + documents = Document.objects.all() + + ix = index.open_index(recreate=True) + + with AsyncWriter(ix) as writer: + for document in documents: + index.update_document(writer, document) + + +def train_classifier(): + classifier = DocumentClassifier() + + try: + # load the classifier, since we might not have to train it again. + classifier.reload() + except (FileNotFoundError, IncompatibleClassifierVersionError): + # This is what we're going to fix here. + pass + + try: + if classifier.train(): + logging.getLogger(__name__).info( + "Saving updated classifier model to {}...".format( + settings.MODEL_FILE) + ) + classifier.save_classifier() + else: + logging.getLogger(__name__).debug( + "Training data unchanged." + ) + + except Exception as e: + logging.getLogger(__name__).error( + "Classifier error: " + str(e) + ) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 65da01218..2c96350dc 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -71,6 +71,8 @@ def __get_boolean(key, default="NO"): "rest_framework", "django_filters", + "django_q", + ] REST_FRAMEWORK = { @@ -242,6 +244,16 @@ def __get_boolean(key, default="NO"): }, } +############################################################################### +# Task queue # +############################################################################### + +Q_CLUSTER = { + 'name': 'paperless', + 'catch_up': False, + 'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379") +} + ############################################################################### # Paperless Specific Settings # ###############################################################################