From fb6f2e07c97d8b6ad455a269d24f216ac80f5cf9 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Thu, 13 Sep 2018 15:19:25 +0200 Subject: [PATCH 01/19] Added a bunch of new features: - Debug mode is now configurable in the configuration file. This way, we don't have to edit versioned files to disable it on production systems. - Recent correspondents filter (enable in configuration file) - Document actions: Edit tags and correspondents on multiple documents at once - Replaced month list filter with date drilldown - Sortable document count columns on Tag and Correspondent admin - Last correspondence column on Correspondent admin - Save and edit next functionality for document editing --- .gitignore | 2 + paperless.conf.example | 10 ++ requirements.txt | 3 + src/documents/actions.py | 108 +++++++++++++ src/documents/admin.py | 142 +++++++++++++----- .../admin/documents/document/change_form.html | 18 ++- .../documents/document/select_object.html | 46 ++++++ src/paperless/settings.py | 14 +- 8 files changed, 301 insertions(+), 42 deletions(-) mode change 100644 => 100755 requirements.txt create mode 100644 src/documents/actions.py create mode 100644 src/documents/templates/admin/documents/document/select_object.html diff --git a/.gitignore b/.gitignore index a16f958a3..439d9df4b 100644 --- a/.gitignore +++ b/.gitignore @@ -81,3 +81,5 @@ docker-compose.env scripts/import-for-development scripts/nuke +# Static files collected by the collectstatic command +static/ diff --git a/paperless.conf.example b/paperless.conf.example index 15498a26a..05cf81724 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -59,6 +59,11 @@ PAPERLESS_EMAIL_SECRET="" #### Security #### ############################################################################### +# Controls whether django's debug mode is enabled. Disable this on production +# systems. Debug mode is enabled by default. 
+PAPERLESS_DEBUG="false" + + # Paperless can be instructed to attempt to encrypt your PDF files with GPG # using the PAPERLESS_PASSPHRASE specified below. If however you're not # concerned about encrypting these files (for example if you have disk @@ -203,3 +208,8 @@ PAPERLESS_EMAIL_SECRET="" # positive integer, but if you don't define one in paperless.conf, a default of # 100 will be used. #PAPERLESS_LIST_PER_PAGE=100 + + +# The number of years for which a correspondent will be included in the recent +# correspondents filter. +#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 0476efef1..89c7e296f --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,7 @@ idna==2.7 inotify-simple==1.1.8 langdetect==1.0.7 more-itertools==4.3.0 +numpy==1.15.1 pdftotext==2.1.0 pillow==5.2.0 pluggy==0.7.1; python_version != '3.1.*' @@ -45,6 +46,8 @@ pytz==2018.5 regex==2018.8.29 requests==2.19.1 six==1.11.0 +scikit-learn==0.19.2 +scipy==1.1.0 termcolor==1.1.0 text-unidecode==1.2 tzlocal==1.5.1 diff --git a/src/documents/actions.py b/src/documents/actions.py new file mode 100644 index 000000000..3db5cd314 --- /dev/null +++ b/src/documents/actions.py @@ -0,0 +1,108 @@ +from django.contrib import messages +from django.contrib.admin import helpers +from django.contrib.admin.utils import model_ngettext +from django.core.exceptions import PermissionDenied +from django.template.response import TemplateResponse + +from documents.models import Tag, Correspondent + + +def select_action(modeladmin, request, queryset, title, action, modelclass, success_message="", document_action=None, queryset_action=None): + opts = modeladmin.model._meta + app_label = opts.app_label + + if not modeladmin.has_change_permission(request): + raise PermissionDenied + + if request.POST.get('post'): + n = queryset.count() + selected_object = modelclass.objects.get(id=request.POST.get('obj_id')) + if n: + for document in queryset: + if 
document_action: + document_action(document, selected_object) + document_display = str(document) + modeladmin.log_change(request, document, document_display) + if queryset_action: + queryset_action(queryset, selected_object) + + modeladmin.message_user(request, success_message % { + "selected_object": selected_object.name, "count": n, "items": model_ngettext(modeladmin.opts, n) + }, messages.SUCCESS) + + # Return None to display the change list page again. + return None + + context = dict( + modeladmin.admin_site.each_context(request), + title=title, + queryset=queryset, + opts=opts, + action_checkbox_name=helpers.ACTION_CHECKBOX_NAME, + media=modeladmin.media, + action=action, + objects=modelclass.objects.all(), + itemname=model_ngettext(modelclass, 1) + ) + + request.current_app = modeladmin.admin_site.name + + return TemplateResponse(request, "admin/%s/%s/select_object.html" % (app_label, opts.model_name), context) + + +def simple_action(modeladmin, request, queryset, success_message="", document_action=None, queryset_action=None): + if not modeladmin.has_change_permission(request): + raise PermissionDenied + + n = queryset.count() + if n: + for document in queryset: + if document_action: + document_action(document) + document_display = str(document) + modeladmin.log_change(request, document, document_display) + if queryset_action: + queryset_action(queryset) + modeladmin.message_user(request, success_message % { + "count": n, "items": model_ngettext(modeladmin.opts, n) + }, messages.SUCCESS) + + # Return None to display the change list page again. 
+ return None + + +def add_tag_to_selected(modeladmin, request, queryset): + return select_action(modeladmin=modeladmin, request=request, queryset=queryset, + title="Add tag to multiple documents", + action="add_tag_to_selected", + modelclass=Tag, + success_message="Successfully added tag %(selected_object)s to %(count)d %(items)s.", + document_action=lambda doc, tag: doc.tags.add(tag)) +add_tag_to_selected.short_description = "Add tag to selected documents" + + +def remove_tag_from_selected(modeladmin, request, queryset): + return select_action(modeladmin=modeladmin, request=request, queryset=queryset, + title="Remove tag from multiple documents", + action="remove_tag_from_selected", + modelclass=Tag, + success_message="Successfully removed tag %(selected_object)s from %(count)d %(items)s.", + document_action=lambda doc, tag: doc.tags.remove(tag)) +remove_tag_from_selected.short_description = "Remove tag from selected documents" + + +def set_correspondent_on_selected(modeladmin, request, queryset): + return select_action(modeladmin=modeladmin, request=request, queryset=queryset, + title="Set correspondent on multiple documents", + action="set_correspondent_on_selected", + modelclass=Correspondent, + success_message="Successfully set correspondent %(selected_object)s on %(count)d %(items)s.", + queryset_action=lambda qs, correspondent: qs.update(correspondent=correspondent)) +set_correspondent_on_selected.short_description = "Set correspondent on selected documents" + + +def remove_correspondent_from_selected(modeladmin, request, queryset): + return simple_action(modeladmin=modeladmin, request=request, queryset=queryset, + success_message="Successfully removed correspondent from %(count)d %(items)s.", + queryset_action=lambda qs: qs.update(correspondent=None)) +remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents" diff --git a/src/documents/admin.py b/src/documents/admin.py index 36154b6ba..d545c1c02 100644 --- 
a/src/documents/admin.py +++ b/src/documents/admin.py @@ -1,44 +1,25 @@ -from datetime import datetime +from datetime import datetime, timedelta from django.conf import settings -from django.contrib import admin +from django.contrib import admin, messages +from django.contrib.admin.templatetags.admin_urls import add_preserved_filters from django.contrib.auth.models import User, Group +from django.http import HttpResponseRedirect try: from django.core.urlresolvers import reverse except ImportError: from django.urls import reverse from django.templatetags.static import static -from django.utils.safestring import mark_safe from django.utils.html import format_html, format_html_join +from django.utils.http import urlquote +from django.utils.safestring import mark_safe +from django.db import models +from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \ + remove_correspondent_from_selected from .models import Correspondent, Tag, Document, Log -class MonthListFilter(admin.SimpleListFilter): - - title = "Month" - - # Parameter for the filter that will be used in the URL query. 
- parameter_name = "month" - - def lookups(self, request, model_admin): - r = [] - for document in Document.objects.all(): - r.append(( - document.created.strftime("%Y-%m"), - document.created.strftime("%B %Y") - )) - return sorted(set(r), key=lambda x: x[0], reverse=True) - - def queryset(self, request, queryset): - - if not self.value(): - return None - - year, month = self.value().split("-") - return queryset.filter(created__year=year, created__month=month) - - class FinancialYearFilter(admin.SimpleListFilter): title = "Financial Year" @@ -104,18 +85,43 @@ def queryset(self, request, queryset): created__lte=self._fy_end(end)) +class RecentCorrespondentFilter(admin.RelatedFieldListFilter): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.title = "correspondent (recent)" + + def field_choices(self, field, request, model_admin): + lookups = [] + if settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS and settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS > 0: + date_limit = datetime.now() - timedelta(days=365*settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS) + for c in Correspondent.objects.filter(documents__created__gte=date_limit).distinct(): + lookups.append((c.id, c.name)) + return lookups + + class CommonAdmin(admin.ModelAdmin): list_per_page = settings.PAPERLESS_LIST_PER_PAGE class CorrespondentAdmin(CommonAdmin): - list_display = ("name", "match", "matching_algorithm", "document_count") + list_display = ("name", "match", "matching_algorithm", "document_count", "last_correspondence") list_filter = ("matching_algorithm",) list_editable = ("match", "matching_algorithm") + def get_queryset(self, request): + qs = super(CorrespondentAdmin, self).get_queryset(request) + qs = qs.annotate(document_count=models.Count("documents"), last_correspondence=models.Max("documents__created")) + return qs + def document_count(self, obj): - return obj.documents.count() + return obj.document_count + document_count.admin_order_field = "document_count" + + def 
last_correspondence(self, obj): + return obj.last_correspondence + last_correspondence.admin_order_field = "last_correspondence" class TagAdmin(CommonAdmin): @@ -125,8 +131,14 @@ class TagAdmin(CommonAdmin): list_filter = ("colour", "matching_algorithm") list_editable = ("colour", "match", "matching_algorithm") + def get_queryset(self, request): + qs = super(TagAdmin, self).get_queryset(request) + qs = qs.annotate(document_count=models.Count("documents")) + return qs + def document_count(self, obj): - return obj.documents.count() + return obj.document_count + document_count.admin_order_field = "document_count" class DocumentAdmin(CommonAdmin): @@ -140,12 +152,18 @@ class Media: readonly_fields = ("added",) list_display = ("title", "created", "added", "thumbnail", "correspondent", "tags_") - list_filter = ("tags", "correspondent", FinancialYearFilter, - MonthListFilter) + list_filter = ("tags", ('correspondent', RecentCorrespondentFilter), "correspondent", FinancialYearFilter) + filter_horizontal = ("tags",) ordering = ["-created", "correspondent"] + actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected] + + date_hierarchy = 'created' + + document_queue = None + def has_add_permission(self, request): return False @@ -153,6 +171,56 @@ def created_(self, obj): return obj.created.date().strftime("%Y-%m-%d") created_.short_description = "Created" + def changelist_view(self, request, extra_context=None): + response = super().changelist_view(request, extra_context) + + if request.method == 'GET': + cl = self.get_changelist_instance(request) + self.document_queue = [doc.id for doc in cl.queryset] + + return response + + def change_view(self, request, object_id=None, form_url='', extra_context=None): + extra_context = extra_context or {} + doc = Document.objects.get(id=object_id) + if self.document_queue and object_id and int(object_id) in self.document_queue: + # There is a queue of documents + 
current_index = self.document_queue.index(int(object_id)) + if current_index < len(self.document_queue) - 1: + # ... and there are still documents in the queue + extra_context['next_object'] = self.document_queue[current_index + 1] + return super(DocumentAdmin, self).change_view( + request, object_id, form_url, extra_context=extra_context, + ) + + def response_change(self, request, obj): + + # This is mostly copied from ModelAdmin.response_change() + opts = self.model._meta + preserved_filters = self.get_preserved_filters(request) + + msg_dict = { + 'name': opts.verbose_name, + 'obj': format_html('{}', urlquote(request.path), obj), + } + if "_saveandeditnext" in request.POST: + msg = format_html( + 'The {name} "{obj}" was changed successfully. Editing next object.', + **msg_dict + ) + self.message_user(request, msg, messages.SUCCESS) + redirect_url = reverse('admin:%s_%s_change' % + (opts.app_label, opts.model_name), + args=(request.POST['_next_object'],), + current_app=self.admin_site.name) + redirect_url = add_preserved_filters({'preserved_filters': preserved_filters, 'opts': opts}, redirect_url) + response = HttpResponseRedirect(redirect_url) + else: + response = super().response_change(request, obj) + + return response + + @mark_safe def thumbnail(self, obj): return self._html_tag( "a", @@ -165,8 +233,8 @@ def thumbnail(self, obj): ), href=obj.download_url ) - thumbnail.allow_tags = True + @mark_safe def tags_(self, obj): r = "" for tag in obj.tags.all(): @@ -183,10 +251,11 @@ def tags_(self, obj): ) } ) - return mark_safe(r) - tags_.allow_tags = True + return r + @mark_safe def document(self, obj): + # TODO: is this method even used anymore? 
return self._html_tag( "a", self._html_tag( @@ -199,7 +268,6 @@ def document(self, obj): ), href=obj.download_url ) - document.allow_tags = True @staticmethod def _html_tag(kind, inside=None, **kwargs): diff --git a/src/documents/templates/admin/documents/document/change_form.html b/src/documents/templates/admin/documents/document/change_form.html index 7bd0e483f..88fae955d 100644 --- a/src/documents/templates/admin/documents/document/change_form.html +++ b/src/documents/templates/admin/documents/document/change_form.html @@ -1,5 +1,21 @@ {% extends 'admin/change_form.html' %} +{% block content %} + +{{ block.super }} + +{% if next_object %} + +{% endif %} + +{% endblock content %} {% block footer %} @@ -10,4 +26,4 @@ django.jQuery(".field-created input").first().attr("type", "date") -{% endblock footer %} \ No newline at end of file +{% endblock footer %} diff --git a/src/documents/templates/admin/documents/document/select_object.html b/src/documents/templates/admin/documents/document/select_object.html new file mode 100644 index 000000000..1439b5c21 --- /dev/null +++ b/src/documents/templates/admin/documents/document/select_object.html @@ -0,0 +1,46 @@ +{% extends "admin/base_site.html" %} +{% load i18n l10n admin_urls static %} +{% load staticfiles %} + +{% block extrahead %} +{{ block.super }} +{{ media }} + + +{% endblock %} + +{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %} + +{% block breadcrumbs %} + +{% endblock %} + +{% block content %} +

Please select the {{itemname}}.

+
{% csrf_token %} +
+ {% for obj in queryset %} + + {% endfor %} +

+ +

+ + + +

+ + {% trans "Go back" %} +

+
+
+{% endblock %} diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 956b90a7f..e6f3da0cb 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -22,12 +22,12 @@ load_dotenv("/usr/local/etc/paperless.conf") -def __get_boolean(key): +def __get_boolean(key, default="NO"): """ Return a boolean value based on whatever the user has supplied in the environment based on whether the value "looks like" it's True or not. """ - return bool(os.getenv(key, "NO").lower() in ("yes", "y", "1", "t", "true")) + return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) # Build paths inside the project like this: os.path.join(BASE_DIR, ...) @@ -47,7 +47,7 @@ def __get_boolean(key): # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True +DEBUG = __get_boolean("PAPERLESS_DEBUG", "YES") LOGIN_URL = "admin:login" @@ -81,7 +81,7 @@ def __get_boolean(key): "rest_framework", "crispy_forms", - "django_filters", + "django_filters" ] @@ -292,3 +292,9 @@ def __get_boolean(key): # Specify the default date order (for autodetected dates) DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY") + +# Specify for how many years a correspondent is considered recent. Recent +# correspondents will be shown in a separate "Recent correspondents" filter as +# well. Set to 0 to disable this filter. 
+PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv( + "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0)) From cce6b43062b25464a00d09f47fffda83a86e925b Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sat, 22 Sep 2018 13:59:50 +0100 Subject: [PATCH 02/19] Clean up release notes --- docs/changelog.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 804447855..9396493a7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,7 +15,8 @@ Changelog * As his last bit of effort on this release, Joshua also added some code to allow you to view the documents inline rather than download them as an attachment. `#400`_ -* Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_ +* Finally, `ahyear`_ found a slip in the Docker documentation and patched it. + `#401`_ 2.2.1 @@ -32,14 +33,14 @@ Changelog version of Paperless that supports Django 2.0! As a result of their hard work, you can now also run Paperless on Python 3.7 as well: `#386`_ & `#390`_. -* `Stéphane Brunner`_ added a few lines of code that made tagging interface a lot - easier on those of us with lots of different tags: `#391`_. +* `Stéphane Brunner`_ added a few lines of code that made tagging interface a + lot easier on those of us with lots of different tags: `#391`_. * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create tags, so that's fixed now too: `#384`_. * `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved for packaging environments: `#383`_. -* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end - cleaner & easier: `#387`_. +* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based + front-end cleaner & easier: `#387`_. 
2.1.0 From b4b4d8f25ec274c6c82944f20ca009ac368f364b Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sat, 22 Sep 2018 14:00:00 +0100 Subject: [PATCH 03/19] Add an example for pdf2pdfocr with the pre-consume hook --- docs/consumption.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/consumption.rst b/docs/consumption.rst index bf62ed0a2..fabaf2641 100644 --- a/docs/consumption.rst +++ b/docs/consumption.rst @@ -76,6 +76,29 @@ Pre-consumption script * Document file name +A simple but common example for this would be creating a simple script like +this: + +.. code:: bash + :name: "/usr/local/bin/ocr-pdf" + + #!/usr/bin/env bash + pdf2pdfocr.py -i ${1} + +.. code:: bash + :name: /etc/paperless.conf + + ... + PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf" + ... + +This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``, +which will in turn call `pdf2pdfocr.py`_ on your document, which will then +overwrite the file with an OCR'd version of the file and exit. At which point, +the consumption process will begin with the newly modified file. + +.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr + .. _consumption-director-hook-variables-post: From 60ee08adec5491c80d0f0e91a7497892dc9cbb43 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sat, 22 Sep 2018 15:27:22 +0100 Subject: [PATCH 04/19] Reduce duplication in docker-compose.env.example See #404 for more info on where this came from. --- docker-compose.env.example | 48 +++++++++++++------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/docker-compose.env.example b/docker-compose.env.example index 3c1664573..51332437d 100644 --- a/docker-compose.env.example +++ b/docker-compose.env.example @@ -1,38 +1,22 @@ # Environment variables to set for Paperless -# Commented out variables will be replaced by a default within Paperless. 
- -# Passphrase Paperless uses to encrypt and decrypt your documents, if you want -# encryption at all. -# PAPERLESS_PASSPHRASE=CHANGE_ME - -# The amount of threads to use for text recognition -# PAPERLESS_OCR_THREADS=4 - -# Additional languages to install for text recognition +# Commented out variables will be replaced with a default within Paperless. +# +# In addition to what you see here, you can also define any values you find in +# paperless.conf.example here. Values like: +# +# * PAPERLESS_PASSPHRASE +# * PAPERLESS_CONSUMPTION_DIR +# * PAPERLESS_CONSUME_MAIL_HOST +# +# ...are all explained in that file but can be defined here, since the Docker +# installation doesn't make use of paperless.conf. + + +# Additional languages to install for text recognition. Note that this is +# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the +# default language used when guessing the language from the OCR output. # PAPERLESS_OCR_LANGUAGES=deu ita # You can change the default user and group id to a custom one # USERMAP_UID=1000 # USERMAP_GID=1000 - -############################################################################### -#### Mail Consumption #### -############################################################################### - -# These values are required if you want paperless to check a particular email -# box every 10 minutes and attempt to consume documents from there. If you -# don't define a HOST, mail checking will just be disabled. -# Don't use quotes after = or it will crash your docker -# PAPERLESS_CONSUME_MAIL_HOST= -# PAPERLESS_CONSUME_MAIL_PORT= -# PAPERLESS_CONSUME_MAIL_USER= -# PAPERLESS_CONSUME_MAIL_PASS= - -# Override the default IMAP inbox here. If it's not set, Paperless defaults to -# INBOX. -# PAPERLESS_CONSUME_MAIL_INBOX=INBOX - -# Any email sent to the target account that does not contain this text will be -# ignored. Mail checking won't work without this. 
-# PAPERLESS_EMAIL_SECRET= - From 425bbe34efd57db8f3c06b0796d2470f4c3f3bd2 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sat, 22 Sep 2018 16:17:18 +0100 Subject: [PATCH 05/19] Make the names of the sample files visible --- docs/consumption.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/consumption.rst b/docs/consumption.rst index fabaf2641..15f6c6393 100644 --- a/docs/consumption.rst +++ b/docs/consumption.rst @@ -79,14 +79,16 @@ Pre-consumption script A simple but common example for this would be creating a simple script like this: +``/usr/local/bin/ocr-pdf`` + .. code:: bash - :name: "/usr/local/bin/ocr-pdf" #!/usr/bin/env bash pdf2pdfocr.py -i ${1} +``/etc/paperless.conf`` + .. code:: bash - :name: /etc/paperless.conf ... PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf" From 9682a6f6fcbe39114b2291848285951c10e953cc Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sat, 22 Sep 2018 16:22:03 +0100 Subject: [PATCH 06/19] Add a contribution guide --- docs/contributing.rst | 113 ++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 114 insertions(+) create mode 100644 docs/contributing.rst diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 000000000..4ee6d18d5 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,113 @@ +.. _contributing: + +Contributing to Paperless +######################### + +Maybe you've been using Paperless for a while and want to add a feature or two, +or maybe you've come across a bug that you have some ideas how to solve. The +beauty of Free software is that you can see what's wrong and help to get it +fixed for everyone! + + +How to Get Your Changes Rolled Into Paperless +============================================= + +If you've found a bug, but don't know how to fix it, you can always post an +issue on `GitHub`_ in the hopes that someone will have the time to fix it for +you. 
If however you're the one with the time, pull requests are always +welcome, you just have to make sure that your code conforms to a few standards: + +Pep8 +---- + +It's the standard for all Python development, so it's `very well documented`_. +The short version is: + +* Lines should wrap at 79 characters +* Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS`` + for constants. +* Space out your operators: ``stuff + 7`` instead of ``stuff+7`` +* Two empty lines between classes, and functions, but 1 empty line between + class methods. + +There's more to it than that, but if you follow those, you'll probably be +alright. When you submit your pull request, there's a pep8 checker that'll +look at your code to see if anything is off. If it finds anything, it'll +complain at you until you fix it. + + +Additional Style Guides +----------------------- + +Where pep8 is ambiguous, I've tried to be a little more specific. These rules +aren't hard-and-fast, but if you can conform to them, I'll appreciate it and +spend less time trying to conform your PR before merging: + + +Function calls +.............. + +If you're calling a function and that necessitates more than one line of code, +please format it like this: + +.. code:: python + + my_function( + argument1, + kwarg1="x", + kwarg2="y", + another_really_long_kwarg="some big value", + a_kwarg_calling_another_long_function=another_function( + another_arg, + another_kwarg="kwarg!" + ) + ) + +This is all in the interest of code uniformity rather than anything else. If +we stick to a style, everything is understandable in the same way. + + +Quoting Strings +............... + +pep8 is a little too open-minded on this for my liking. Python strings should +be quoted with double quotes (``"``) except in cases where the resulting string +would require too much escaping of a double quote, in which case, a single +quoted, or triple-quoted string will do: + +.. 
code:: python + + my_string = "This is my string" + problematic_string = 'This is a "string" with "quotes" in it' + +In HTML templates, please use double-quotes for tag attributes, and single +quotes for arguments passed to Django template tags: + +.. code:: html + +
+ link this +
+ +This is to keep linters happy when they look at an HTML file and see an attribute +closing the ``"`` before it should have been. + +-- + +That's all there is in terms of guidelines, so I hope it's not too daunting. + + +The Code of Conduct +=================== + +Paperless has a `code of conduct`_. It's a lot like the other ones you see out +there, with a few small changes, but basically it boils down to: + +> Don't be an ass, or you might get banned. + +I'm proud to say that the CoC has never had to be enforced because everyone has +been awesome, friendly, and professional. + +.. _GitHub: https://github.com/danielquinn/paperless/issues +.. _very well documented: https://www.python.org/dev/peps/pep-0008/ +.. _code of conduct: https://github.com/danielquinn/paperless/blob/master/CODE_OF_CONDUCT.md diff --git a/docs/index.rst b/docs/index.rst index 7710a330c..fd9d57d4e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -43,5 +43,6 @@ Contents customising extending troubleshooting + contributing scanners changelog From b420281be0c9db7f8f81a40eba9752b89a90c20f Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:40:46 +0100 Subject: [PATCH 07/19] Remove numpy, scikit-learn, and scipy as they weren't being used --- requirements.txt | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/requirements.txt b/requirements.txt index 89c7e296f..b8c26a579 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -i https://pypi.python.org/simple -apipkg==1.5; python_version != '3.1.*' -atomicwrites==1.2.1; python_version != '3.1.*' +apipkg==1.5; python_version != '3.3.*' +atomicwrites==1.2.1; python_version != '3.3.*' attrs==18.2.0 certifi==2018.8.24 chardet==3.0.4 -coverage==4.5.1; python_version != '3.1.*' +coverage==4.5.1; python_version < '4' coveralls==1.5.0 dateparser==0.7.0 django-cors-headers==2.4.0 @@ -14,9 +14,9 @@ django-filter==2.0.0 django==2.0.8 djangorestframework==3.8.2 docopt==0.6.2 
-execnet==1.5.0; python_version != '3.1.*' +execnet==1.5.0; python_version != '3.3.*' factory-boy==2.11.1 -faker==0.9.0 +faker==0.9.0; python_version >= '2.7' filemagic==1.6 fuzzywuzzy==0.15.0 gunicorn==19.9.0 @@ -24,20 +24,19 @@ idna==2.7 inotify-simple==1.1.8 langdetect==1.0.7 more-itertools==4.3.0 -numpy==1.15.1 pdftotext==2.1.0 pillow==5.2.0 -pluggy==0.7.1; python_version != '3.1.*' -py==1.6.0; python_version != '3.1.*' +pluggy==0.7.1; python_version != '3.3.*' +py==1.6.0; python_version != '3.3.*' pycodestyle==2.4.0 pyocr==0.5.3 -pytest-cov==2.5.1 +pytest-cov==2.6.0 pytest-django==3.4.2 pytest-env==0.6.2 -pytest-forked==0.2 +pytest-forked==0.2; python_version != '3.3.*' pytest-sugar==0.9.1 pytest-xdist==1.23.0 -pytest==3.7.4 +pytest==3.8.0 python-dateutil==2.7.3 python-dotenv==0.9.1 python-gnupg==0.4.3 @@ -46,9 +45,7 @@ pytz==2018.5 regex==2018.8.29 requests==2.19.1 six==1.11.0 -scikit-learn==0.19.2 -scipy==1.1.0 termcolor==1.1.0 text-unidecode==1.2 tzlocal==1.5.1 -urllib3==1.23; python_version != '3.0.*' +urllib3==1.23; python_version != '3.3.*' From 117d7dad0494e33ee4e743f40c5684c7063594c3 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:41:14 +0100 Subject: [PATCH 08/19] Improve the unknown language error message --- src/paperless_tesseract/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index e3c2ed361..f54461161 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -172,8 +172,8 @@ def _get_ocr(self, imgs): raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) return raw_text raise OCRError( - "The guessed language is not available in this instance of " - "Tesseract." 
+ "The guessed language ({}) is not available in this instance " + "of Tesseract.".format(guessed_language) ) def _ocr(self, imgs, lang): From 4130dd346565b1567ff5902c9340a33570707b66 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:41:28 +0100 Subject: [PATCH 09/19] Conform code to standards --- src/documents/actions.py | 98 +++++++++---- src/documents/admin.py | 135 ++++++++++++------ .../documents/document/select_object.html | 68 ++++----- src/paperless/settings.py | 2 +- 4 files changed, 199 insertions(+), 104 deletions(-) diff --git a/src/documents/actions.py b/src/documents/actions.py index 3db5cd314..cd2698a2c 100644 --- a/src/documents/actions.py +++ b/src/documents/actions.py @@ -4,10 +4,13 @@ from django.core.exceptions import PermissionDenied from django.template.response import TemplateResponse -from documents.models import Tag, Correspondent +from documents.models import Correspondent, Tag -def select_action(modeladmin, request, queryset, title, action, modelclass, success_message="", document_action=None, queryset_action=None): +def select_action( + modeladmin, request, queryset, title, action, modelclass, + success_message="", document_action=None, queryset_action=None): + opts = modeladmin.model._meta app_label = opts.app_label @@ -27,7 +30,9 @@ def select_action(modeladmin, request, queryset, title, action, modelclass, succ queryset_action(queryset, selected_object) modeladmin.message_user(request, success_message % { - "selected_object": selected_object.name, "count": n, "items": model_ngettext(modeladmin.opts, n) + "selected_object": selected_object.name, + "count": n, + "items": model_ngettext(modeladmin.opts, n) }, messages.SUCCESS) # Return None to display the change list page again. 
@@ -47,10 +52,17 @@ def select_action(modeladmin, request, queryset, title, action, modelclass, succ request.current_app = modeladmin.admin_site.name - return TemplateResponse(request, "admin/%s/%s/select_object.html" % (app_label, opts.model_name), context) + return TemplateResponse( + request, + "admin/{}/{}/select_object.html".format(app_label, opts.model_name), + context + ) + +def simple_action( + modeladmin, request, queryset, success_message="", + document_action=None, queryset_action=None): -def simple_action(modeladmin, request, queryset, success_message="", document_action=None, queryset_action=None): if not modeladmin.has_change_permission(request): raise PermissionDenied @@ -72,37 +84,63 @@ def simple_action(modeladmin, request, queryset, success_message="", document_ac def add_tag_to_selected(modeladmin, request, queryset): - return select_action(modeladmin=modeladmin, request=request, queryset=queryset, - title="Add tag to multiple documents", - action="add_tag_to_selected", - modelclass=Tag, - success_message="Successfully added tag %(selected_object)s to %(count)d %(items)s.", - document_action=lambda doc, tag: doc.tags.add(tag)) -add_tag_to_selected.short_description = "Add tag to selected documents" + return select_action( + modeladmin=modeladmin, + request=request, + queryset=queryset, + title="Add tag to multiple documents", + action="add_tag_to_selected", + modelclass=Tag, + success_message="Successfully added tag %(selected_object)s to " + "%(count)d %(items)s.", + document_action=lambda doc, tag: doc.tags.add(tag) + ) def remove_tag_from_selected(modeladmin, request, queryset): - return select_action(modeladmin=modeladmin, request=request, queryset=queryset, - title="Remove tag from multiple documents", - action="remove_tag_from_selected", - modelclass=Tag, - success_message="Successfully removed tag %(selected_object)s from %(count)d %(items)s.", - document_action=lambda doc, tag: doc.tags.remove(tag)) 
-remove_tag_from_selected.short_description = "Remove tag from selected documents" + return select_action( + modeladmin=modeladmin, + request=request, + queryset=queryset, + title="Remove tag from multiple documents", + action="remove_tag_from_selected", + modelclass=Tag, + success_message="Successfully removed tag %(selected_object)s from " + "%(count)d %(items)s.", + document_action=lambda doc, tag: doc.tags.remove(tag) + ) def set_correspondent_on_selected(modeladmin, request, queryset): - return select_action(modeladmin=modeladmin, request=request, queryset=queryset, - title="Set correspondent on multiple documents", - action="set_correspondent_on_selected", - modelclass=Correspondent, - success_message="Successfully set correspondent %(selected_object)s on %(count)d %(items)s.", - queryset_action=lambda qs, correspondent: qs.update(correspondent=correspondent)) -set_correspondent_on_selected.short_description = "Set correspondent on selected documents" + + return select_action( + modeladmin=modeladmin, + request=request, + queryset=queryset, + title="Set correspondent on multiple documents", + action="set_correspondent_on_selected", + modelclass=Correspondent, + success_message="Successfully set correspondent %(selected_object)s " + "on %(count)d %(items)s.", + queryset_action=lambda qs, corr: qs.update(correspondent=corr) + ) def remove_correspondent_from_selected(modeladmin, request, queryset): - return simple_action(modeladmin=modeladmin, request=request, queryset=queryset, - success_message="Successfully removed correspondent from %(count)d %(items)s.", - queryset_action=lambda qs: qs.update(correspondent=None)) -remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents" + return simple_action( + modeladmin=modeladmin, + request=request, + queryset=queryset, + success_message="Successfully removed correspondent from %(count)d " + "%(items)s.", + queryset_action=lambda qs: qs.update(correspondent=None) + ) + + 
+add_tag_to_selected.short_description = "Add tag to selected documents" +remove_tag_from_selected.short_description = \ + "Remove tag from selected documents" +set_correspondent_on_selected.short_description = \ + "Set correspondent on selected documents" +remove_correspondent_from_selected.short_description = \ + "Remove correspondent from selected documents" diff --git a/src/documents/admin.py b/src/documents/admin.py index d545c1c02..365a99c1a 100644 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -3,21 +3,23 @@ from django.conf import settings from django.contrib import admin, messages from django.contrib.admin.templatetags.admin_urls import add_preserved_filters -from django.contrib.auth.models import User, Group +from django.contrib.auth.models import Group, User +from django.db import models from django.http import HttpResponseRedirect -try: - from django.core.urlresolvers import reverse -except ImportError: - from django.urls import reverse from django.templatetags.static import static +from django.urls import reverse from django.utils.html import format_html, format_html_join from django.utils.http import urlquote from django.utils.safestring import mark_safe -from django.db import models -from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \ - remove_correspondent_from_selected -from .models import Correspondent, Tag, Document, Log +from documents.actions import ( + add_tag_to_selected, + remove_correspondent_from_selected, + remove_tag_from_selected, + set_correspondent_on_selected +) + +from .models import Correspondent, Document, Log, Tag class FinancialYearFilter(admin.SimpleListFilter): @@ -92,11 +94,18 @@ def __init__(self, *args, **kwargs): self.title = "correspondent (recent)" def field_choices(self, field, request, model_admin): + + years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS + days = 365 * years + lookups = [] - if settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS and 
settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS > 0: - date_limit = datetime.now() - timedelta(days=365*settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS) - for c in Correspondent.objects.filter(documents__created__gte=date_limit).distinct(): + if years and years > 0: + correspondents = Correspondent.objects.filter( + documents__created__gte=datetime.now() - timedelta(days=days) + ).distinct() + for c in correspondents: lookups.append((c.id, c.name)) + return lookups @@ -106,13 +115,22 @@ class CommonAdmin(admin.ModelAdmin): class CorrespondentAdmin(CommonAdmin): - list_display = ("name", "match", "matching_algorithm", "document_count", "last_correspondence") + list_display = ( + "name", + "match", + "matching_algorithm", + "document_count", + "last_correspondence" + ) list_filter = ("matching_algorithm",) list_editable = ("match", "matching_algorithm") def get_queryset(self, request): qs = super(CorrespondentAdmin, self).get_queryset(request) - qs = qs.annotate(document_count=models.Count("documents"), last_correspondence=models.Max("documents__created")) + qs = qs.annotate( + document_count=models.Count("documents"), + last_correspondence=models.Max("documents__created") + ) return qs def document_count(self, obj): @@ -152,17 +170,29 @@ class Media: readonly_fields = ("added",) list_display = ("title", "created", "added", "thumbnail", "correspondent", "tags_") - list_filter = ("tags", ('correspondent', RecentCorrespondentFilter), "correspondent", FinancialYearFilter) + list_filter = ( + "tags", + ("correspondent", RecentCorrespondentFilter), + "correspondent", + FinancialYearFilter + ) filter_horizontal = ("tags",) ordering = ["-created", "correspondent"] - actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected] + actions = [ + add_tag_to_selected, + remove_tag_from_selected, + set_correspondent_on_selected, + remove_correspondent_from_selected + ] - date_hierarchy = 'created' + date_hierarchy = 
"created" - document_queue = None + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.document_queue = [] def has_add_permission(self, request): return False @@ -172,25 +202,38 @@ def created_(self, obj): created_.short_description = "Created" def changelist_view(self, request, extra_context=None): - response = super().changelist_view(request, extra_context) - if request.method == 'GET': + response = super().changelist_view( + request, + extra_context=extra_context + ) + + if request.method == "GET": cl = self.get_changelist_instance(request) self.document_queue = [doc.id for doc in cl.queryset] return response - def change_view(self, request, object_id=None, form_url='', extra_context=None): + def change_view(self, request, object_id=None, form_url='', + extra_context=None): + extra_context = extra_context or {} - doc = Document.objects.get(id=object_id) - if self.document_queue and object_id and int(object_id) in self.document_queue: - # There is a queue of documents - current_index = self.document_queue.index(int(object_id)) - if current_index < len(self.document_queue) - 1: - # ... and there are still documents in the queue - extra_context['next_object'] = self.document_queue[current_index + 1] + + if self.document_queue and object_id: + if int(object_id) in self.document_queue: + # There is a queue of documents + current_index = self.document_queue.index(int(object_id)) + if current_index < len(self.document_queue) - 1: + # ... 
and there are still documents in the queue + extra_context["next_object"] = self.document_queue[ + current_index + 1 + ] + return super(DocumentAdmin, self).change_view( - request, object_id, form_url, extra_context=extra_context, + request, + object_id, + form_url, + extra_context=extra_context, ) def response_change(self, request, obj): @@ -200,25 +243,35 @@ def response_change(self, request, obj): preserved_filters = self.get_preserved_filters(request) msg_dict = { - 'name': opts.verbose_name, - 'obj': format_html('{}', urlquote(request.path), obj), + "name": opts.verbose_name, + "obj": format_html( + '{}', + urlquote(request.path), + obj + ), } if "_saveandeditnext" in request.POST: msg = format_html( - 'The {name} "{obj}" was changed successfully. Editing next object.', + 'The {name} "{obj}" was changed successfully. ' + 'Editing next object.', **msg_dict ) self.message_user(request, msg, messages.SUCCESS) - redirect_url = reverse('admin:%s_%s_change' % - (opts.app_label, opts.model_name), - args=(request.POST['_next_object'],), - current_app=self.admin_site.name) - redirect_url = add_preserved_filters({'preserved_filters': preserved_filters, 'opts': opts}, redirect_url) - response = HttpResponseRedirect(redirect_url) - else: - response = super().response_change(request, obj) + redirect_url = reverse( + "admin:{}_{}_change".format(opts.app_label, opts.model_name), + args=(request.POST["_next_object"],), + current_app=self.admin_site.name + ) + redirect_url = add_preserved_filters( + { + "preserved_filters": preserved_filters, + "opts": opts + }, + redirect_url + ) + return HttpResponseRedirect(redirect_url) - return response + return super().response_change(request, obj) @mark_safe def thumbnail(self, obj): diff --git a/src/documents/templates/admin/documents/document/select_object.html b/src/documents/templates/admin/documents/document/select_object.html index 1439b5c21..775d57b12 100644 --- 
a/src/documents/templates/admin/documents/document/select_object.html +++ b/src/documents/templates/admin/documents/document/select_object.html @@ -1,46 +1,50 @@ {% extends "admin/base_site.html" %} + + {% load i18n l10n admin_urls static %} {% load staticfiles %} -{% block extrahead %} -{{ block.super }} -{{ media }} - +{% block extrahead %} + {{ block.super }} + {{ media }} + {% endblock %} + {% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %} + {% block breadcrumbs %} - + {% endblock %} {% block content %} -

Please select the {{itemname}}.

-
{% csrf_token %} -
- {% for obj in queryset %} - - {% endfor %} -

- -

- - - -

- - {% trans "Go back" %} -

-
-
+

Please select the {{itemname}}.

+
{% csrf_token %} +
+ {% for obj in queryset %} + + {% endfor %} +

+ +

+ + + +

+ + {% trans "Go back" %} +

+
+
{% endblock %} diff --git a/src/paperless/settings.py b/src/paperless/settings.py index e6f3da0cb..433eabe88 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -81,7 +81,7 @@ def __get_boolean(key, default="NO"): "rest_framework", "crispy_forms", - "django_filters" + "django_filters", ] From 6db788a55034c450bb5340b8b152bf7f9e16a74a Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:54:39 +0100 Subject: [PATCH 10/19] Add docs for indentation & spacing --- docs/contributing.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/contributing.rst b/docs/contributing.rst index 4ee6d18d5..05f51731c 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -97,6 +97,34 @@ closing the ``"`` before it should have been. That's all there is in terms of guidelines, so I hope it's not too daunting. +Indentation & Spacing +..................... + +When it comes to indentation: + +* For Python, the rule is: follow pep8 and use 4 spaces. +* For Javascript, CSS, and HTML, please use 1 tab. + +Additionally, Django templates making use of block elements like ``{% if %}``, +``{% for %}``, and ``{% block %}`` etc. should be indented: + +Good: + +.. code:: html + + {% block stuff %} +

This is the stuff

+ {% endblock %} + +Bad: + +.. code:: html + + {% block stuff %} +

This is the stuff

+ {% endblock %} + + The Code of Conduct =================== From ff111f1bdeb687905c0b15ae26c68f706cbfb93a Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:54:49 +0100 Subject: [PATCH 11/19] Update changelog for new stuff from #405 --- docs/changelog.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9396493a7..6fdaee647 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,14 @@ Changelog ######### +2.4.0 +===== + +* A new set of actions are now available thanks to `jonaswinkler`_'s very first + pull request! You can now do nifty things like tag documents in bulk, or set + correspondents in bulk. `#405`_ + + 2.3.0 ===== @@ -500,8 +508,9 @@ bulk of the work on this big change. .. _Kilian Koeltzsch: https://github.com/kiliankoe .. _Lukasz Soluch: https://github.com/LukaszSolo .. _Joshua Taillon: https://github.com/jat255 -.. _dubit0: https://github.com/dubit0 -.. _ahyear: https://github.com/ahyear +.. _dubit0: https://github.com/dubit0 +.. _ahyear: https://github.com/ahyear +.. _jonaswinkler: https://github.com/jonaswinkler .. _#20: https://github.com/danielquinn/paperless/issues/20 .. _#44: https://github.com/danielquinn/paperless/issues/44 @@ -588,6 +597,7 @@ bulk of the work on this big change. .. _#399: https://github.com/danielquinn/paperless/pull/399 .. _#400: https://github.com/danielquinn/paperless/pull/400 .. _#401: https://github.com/danielquinn/paperless/pull/401 +.. _#405: https://github.com/danielquinn/paperless/pull/405 .. _pipenv: https://docs.pipenv.org/ .. 
_a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/ From 79e1e60238e86e772cdfad5f81acc9750b04a31c Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 12:59:56 +0100 Subject: [PATCH 12/19] Fix typo --- docs/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 05f51731c..4678ff3aa 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -21,7 +21,7 @@ Pep8 ---- It's the standard for all Python development, so it's `very well documented`_. -The version is: +The short version is: * Lines should wrap at 79 characters * Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS`` From 090565d84c6de4381c408e5a1853ed0288a1d42e Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 13:58:40 +0100 Subject: [PATCH 13/19] Tweak the import/export system to handle encryption choices better Now when you export a document, the `storage_type` value is always `unencrypted` (since that's what it is when it's exported anyway), and the flag is set by the importing script instead, based on the existence of a `PAPERLESS_PASSPHRASE` environment variable, indicating that encryption is enabled. 
--- .../management/commands/document_exporter.py | 7 ++++++- .../management/commands/document_importer.py | 14 +++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index fce09092c..42a514348 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -55,7 +55,12 @@ def dump(self): documents = Document.objects.all() document_map = {d.pk: d for d in documents} manifest = json.loads(serializers.serialize("json", documents)) - for document_dict in manifest: + + for index, document_dict in enumerate(manifest): + + # Force output to unencrypted as that will be the current state. + # The importer will make the decision to encrypt or not. + manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501 document = document_map[document_dict["pk"]] diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 15401722c..ae5c1853f 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -94,7 +94,7 @@ def _import_files_from_manifest(self): document_path = os.path.join(self.source, doc_file) thumbnail_path = os.path.join(self.source, thumb_file) - if document.storage_type == Document.STORAGE_TYPE_GPG: + if settings.PASSPHRASE: with open(document_path, "rb") as unencrypted: with open(document.source_path, "wb") as encrypted: @@ -112,3 +112,15 @@ def _import_files_from_manifest(self): shutil.copy(document_path, document.source_path) shutil.copy(thumbnail_path, document.thumbnail_path) + + # Reset the storage type to whatever we've used while importing + + storage_type = Document.STORAGE_TYPE_UNENCRYPTED + if settings.PASSPHRASE: + storage_type = Document.STORAGE_TYPE_GPG + + Document.objects.filter( + 
pk__in=[r["pk"] for r in self.manifest] + ).update( + storage_type=storage_type + ) From d17497fd5b703d0b21d5dd581a3fe9e1ead2a265 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 14:00:27 +0100 Subject: [PATCH 14/19] Move the unique key on checksums to migration 15 This shouldn't affect anyone, since this migration is pretty old, but it allows people using PostgreSQL to actually run Paperless. --- src/documents/migrations/0014_document_checksum.py | 5 ----- src/documents/migrations/0015_add_insensitive_to_match.py | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/documents/migrations/0014_document_checksum.py b/src/documents/migrations/0014_document_checksum.py index bc563cf86..a22348ba4 100644 --- a/src/documents/migrations/0014_document_checksum.py +++ b/src/documents/migrations/0014_document_checksum.py @@ -158,9 +158,4 @@ class Migration(migrations.Migration): name='modified', field=models.DateTimeField(auto_now=True, db_index=True), ), - migrations.AlterField( - model_name='document', - name='checksum', - field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True), - ), ] diff --git a/src/documents/migrations/0015_add_insensitive_to_match.py b/src/documents/migrations/0015_add_insensitive_to_match.py index 34a570c6e..30666dea9 100644 --- a/src/documents/migrations/0015_add_insensitive_to_match.py +++ b/src/documents/migrations/0015_add_insensitive_to_match.py @@ -12,6 +12,11 @@ class Migration(migrations.Migration): ] operations = [ + migrations.AlterField( + model_name='document', + name='checksum', + field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). 
We use this to prevent duplicate document imports.', max_length=32, unique=True), + ), migrations.AddField( model_name='correspondent', name='is_insensitive', From b20d7eca03b44080530558cab0d406f0a0e3ae0d Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 14:01:15 +0100 Subject: [PATCH 15/19] Tweak settings.py to allow for TRUST-based PostgreSQL auth --- src/paperless/settings.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 433eabe88..4e788e56b 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -144,13 +144,14 @@ def __get_boolean(key, default="NO"): } } -if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"): +if os.getenv("PAPERLESS_DBUSER"): DATABASES["default"] = { "ENGINE": "django.db.backends.postgresql_psycopg2", "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), "USER": os.getenv("PAPERLESS_DBUSER"), - "PASSWORD": os.getenv("PAPERLESS_DBPASS") } + if os.getenv("PAPERLESS_DBPASS"): + DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS") # Password validation From acf6caca2f51e47baff40f38b9210c79a368b1e8 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 14:01:35 +0100 Subject: [PATCH 16/19] Add a tox test for Python 3.7 --- src/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tox.ini b/src/tox.ini index 98e44e063..ff47136be 100644 --- a/src/tox.ini +++ b/src/tox.ini @@ -5,7 +5,7 @@ [tox] skipsdist = True -envlist = py34, py35, py36, pycodestyle, doc +envlist = py34, py35, py36, py37, pycodestyle, doc [testenv] commands = pytest From 8726b0316c7216b682b663016a1aac8f723190d5 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 14:03:38 +0100 Subject: [PATCH 17/19] Add note about import/export process changes --- docs/changelog.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 6fdaee647..fb728b8d7 
100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,11 @@ Changelog * A new set of actions are now available thanks to `jonaswinkler`_'s very first pull request! You can now do nifty things like tag documents in bulk, or set correspondents in bulk. `#405`_ +* The import/export system is now a little smarter. By default, documents are + tagged as ``unencrypted``, since exports are by their nature unencrypted. + It's now in the import step that we decide the storage type. This allows you + to export from an encrypted system and import into an unencrypted one, or + vice-versa. 2.3.0 From 35c5b8e263cd23b72e38725958c0f3cfe45471e5 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 14:05:35 +0100 Subject: [PATCH 18/19] Add note about tweaks to psql connections --- docs/changelog.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index fb728b8d7..9db59839e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -12,7 +12,11 @@ Changelog It's now in the import step that we decide the storage type. This allows you to export from an encrypted system and import into an unencrypted one, or vice-versa. - +* The migration history has been slightly modified to accommodate PostgreSQL + users. Additionally, you can now tell paperless to use PostgreSQL simply by + declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to + connect to your Postgres database without a password unless you also set + ``PAPERLESS_DBPASS``. 
2.3.0 ===== From a511d34d694faf907aabdff67b019ae37231d47b Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 23 Sep 2018 15:38:31 +0100 Subject: [PATCH 19/19] Fix implementation of django-filter --- docs/changelog.rst | 6 +++++ src/documents/filters.py | 50 ++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9db59839e..6ce2e49a4 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -17,6 +17,10 @@ Changelog declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to connect to your Postgres database without a password unless you also set ``PAPERLESS_DBPASS``. +* A bug was found in the REST API filter system that was the result of an + update of django-filter some time ago. This has now been patched `#412`_. + Thanks to `thepill`_ for spotting it! + 2.3.0 ===== @@ -520,6 +524,7 @@ bulk of the work on this big change. .. _dubit0: https://github.com/dubit0 .. _ahyear: https://github.com/ahyear .. _jonaswinkler: https://github.com/jonaswinkler +.. _thepill: https://github.com/thepill .. _#20: https://github.com/danielquinn/paperless/issues/20 .. _#44: https://github.com/danielquinn/paperless/issues/44 @@ -607,6 +612,7 @@ bulk of the work on this big change. .. _#400: https://github.com/danielquinn/paperless/pull/400 .. _#401: https://github.com/danielquinn/paperless/pull/401 .. _#405: https://github.com/danielquinn/paperless/pull/405 +.. _#412: https://github.com/danielquinn/paperless/issues/412 .. _pipenv: https://docs.pipenv.org/ .. 
_a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/ diff --git a/src/documents/filters.py b/src/documents/filters.py index 68861d967..d52889666 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -1,8 +1,14 @@ -from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter +from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter, ModelChoiceFilter from .models import Correspondent, Document, Tag +CHAR_KWARGS = ( + "startswith", "endswith", "contains", + "istartswith", "iendswith", "icontains" +) + + class CorrespondentFilterSet(FilterSet): class Meta: @@ -31,34 +37,24 @@ class Meta: class DocumentFilterSet(FilterSet): - CHAR_KWARGS = { - "lookup_expr": ( - "startswith", - "endswith", - "contains", - "istartswith", - "iendswith", - "icontains" - ) - } - - correspondent__name = CharFilter( - field_name="correspondent__name", **CHAR_KWARGS) - correspondent__slug = CharFilter( - field_name="correspondent__slug", **CHAR_KWARGS) - tags__name = CharFilter( - field_name="tags__name", **CHAR_KWARGS) - tags__slug = CharFilter( - field_name="tags__slug", **CHAR_KWARGS) - tags__empty = BooleanFilter( - field_name="tags", lookup_expr="isnull", distinct=True) + tags_empty = BooleanFilter( + label="Is tagged", + field_name="tags", + lookup_expr="isnull", + exclude=True + ) class Meta: model = Document fields = { - "title": [ - "startswith", "endswith", "contains", - "istartswith", "iendswith", "icontains" - ], - "content": ["contains", "icontains"], + + "title": CHAR_KWARGS, + "content": ("contains", "icontains"), + + "correspondent__name": CHAR_KWARGS, + "correspondent__slug": CHAR_KWARGS, + + "tags__name": CHAR_KWARGS, + "tags__slug": CHAR_KWARGS, + }