Skip to content
This repository has been archived by the owner on Feb 16, 2023. It is now read-only.

Commit

Permalink
Added document type
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonas Winkler committed Aug 24, 2018
1 parent dfa5ea4 commit d7ab69f
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 10 deletions.
66 changes: 65 additions & 1 deletion src/documents/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.core.exceptions import PermissionDenied
from django.template.response import TemplateResponse

from documents.models import Tag, Correspondent
from documents.models import Tag, Correspondent, DocumentType


def add_tag_to_selected(modeladmin, request, queryset):
Expand Down Expand Up @@ -159,3 +159,67 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):


remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents"


def set_document_type_on_selected(modeladmin, request, queryset):
    """Admin action: assign one document type to every selected document.

    Without a ``post`` field in the request, an intermediate page is rendered
    that lets the user pick a document type. Once that page is submitted, the
    chosen type is written to all documents in ``queryset``.

    Args:
        modeladmin: The ``ModelAdmin`` the action was triggered from.
        request: The current HTTP request.
        queryset: The documents selected in the change list.

    Returns:
        ``None`` after the submitted form has been handled (the admin then
        displays the change list again), otherwise a ``TemplateResponse``
        rendering the document type selection page.

    Raises:
        PermissionDenied: If ``modeladmin.has_change_permission`` is false
            for this request.
    """
    opts = modeladmin.model._meta
    app_label = opts.app_label

    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

    if request.POST.get('post'):
        try:
            document_type = DocumentType.objects.get(
                id=request.POST.get('document_type_id')
            )
        except (DocumentType.DoesNotExist, ValueError):
            # The submitted id can be missing (nothing to choose from),
            # malformed, or refer to a type that no longer exists. Report
            # that to the user instead of failing with a server error.
            modeladmin.message_user(
                request,
                "Please select a valid document type.",
                messages.ERROR
            )
            return None

        n = queryset.count()
        if n:
            # Record a change log entry per document before the bulk update,
            # since QuerySet.update() does not go through the admin.
            for obj in queryset:
                obj_display = str(obj)
                modeladmin.log_change(request, obj, obj_display)
            queryset.update(document_type=document_type)
            modeladmin.message_user(request, "Successfully set document type %(document_type)s on %(count)d %(items)s." % {
                "document_type": document_type.name, "count": n, "items": model_ngettext(modeladmin.opts, n)
            }, messages.SUCCESS)

        # Return None to display the change list page again.
        return None

    title = "Set document type on multiple documents"

    context = dict(
        modeladmin.admin_site.each_context(request),
        title=title,
        queryset=queryset,
        opts=opts,
        action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
        media=modeladmin.media,
        document_types=DocumentType.objects.all()
    )

    request.current_app = modeladmin.admin_site.name

    return TemplateResponse(
        request,
        "admin/%s/%s/set_document_type.html" % (app_label, opts.model_name),
        context
    )


set_document_type_on_selected.short_description = "Set document type on selected documents"


def remove_document_type_from_selected(modeladmin, request, queryset):
    """Admin action: clear the document type of every selected document.

    Args:
        modeladmin: The ``ModelAdmin`` the action was triggered from.
        request: The current HTTP request.
        queryset: The documents selected in the change list.

    Returns:
        Always ``None``.

    Raises:
        PermissionDenied: If ``modeladmin.has_change_permission`` is false
            for this request.
    """
    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

    selected_count = queryset.count()
    if not selected_count:
        # Nothing selected: no log entries, no update, no message.
        return None

    # One change log entry per document, then a single bulk update.
    for document in queryset:
        modeladmin.log_change(request, document, str(document))
    queryset.update(document_type=None)

    success_text = "Successfully removed document type from %(count)d %(items)s." % {
        "count": selected_count,
        "items": model_ngettext(modeladmin.opts, selected_count),
    }
    modeladmin.message_user(request, success_text, messages.SUCCESS)

    return None


remove_document_type_from_selected.short_description = "Remove document type from selected documents"
27 changes: 22 additions & 5 deletions src/documents/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from django.utils.safestring import mark_safe

from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \
remove_correspondent_from_selected
from .models import Correspondent, Tag, Document, Log
remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected
from .models import Correspondent, Tag, Document, Log, DocumentType


class FinancialYearFilter(admin.SimpleListFilter):
Expand Down Expand Up @@ -120,6 +120,22 @@ def save_model(self, request, obj, form, change):
def document_count(self, obj):
# Number of documents reachable through obj's `documents` relation.
return obj.documents.count()

class DocumentTypeAdmin(CommonAdmin):
    """Admin configuration for document types.

    The change list shows the name, the match settings and the number of
    documents per type; the match settings can be edited in the list.
    """

    list_display = ("name", "match", "matching_algorithm", "document_count")
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

    def save_model(self, request, obj, form, change):
        """Save the document type, then assign it to matching documents.

        Only documents that have no document type yet and carry no archive
        tag are considered; each one whose content matches ``obj`` gets
        ``obj`` as its document type.
        """
        super().save_model(request, obj, form, change)

        untyped_documents = Document.objects.filter(
            document_type__isnull=True
        ).exclude(
            tags__is_archived_tag=True
        )
        for candidate in untyped_documents:
            if not obj.matches(candidate.content):
                continue
            candidate.document_type = obj
            candidate.save(update_fields=("document_type",))

    def document_count(self, obj):
        """Return how many documents are related to ``obj``."""
        return obj.documents.count()

class DocumentAdmin(CommonAdmin):

Expand All @@ -132,12 +148,12 @@ class Media:
search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added",)
list_display = ("title", "created", "added", "thumbnail", "correspondent",
"tags_", "archive_serial_number")
list_filter = ("tags", "correspondent", FinancialYearFilter)
"tags_", "archive_serial_number", "document_type")
list_filter = ("document_type", "tags", "correspondent", FinancialYearFilter)

ordering = ["-created", "correspondent"]

actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected]
actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected]

date_hierarchy = 'created'

Expand Down Expand Up @@ -273,6 +289,7 @@ class LogAdmin(CommonAdmin):

# Register each model together with its ModelAdmin class on the default
# admin site.
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)

Expand Down
2 changes: 2 additions & 0 deletions src/documents/apps.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def ready(self):
from .signals.handlers import (
set_correspondent,
set_tags,
set_document_type,
run_pre_consume_script,
run_post_consume_script,
cleanup_document_deletion,
Expand All @@ -23,6 +24,7 @@ def ready(self):

document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(run_post_consume_script)

Expand Down
33 changes: 33 additions & 0 deletions src/documents/migrations/0023_auto_20180823_1155.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55

from django.db import migrations, models
import django.db.models.deletion


# Schema migration: creates the DocumentType model and adds the
# Document.document_type foreign key that points to it.
class Migration(migrations.Migration):

# Builds on the previous migration of the 'documents' app.
dependencies = [
('documents', '0022_workflow_improvements'),
]

operations = [
# Create the DocumentType model with its name, slug and matching fields.
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
# Add an optional (blank/null) foreign key from Document to DocumentType.
# SET_NULL: deleting a document type clears the reference on its documents
# instead of deleting them. The reverse accessor is `documents`.
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
),
]
14 changes: 14 additions & 0 deletions src/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from django.template.defaultfilters import slugify
from django.utils import timezone

from reminders.models import Reminder
from .managers import LogManager


Expand Down Expand Up @@ -189,6 +190,11 @@ class Tag(MatchingModel):
help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")


# A MatchingModel subclass that declares no fields or methods of its own;
# everything it offers is inherited from MatchingModel.
class DocumentType(MatchingModel):

pass


class Document(models.Model):

TYPE_PDF = "pdf"
Expand All @@ -215,6 +221,14 @@ class Document(models.Model):

title = models.CharField(max_length=128, blank=True, db_index=True)

document_type = models.ForeignKey(
DocumentType,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
)

content = models.TextField(
db_index=True,
blank=True,
Expand Down
31 changes: 30 additions & 1 deletion src/documents/signals/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.contrib.contenttypes.models import ContentType
from django.utils import timezone

from ..models import Correspondent, Document, Tag
from ..models import Correspondent, Document, Tag, DocumentType


def logger(message, group):
Expand Down Expand Up @@ -44,6 +44,35 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs):
document.save(update_fields=("correspondent",))


def set_document_type(sender, document=None, logging_group=None, **kwargs):
    """Assign a document type to ``document`` based on its content.

    Does nothing when the document already has a document type or when no
    document type matches its content. Otherwise the first match is stored
    on the document; if there were several matches, that choice is logged.

    Args:
        sender: The signal sender (unused).
        document: The document to classify.
        logging_group: Group passed through to ``logger`` for each message.
        **kwargs: Further signal arguments (unused).
    """

    # No sense in assigning a document type when one is already set.
    if document.document_type:
        return

    candidates = list(DocumentType.match_all(document.content))

    # Without any matching document type there is nothing to assign.
    if not candidates:
        return

    chosen = candidates[0]
    candidate_count = len(candidates)

    if candidate_count > 1:
        ambiguity_note = "Detected {} potential document types, so we've opted for {}"
        logger(ambiguity_note.format(candidate_count, chosen), logging_group)

    assignment_note = 'Assigning document type "{}" to "{}" '.format(chosen, document)
    logger(assignment_note, logging_group)

    document.document_type = chosen
    # Only write the column that changed.
    document.save(update_fields=("document_type",))


def set_tags(sender, document=None, logging_group=None, **kwargs):

current_tags = set(document.tags.all())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
border: 1px solid #cccccc;
border-radius: 2%;
overflow: hidden;
height: 300px;
height: 350px;
position: relative;
}
.result .header {
Expand Down Expand Up @@ -61,6 +61,11 @@
.result a.tag {
color: #ffffff;
}
.result .documentType {
padding: 5px;
background-color: #eeeeee;
text-align: center;
}
.result .date {
padding: 5px;
}
Expand Down Expand Up @@ -163,7 +168,8 @@
{# 5: Correspondent #}
{# 6: Tags #}
{# 7: Archive serial number #}
{# 8: Document edit url #}
{# 8: Document type #}
{# 9: Document edit url #}
<div class="box">
<div class="result">
<div class="header">
Expand All @@ -177,14 +183,15 @@
selection would not be possible with mouse click + drag. Instead,
the underlying link would be dragged.
{% endcomment %}
<div class="headerLink" onclick="location.href='{{ result.8 }}';"></div>
<div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
<div class="checkbox">{{ result.0 }}</div>
<div class="info">
{{ result.5 }}
</div>
{{ result.1 }}
<div style="clear: both;"></div>
</div>
{% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
<div class="tags">{{ result.6 }}</div>
<div class="date">{{ result.2 }}</div>
<div style="clear: both;"></div>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{% extends "admin/base_site.html" %}
{% load i18n l10n admin_urls static %}

{% comment %}
Intermediate page of the "set document type on selected documents" admin
action. Expects these context variables: title, queryset, opts,
action_checkbox_name, media and document_types. Submitting the form posts
the selected document ids, the chosen document_type_id and post=yes back to
the change list, which runs the action again.
{% endcomment %}

{% block extrahead %}
    {{ block.super }}
    {{ media }}
    <script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>

{% endblock %}

{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}

{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
&rsaquo; {{title}}
</div>
{% endblock %}

{% block content %}
<p>Please select the document type.</p>
<form method="post">{% csrf_token %}
    <div>
        {# Carry the original selection over to the second request. #}
        {% for obj in queryset %}
        <input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
        {% endfor %}
        <p>
            <select name="document_type_id">
                {% for document_type in document_types %}
                <option value="{{document_type.id}}">{{document_type.name}}</option>
                {% endfor %}
            </select>
        </p>

        <input type="hidden" name="action" value="set_document_type_on_selected"/>
        <input type="hidden" name="post" value="yes"/>
        <p>
            <input type="submit" value="{% trans "Confirm" %}" />
            <a href="#" class="button cancel-link">{% trans "Go back" %}</a>
        </p>
    </div>
</form>
{% endblock %}
Empty file modified src/documents/templatetags/hacks.py
100644 → 100755
Empty file.

0 comments on commit d7ab69f

Please sign in to comment.