diff --git a/.gitignore b/.gitignore
index 2a1aff013..f3c069265 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,14 +6,17 @@ settings/keys/*
*.dot
reports
ENV
-venv
+.env
.DS_Store
build
deploy/last-update
logs/*
-cache/*
celerybeat.pid
celerybeat-schedule
.gitignore~
-static/scss/**/*.css.map
-*.retry
\ No newline at end of file
+assets/*
+*.ipynb
+dump.rdb
+Pipfile.lock
+
+*.css.map
\ No newline at end of file
diff --git a/.python-version b/.python-version
new file mode 100644
index 000000000..a9f8d1be3
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.9.11
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 96d4df1d0..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-language: python
-
-python:
- - '2.7'
-
-services:
- - redis-server
- - mysql
-
-env:
- global:
- - DJANGO_SETTINGS_MODULE=regluit.settings.travis
- - PYTHONPATH=/home/travis/build/EbookFoundation/
-
-before_install:
- - sudo mkdir /var/log/django
- - sudo chmod 777 /var/log/django
- - mkdir ~/build/EbookFoundation/regluit/settings/keys/
- - cp ~/build/EbookFoundation/regluit/settings/dummy/__init__.py ~/build/EbookFoundation/regluit/settings/keys/__init__.py
- - openssl aes-256-cbc -K $encrypted_56eb2b7cc527_key -iv $encrypted_56eb2b7cc527_iv -in ~/build/EbookFoundation/regluit/test/travis-host.py.enc -out ~/build/EbookFoundation/regluit/settings/keys/host.py -d
-
-install:
- - pip install -r requirements_versioned.pip
-
-script: django-admin test
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 000000000..93e44e5d2
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,43 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+celery = "==4.4.7"
+django = "==1.11.29"
+django-contrib-comments = "==2.0.0"
+django-extensions = "==3.1.1"
+django-registration = "==2.4.1"
+social-auth-app-django = "==2.1.0"
+django-tastypie = "==0.14.1"
+django-el-pagination = "==3.2.4"
+django-selectable = "==1.1.0"
+django-notification = {editable = true, ref = "1ad2be4adf3551a3471d923380368341452e178a", git = "git+https://github.com/eshellman/django-notification.git"}
+django-email-change = {editable = true, ref = "fb063296cbf4e4a6d8a93d34d98fe0c7739c2e0d", git = "git+https://github.com/eshellman/django-email-change.git"}
+django-ckeditor = "==5.6.1"
+django-storages = "==1.5.2"
+sorl-thumbnail = "==12.6.3"
+django-mptt = "==0.8.6"
+pyepub = "==0.5.0"
+django-sass-processor = "==0.8.2"
+mysqlclient = "==1.4.6"
+mailchimp3 = "==3.0.14"
+boto3 = "==1.17.91"
+pymarc = "==4.2.1"
+beautifulsoup4 = "==4.11.1"
+gitberg = "==0.8.7"
+risparser = "==0.4.3"
+pyoai = "==2.5.0"
+django-jsonfield = "==1.0.0"
+mechanize = "==0.4.5"
+stripe = "==2.76.0"
+selenium = "==3.141.0"
+requests-mock = "==1.8.0"
+redis = "==3.5.3"
+xhtml2pdf = ">=0.2.15"
+pillow = "==9.5.0"
+pypdf = ">=5.0.0"
+
+[requires]
+python_version = "3.9"
diff --git a/README.md b/README.md
index 66fc4a291..73a2dca03 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
regluit - "The Unglue.it web application and website"
=======
-This repo - https://github.com/EbookFoundation/regluit will be the place for collaborative development for Unglue.it. Add issues and submit pull requests here. As of January 19, 2017, https://github.com/Gluejar/regluit is still being used for production builds.
+Another repo - https://github.com/EbookFoundation/regluit will eventually be the place for collaborative development for Unglue.it. Add issues and submit pull requests there. As of September 1, 2019, https://github.com/Gluejar/regluit is still being used for production builds.
The first version of the unglue.it codebase was a services-oriented project named "unglu".
We decided that "unglu" was too complicated, so we started over and named the new project "regluit".
@@ -10,49 +10,24 @@ contains four main applications: `core`, `frontend`, `api` and `payment` that ca
and configured on as many ec2 instances that are needed to support traffic.
The partitioning between these modules is not as clean as would be ideal. `payment` is particularly messy because we had to retool it twice because we had to switch from Paypal to Amazon Payments to Stripe.
-regluit was originally developed on Django 1.3 (python 2.7) and currently runs on Django 1.8.
+regluit was originally developed on Django 1.3 (python 2.7) and currently runs on Django 1.11 (Python 3.8).
-Development (Vagrant + Virtualbox)
--------
-
-The recommended method for local development is to create a virtual machine with [Vagrant](https://www.vagrantup.com/) and [Virtualbox](https://www.virtualbox.org/wiki/Downloads).
-With this method, the only requirements on the host machine are `virtualbox` and `vagrant`.
-Vagrant will use the `ansible-local` provisioner, therefore installing python and ansible on the host machine is not necessary.
-
-__Instructions for Ubuntu 16:__
-1. Install virtualbox: `sudo apt-get install virtualbox`
-2. Install vagrant: `sudo apt-get install vagrant`
-3. Clone the `EbookFoundation/regluit` repository.
-4. Navigate to the base directory of the cloned repo (where `Vagrantfile` is located).
-5. Run `vagrant up` to create the VM, install dependencies, and start necessary services.
- * Note: This step may take up to 15 minutes to complete.
-6. Once the VM has been created, run `vagrant ssh` to log in to the virtual machine you just created. If provisioning was successful, you should see a success message upon login.
- * If virtualenv doesn't activate upon login, you can do it manually by running `cd /opt/regluit && source venv/bin/activate`
-7. Within the VM, run `./manage.py runserver 0.0.0.0:8000` to start the Django development server.
-8. On your host machine, open your web browser of choice and navigate to `http://127.0.0.1:8000`
-
-__Instructions for other platforms (Windows/OSX):__
-* Steps are essentially the same, except for the installation of Vagrant and Virtualbox. Refer to each package's documentation for specific installation instructions.
-
-_NOTE:_ If running Windows on your host machine, ensure you are running `vagrant up` from an elevated command prompt, e.g. right click on Command Prompt -> Run As Administrator.
-
-
-Development (Host Machine)
+Develop
-------
Here are some instructions for setting up regluit for development on
-an Ubuntu system. If you are on OS X see notes below
-to install python-setuptools in step 1:
+an Ubuntu system. If you are on OS X see notes below.
-1. Ensure MySQL and Redis are installed & running on your system.
+
+- Ensure MySQL 5.7 and Redis are installed & running on your system.
1. Create a MySQL database and user for unglueit.
1. `sudo apt-get upgrade gcc`
-1. `sudo apt-get install python-setuptools git python-lxml build-essential libssl-dev libffi-dev python2.7-dev libxml2-dev libxslt-dev libmysqlclient-dev`
+1. `sudo apt-get install python-setuptools git python-lxml build-essential libssl-dev libffi-dev python3.8-dev libxml2-dev libxslt-dev libmysqlclient-dev`
1. `sudo easy_install virtualenv virtualenvwrapper`
1. `git clone git@github.com:Gluejar/regluit.git`
1. `cd regluit`
1. `mkvirtualenv regluit`
-1. `pip install -r requirements_versioned.pip`
+1. `pip install -r requirements.txt`
1. `add2virtualenv ..`
1. `cp settings/dev.py settings/me.py`
1. `mkdir settings/keys/`
@@ -63,8 +38,9 @@ to install python-setuptools in step 1:
1. `deactivate ; workon regluit`
1. `django-admin.py migrate --noinput`
1. `django-admin.py loaddata core/fixtures/initial_data.json core/fixtures/bookloader.json` populate database with test data to run properly.
-1. `django-admin.py celeryd --loglevel=INFO` start the celery daemon to perform asynchronous tasks like adding related editions, and display logging information in the foreground.
-1. `django-admin.py celerybeat -l INFO` to start the celerybeat daemon to handle scheduled tasks.
+1. `redis-server` to start the task broker
+1. `celery -A regluit worker --loglevel=INFO ` start the celery daemon to perform asynchronous tasks like adding related editions, and display logging information in the foreground. Add ` --logfile=logs/celery.log` if you want the logs to go into a log file.
+1. `celery -A regluit beat --loglevel=INFO` to start the celerybeat daemon to handle scheduled tasks.
1. `django-admin.py runserver 0.0.0.0:8000` (you can change the port number from the default value of 8000)
1. make sure a [redis server](https://redis.io/topics/quickstart) is running
1. Point your browser to http://localhost:8000/
@@ -77,105 +53,33 @@ CSS development
Production Deployment
---------------------
-OBSOLETE
-Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance.
-Instructions for setting please are slightly different.
-
-1. create an ubuntu ec2 instance (e.g, go http://alestic.com/ to find various ubuntu images)
-1. `sudo aptitude update`
-1. `sudo aptitude upgrade`
-1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix python-dev libmysqlclient-dev`
-1. `sudo mkdir /opt/regluit`
-1. `sudo chown ubuntu:ubuntu /opt/regluit`
-1. `cd /opt`
-1. `git config --global user.name "Raymond Yee"`
-1. `git config --global user.email "rdhyee@gluejar.com"`
-1. `ssh-keygen`
-1. add `~/.ssh/id\_rsa.pub` as a deploy key on github https://github.com/Gluejar/regluit/admin/keys
-1. `git clone git@github.com:Gluejar/regluit.git`
-1. `cd /opt/regluit`
-1. create an Amazon RDS instance
-1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p`
-1. `CREATE DATABASE unglueit CHARSET utf8;`
-1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL;`
-1. update settings/prod.py with database credentials
-1. `virtualenv ENV`
-1. `source ENV/bin/activate`
-1. `pip install -r requirements_versioned.pip`
-1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth`
-1. `django-admin.py syncdb --migrate --settings regluit.settings.prod`
-1. `sudo mkdir /var/www/static`
-1. `sudo chown ubuntu:ubuntu /var/www/static`
-1. `django-admin.py collectstatic --settings regluit.settings.prod`
-1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit`
-1. `sudo a2ensite regluit`
-1. `sudo a2enmod ssl rewrite`
-1. `cd /home/ubuntu`
-1. copy SSL server key to `/etc/ssl/private/server.key`
-1. copy SSL certificate to `/etc/ssl/certs/server.crt`
-1. `sudo /etc/init.d/apache2 restart`
-1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just enter return for all?)
-1. `sudo cp deploy/celeryd /etc/init.d/celeryd`
-1. `sudo chmod 755 /etc/init.d/celeryd`
-1. `sudo cp deploy/celeryd.conf /etc/default/celeryd`
-1. `sudo mkdir /var/log/celery`
-1. `sudo mkdir /var/run/celery`
-1. `sudo chown celery:celery /var/log/celery /var/run/celery`
-1. `sudo /etc/init.d/celeryd start`
-1. `sudo cp deploy/celerybeat /etc/init.d/celerybeat`
-1. `sudo chmod 755 /etc/init.d/celerybeat`
-1. `sudo cp deploy/celerybeat.conf /etc/default/celerybeat`
-1. `sudo mkdir /var/log/celerybeat`
-1. `sudo chown celery:celery /var/log/celerybeat`
-1. `sudo /etc/init.d/celerybeat start`
-
-## setup to enable ckeditor to work properly
-
-1. `mkdir /var/www/static/media/`
-1. `sudo chown ubuntu:www-data /var/www/static/media/`
-
-
-Updating Production
---------------------
-
-1. Study the latest changes in the master branch, especially keep in mind how
-it has [changed from what's in production](https://github.com/Gluejar/regluit/compare/production...master).
-1. Update the production branch accordingly. If everything in `master` is ready to be moved into `production`, you can just merge `master` into `production`. Otherwise, you can grab specific parts. (How to do so is something that should probably be described in greater detail.)
-1. Login to unglue.it and run [`/opt/regluit/deploy/update-prod`](https://github.com/Gluejar/regluit/blob/master/deploy/update-prod)
-
+See http://github.com/EbookFoundation/regluit-provisioning
OS X Developer Notes
-------------------
To run regluit on OS X you should have XCode installed
-Install virtualenvwrapper according
-to the process at http://blog.praveengollakota.com/47430655:
+Install MySQL:
+ `brew install mysql@5.7`
+ `mysql_secure_installation`
+ `mysqld_safe --user=root -p`
+
-1. `sudo easy\_install pip`
-1. `sudo pip install virtualenv`
-1. `pip install virtualenvwrapper`
+We use pyenv and pipenv to set up an environment.
Edit or create .bashrc in ~ to enable virtualenvwrapper commands:
-1. `mkdir ~/.virtualenvs`
-1. Edit .bashrc to include the following lines:
-
- export WORKON_HOME=$HOME/.virtualenvs
- source your_path_to_virtualenvwrapper.sh_here
-In the above web site, the path to virtualenvwrapper.sh was
-/Library/Frameworks/Python.framework/Versions/2.7/bin/virtualenvwrapper.sh
-In Snow Leopard, this may be /usr/local/bin/virtualenvwrapper.sh
+1. `pipenv install -r requirements.txt`
+1. Edit .zshrc to include the following lines:
-Configure Terminal to automatically notice this at startup:
-Terminal –> Preferences –> Settings –> Shell
-Click "run command"; add `source ~/.bashrc`
+ `eval "$(pyenv init -)"`
+ `export PATH=$PATH:/Applications/Postgres.app/Contents/Versions/10/bin`
+ `export PATH=$PATH:/usr/local/opt/mysql-client/bin:$PATH`
+ `export ANSIBLE_VAULT_PASSWORD_FILE=PATH_TO_VAULT_PASSWORD`
-If you get 'EnvironmentError: mysql_config not found'
-edit the line ~/.virtualenvs/regluit/build/MySQL-python/setup_posix.py
-1. mysql_config.path = "mysql_config"
-to be (using a path that exists on your system)
-1. mysql_config.path = "/usr/local/mysql-5.5.20-osx10.6-x86_64/bin/mysql_config"
+If you get `EnvironmentError: mysql_config not found`
+you might need to set a path to mysqlconfig
You may need to set utf8 in /etc/my.cnf
collation-server = utf8_unicode_ci
@@ -183,15 +87,6 @@ collation-server = utf8_unicode_ci
init-connect='SET NAMES utf8'
character-set-server = utf8
-Selenium Install
----------------
-
-Download the selenium server:
-http://selenium.googlecode.com/files/selenium-server-standalone-2.5.0.jar
-
-Start the selenium server:
-'java -jar selenium-server-standalone-2.5.0.jar'
-
MARC Records
------------
@@ -232,7 +127,12 @@ MARC Records
* if you have records with both DIRECT and UNGLUE links, you'll need two MARCRecord instances
* if you have both kinds of link, put them in _separate_ records, as marc_format can only take one value
+MySQL Migration
+---------------
+
+## 5.7 - 8.0 Notes
-# vagrant / ansible
+* Many migration blockers were removed by dumping, then restoring the database.
+* After that, RDS was able to migrate
+* needed to create the unglueit user from the mysql client
-[How to build machines using Vagrant/ansible](docs/vagrant_ansible.md)
diff --git a/Vagrantfile b/Vagrantfile
deleted file mode 100644
index 0ad9cdbc6..000000000
--- a/Vagrantfile
+++ /dev/null
@@ -1,56 +0,0 @@
-# -*- mode: ruby -*-
-# vi: set ft=ruby :
-
-# All Vagrant configuration is done below. The "2" in Vagrant.configure
-# configures the configuration version (we support older styles for
-# backwards compatibility). Please don't change it unless you know what
-# you're doing.
-Vagrant.configure("2") do |config|
- # The most common configuration options are documented and commented below.
- # For a complete reference, please see the online documentation at
- # https://docs.vagrantup.com.
- # Every Vagrant development environment requires a box. You can search for
- # boxes at https://vagrantcloud.com/search.
- config.vm.box = "ubuntu/xenial64"
-
- # Disable automatic box update checking. If you disable this, then
- # boxes will only be checked for updates when the user runs
- # `vagrant box outdated`. This is not recommended.
- config.vm.box_check_update = false
-
- # Setup specific for local machine
- config.vm.define "regluit-local", primary: true do |local|
- # Create a private network
- local.vm.network "private_network", type: "dhcp"
- local.vm.hostname = "regluit-local"
-
- # VirtuaLBox provider settings for running locally with Oracle VirtualBox
- # --uartmode1 disconnected is necessary to disable serial interface, which
- # is known to cause issues with Ubuntu 16 VM's
- local.vm.provider "virtualbox" do |vb|
- vb.name = "regluit-local"
- vb.memory = 1024
- vb.cpus = 2
- vb.customize [ "modifyvm", :id, "--uartmode1", "disconnected" ]
- end
-
- end
-
- config.vm.synced_folder ".", "/vagrant", disabled: true
- config.vm.synced_folder ".", "/opt/regluit"
-
- config.vm.network "forwarded_port", guest: 8000, host: 8000
-
- # Provision node with Ansible running on the Vagrant host
- # This requires you have Ansible installed locally
- # Vagrant autogenerates an ansible inventory file to use
- config.vm.provision "ansible_local" do |ansible|
- ansible.playbook = "/opt/regluit/provisioning/setup-regluit.yml"
- ansible.provisioning_path = "/opt/regluit"
- ansible.verbose = true
- ansible.install = true
- end
-
- config.vm.post_up_message = "Successfully created regluit-local VM. Run 'vagrant ssh' to log in and start the development server."
-
-end
diff --git a/__init__.py b/__init__.py
index e69de29bb..09eec0846 100755
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,8 @@
+from __future__ import absolute_import, unicode_literals
+
+# This will make sure the app is always imported when
+# Django starts so that shared_task will use this app.
+from .celery_module import app as celery_app
+
+__all__ = ('celery_app',)
+
diff --git a/api/onix.py b/api/onix.py
index c2c3eed49..afa7308e0 100644
--- a/api/onix.py
+++ b/api/onix.py
@@ -1,81 +1,125 @@
import datetime
-import pytz
import re
-from lxml import etree
+
+from bs4 import BeautifulSoup
+import pytz
+
+from django.core.paginator import Paginator, InvalidPage
+
+from regluit.bisac import Bisac
from regluit.core import models
from regluit.core.cc import ccinfo
-from regluit.bisac import Bisac
from .crosswalks import relator_contrib, iso639
-feed_xml = """
-
+
+WORKS_PER_PAGE = 30
+
+feed_header = """
+
"""
+feed_xml = feed_header + '''
+'''
+soup = None
bisac = Bisac()
-def text_node(tag, text, attrib={}):
- node = etree.Element(tag, attrib=attrib)
- node.text = text
+def text_node(tag, text, attrib=None):
+ node = soup.new_tag(tag)
+ if attrib:
+ node.attrs = attrib
+ node.string = text
return node
-def onix_feed(facet, max=None):
- feed = etree.fromstring(feed_xml)
- feed.append(header(facet))
+def sub_element(node, tag, attrib=None):
+ sub = soup.new_tag(tag)
+ if attrib:
+ sub.attrs = attrib
+ node.append(sub)
+ return sub
+
+
+def onix_feed(facet, max=None, page_number=None):
+ global soup
+ if not soup:
+ soup = BeautifulSoup('', 'lxml')
+
+ yield feed_header + str(header(facet))
works = facet.works[0:max] if max else facet.works
+
+ if page_number is not None:
+ try:
+ p = Paginator(works, WORKS_PER_PAGE)
+ works = p.page(page_number)
+ except InvalidPage:
+ works = models.Work.objects.none()
+
for work in works:
- editions = models.Edition.objects.filter(work=work,ebooks__isnull=False)
- editions = facet.facet_object.filter_model("Edition",editions).distinct()
+ editions = models.Edition.objects.filter(work=work, ebooks__isnull=False)
+ editions = facet.facet_object.filter_model("Edition", editions).distinct()
for edition in editions:
edition_prod = product(edition, facet.facet_object)
if edition_prod is not None:
- feed.append(edition_prod)
- return etree.tostring(feed, pretty_print=True)
-
+ yield edition_prod
+ yield ''
+
def onix_feed_for_work(work):
- feed = etree.fromstring(feed_xml)
- feed.append(header(work))
- for edition in models.Edition.objects.filter(work=work,ebooks__isnull=False).distinct():
+ global soup
+ if not soup:
+ soup = BeautifulSoup('', 'lxml')
+
+ feed = BeautifulSoup(feed_xml, 'xml')
+ feed.ONIXMessage.append(header(work))
+ for edition in models.Edition.objects.filter(work=work, ebooks__isnull=False).distinct():
edition_prod = product(edition)
if edition_prod is not None:
- feed.append(product(edition))
- return etree.tostring(feed, pretty_print=True)
-
+ feed.ONIXMessage.append(product(edition))
+ return str(feed)
+
def header(facet=None):
- header_node = etree.Element("Header")
- sender_node = etree.Element("Sender")
+ header_node = soup.new_tag("Header")
+ sender_node = soup.new_tag("Sender")
sender_node.append(text_node("SenderName", "unglue.it"))
sender_node.append(text_node("EmailAddress", "unglueit@ebookfoundation.org"))
header_node.append(sender_node)
- header_node.append(text_node("SentDateTime", pytz.utc.localize(datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%SZ')))
+ header_node.append(text_node(
+ "SentDateTime",
+ pytz.utc.localize(datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%SZ')
+ ))
header_node.append(text_node("MessageNote", facet.title if facet else "Unglue.it Editions"))
return header_node
def product(edition, facet=None):
- ebooks=facet.filter_model("Ebook",edition.ebooks.filter(active=True)) if facet else edition.ebooks.filter(active=True)
- ebooks=ebooks.order_by('-created')
- # Just because an edition satisfies 2 facets with multiple ebooks doesn't mean that there is a single ebook satisfies both facets
+ ebooks = facet.filter_model(
+ "Ebook",
+ edition.ebooks.filter(active=True)
+ ) if facet else edition.ebooks.filter(active=True)
+ ebooks = ebooks.order_by('-created')
+ # Just because an edition satisfies 2 facets with multiple ebooks doesn't mean that there
+    # is a single ebook that satisfies both facets
if not ebooks.exists():
return None
-
- work=edition.work
- product_node = etree.Element("Product")
- product_node.append(text_node("RecordReference", "it.unglue.work.%s.%s" % (work.id, edition.id)))
- product_node.append(text_node("NotificationType", "03" )) # final
-
- ident_node = etree.SubElement(product_node, "ProductIdentifier")
- ident_node.append(text_node("ProductIDType", "01" )) #proprietary
- ident_node.append(text_node("IDTypeName", "unglue.it edition id" )) #proprietary
- ident_node.append(text_node("IDValue", unicode(edition.id) ))
-
+
+ work = edition.work
+ product_node = soup.new_tag("Product")
+ product_node.append(text_node(
+ "RecordReference", "it.unglue.work.%s.%s" % (work.id, edition.id)
+ ))
+ product_node.append(text_node("NotificationType", "03")) # final
+
+ ident_node = sub_element(product_node, "ProductIdentifier")
+ ident_node.append(text_node("ProductIDType", "01")) #proprietary
+ ident_node.append(text_node("IDTypeName", "unglue.it edition id")) #proprietary
+ ident_node.append(text_node("IDValue", str(edition.id)))
+
# wrong isbn better than no isbn
isbn = edition.isbn_13 if edition.isbn_13 else edition.work.first_isbn_13()
if isbn:
- ident_node = etree.SubElement(product_node, "ProductIdentifier")
- ident_node.append(text_node("ProductIDType", "03" )) #proprietary
- ident_node.append(text_node("IDValue", isbn ))
+ ident_node = sub_element(product_node, "ProductIdentifier")
+ ident_node.append(text_node("ProductIDType", "03")) #proprietary
+ ident_node.append(text_node("IDValue", isbn))
# Descriptive Detail Block
- descriptive_node = etree.SubElement(product_node, "DescriptiveDetail")
- descriptive_node.append(text_node("ProductComposition", "00" )) # single item
- descriptive_node.append(text_node("ProductForm", "ED" )) # download
+ descriptive_node = sub_element(product_node, "DescriptiveDetail")
+ descriptive_node.append(text_node("ProductComposition", "00")) # single item
+ descriptive_node.append(text_node("ProductForm", "ED")) # download
ebook = None
latest_ebooks = []
@@ -84,129 +128,131 @@ def product(edition, facet=None):
if ebook.format not in ebook_formats:
ebook_formats.append(ebook.format)
latest_ebooks.append(ebook)
- if ebook.format=='epub':
- descriptive_node.append(text_node("ProductFormDetail", "E101" ))
- elif ebook.format=='pdf':
- descriptive_node.append(text_node("ProductFormDetail", "E107" ))
- elif ebook.format=='mobi':
- descriptive_node.append(text_node("ProductFormDetail", "E116" ))
+ if ebook.format == 'epub':
+ descriptive_node.append(text_node("ProductFormDetail", "E101"))
+ elif ebook.format == 'pdf':
+ descriptive_node.append(text_node("ProductFormDetail", "E107"))
+ elif ebook.format == 'mobi':
+ descriptive_node.append(text_node("ProductFormDetail", "E116"))
if ebook.rights:
- license_node = etree.SubElement(descriptive_node, "EpubLicense")
- license_node.append(text_node("EpubLicenseName", ebook.rights ))
- lic_expr_node = etree.SubElement(license_node, "EpubLicenseExpression")
- lic_expr_node.append(text_node("EpubLicenseExpressionType", '01' )) #human readable
- lic_expr_node.append(text_node("EpubLicenseExpressionLink", ccinfo(ebook.rights).url ))
-
- title_node = etree.SubElement(descriptive_node, "TitleDetail")
- title_node.append(text_node("TitleType", '01' )) #distinctive title
- title_el = etree.SubElement(title_node, "TitleElement")
- title_el.append(text_node("TitleElementLevel", '01' ))
- title_el.append(text_node("TitleText", edition.title ))
+ license_node = sub_element(descriptive_node, "EpubLicense")
+ license_node.append(text_node("EpubLicenseName", ebook.rights))
+ lic_expr_node = sub_element(license_node, "EpubLicenseExpression")
+ lic_expr_node.append(text_node("EpubLicenseExpressionType", '01')) #human readable
+ lic_expr_node.append(text_node("EpubLicenseExpressionLink", ccinfo(ebook.rights).url))
+
+ title_node = sub_element(descriptive_node, "TitleDetail")
+ title_node.append(text_node("TitleType", '01')) #distinctive title
+ title_el = sub_element(title_node, "TitleElement")
+ title_el.append(text_node("TitleElementLevel", '01'))
+ title_el.append(text_node("TitleText", edition.title))
contrib_i = 0
for contrib in edition.relators.all():
- contrib_i+=1
- contrib_node = etree.SubElement(descriptive_node, "Contributor")
- contrib_node.append(text_node("SequenceNumber", unicode(contrib_i )))
- contrib_node.append(text_node("ContributorRole", relator_contrib.get(contrib.relation.code,"") ))
+ contrib_i += 1
+ contrib_node = sub_element(descriptive_node, "Contributor")
+ contrib_node.append(text_node("SequenceNumber", str(contrib_i)))
+ contrib_node.append(text_node("ContributorRole",
+ relator_contrib.get(contrib.relation.code, "")))
contrib_node.append(text_node("PersonName", contrib.author.name))
contrib_node.append(text_node("PersonNameInverted", contrib.author.last_name_first))
(lang, locale) = (edition.work.language, None)
if '_' in lang:
(lang, locale) = lang.split('_')
- if len(lang)==2:
+ if len(lang) == 2:
lang = iso639.get(lang, None)
if lang:
- lang_node = etree.SubElement(descriptive_node, "Language")
+ lang_node = sub_element(descriptive_node, "Language")
lang_node.append(text_node("LanguageRole", "01"))
lang_node.append(text_node("LanguageCode", lang))
if locale:
lang_node.append(text_node("CountryCode", locale))
for subject in work.subjects.all():
- subj_node = etree.SubElement(descriptive_node, "Subject")
+ subj_node = sub_element(descriptive_node, "Subject")
if subject.authority == 'lcsh':
subj_node.append(text_node("SubjectSchemeIdentifier", "04"))
- subj_node.append(text_node("SubjectHeadingText", subject.name))
+ subj_node.append(text_node("SubjectHeadingText", subject.name))
elif subject.authority == 'lcc':
subj_node.append(text_node("SubjectSchemeIdentifier", "03"))
subj_node.append(text_node("SubjectCode", subject.name))
- elif subject.authority == 'bisacsh':
+ elif subject.authority == 'bisacsh':
subj_node.append(text_node("SubjectSchemeIdentifier", "10"))
subj_node.append(text_node("SubjectCode", bisac.code(subject.name)))
- subj_node.append(text_node("SubjectHeadingText", subject.name))
+ subj_node.append(text_node("SubjectHeadingText", subject.name))
else:
subj_node.append(text_node("SubjectSchemeIdentifier", "20"))
- subj_node.append(text_node("SubjectHeadingText", subject.name))
+ subj_node.append(text_node("SubjectHeadingText", subject.name))
# audience range composite
if work.age_level:
range_match = re.search(r'(\d?\d?)-(\d?\d?)', work.age_level)
if range_match:
- audience_range_node = etree.SubElement(descriptive_node, "AudienceRange")
- audience_range_node.append(text_node("AudienceRangeQualifier", "17")) #Interest age, years
+ audience_range_node = sub_element(descriptive_node, "AudienceRange")
+ #Interest age, years
+ audience_range_node.append(text_node("AudienceRangeQualifier", "17"))
if range_match.group(1):
audience_range_node.append(text_node("AudienceRangePrecision", "03")) #from
- audience_range_node.append(text_node("AudienceRangeValue", range_match.group(1)))
+ audience_range_node.append(text_node("AudienceRangeValue", range_match.group(1)))
if range_match.group(2):
audience_range_node.append(text_node("AudienceRangePrecision", "04")) #from
- audience_range_node.append(text_node("AudienceRangeValue", range_match.group(2)))
-
+ audience_range_node.append(text_node("AudienceRangeValue", range_match.group(2)))
+
# Collateral Detail Block
- coll_node = etree.SubElement(product_node, "CollateralDetail")
- desc_node = etree.SubElement(coll_node, "TextContent")
+ coll_node = sub_element(product_node, "CollateralDetail")
+ desc_node = sub_element(coll_node, "TextContent")
desc_node.append(text_node("TextType", '03')) # description
desc_node.append(text_node("ContentAudience", '00')) #unrestricted
- desc = (work.description if work.description else '') + '
Listed by Unglue.it.' % work.id
- try :
- content = etree.XML("
" + desc + "
")
- content_node = etree.SubElement(desc_node, "Text", attrib={"textformat":"05"}) #xhtml
- content_node.append(content)
- except etree.XMLSyntaxError:
- content_node = etree.SubElement(desc_node, "Text", attrib={"textformat":"02"}) #html
- content_node.text = etree.CDATA(desc)
- supp_node = etree.SubElement(coll_node, "SupportingResource")
+ desc = (work.description if work.description else '') + \
+ '
Listed by Unglue.it.' % work.id
+ content = BeautifulSoup('' + desc + '
', 'lxml')
+ content_node = sub_element(desc_node, "Text", attrib={"textformat":"05"}) #xhtml
+ content_node.append(content.body.div)
+ supp_node = sub_element(coll_node, "SupportingResource")
supp_node.append(text_node("ResourceContentType", '01')) #front cover
supp_node.append(text_node("ContentAudience", '00')) #unrestricted
supp_node.append(text_node("ResourceMode", '03')) #image
- cover_node = etree.SubElement(supp_node, "ResourceVersion")
+ cover_node = sub_element(supp_node, "ResourceVersion")
cover_node.append(text_node("ResourceForm", '01')) #linkable
- coverfeat_node = etree.SubElement(cover_node, "ResourceVersionFeature")
+ coverfeat_node = sub_element(cover_node, "ResourceVersionFeature")
coverfeat_node.append(text_node("ResourceVersionFeatureType", '01')) #image format
coverfeat_node.append(text_node("FeatureValue", 'D502')) #jpeg
cover_node.append(text_node("ResourceLink", edition.cover_image_thumbnail())) #link
# Publishing Detail Block
- pubdetail_node = etree.SubElement(product_node, "PublishingDetail")
+ pubdetail_node = sub_element(product_node, "PublishingDetail")
if edition.publisher_name:
- pub_node = etree.SubElement(pubdetail_node, "Publisher")
+ pub_node = sub_element(pubdetail_node, "Publisher")
pub_node.append(text_node("PublishingRole", '01')) #publisher
pub_node.append(text_node("PublisherName", edition.publisher_name.name))
pubdetail_node.append(text_node("PublishingStatus", '00')) #unspecified
-
+
#consumers really want a pub date
- publication_date = edition.publication_date if edition.publication_date else edition.work.earliest_publication_date
+ publication_date = edition.publication_date if edition.publication_date else \
+ edition.work.earliest_publication_date
if publication_date:
- pubdate_node = etree.SubElement(pubdetail_node, "PublishingDate")
+ pubdate_node = sub_element(pubdetail_node, "PublishingDate")
pubdate_node.append(text_node("PublishingDateRole", '01')) #nominal pub date
- pubdate_node.append(text_node("Date", publication_date.replace('-','')))
-
+ pubdate_node.append(text_node("Date", publication_date.replace('-', '')))
+
# Product Supply Block
- supply_node = etree.SubElement(product_node,"ProductSupply")
- market_node = etree.SubElement(supply_node,"Market")
- terr_node = etree.SubElement(market_node,"Territory")
+ supply_node = sub_element(product_node, "ProductSupply")
+ market_node = sub_element(supply_node, "Market")
+ terr_node = sub_element(market_node, "Territory")
terr_node.append(text_node("RegionsIncluded", 'WORLD'))
- supply_detail_node = etree.SubElement(supply_node,"SupplyDetail")
- supplier_node = etree.SubElement(supply_detail_node,"Supplier")
+ supply_detail_node = sub_element(supply_node, "SupplyDetail")
+ supplier_node = sub_element(supply_detail_node, "Supplier")
supplier_node.append(text_node("SupplierRole", '11')) #non-exclusive distributer
supplier_node.append(text_node("SupplierName", 'Unglue.it')) #non-exclusive distributer
for ebook in latest_ebooks:
- website_node = etree.SubElement(supplier_node,"Website")
+ website_node = sub_element(supplier_node, "Website")
website_node.append(text_node("WebsiteRole", '29')) #full content
- website_node.append(text_node("WebsiteDescription", '%s file download' % ebook.format, attrib={'textformat':'06'})) #full content
+ #full content
+ website_node.append(text_node("WebsiteDescription",
+ '%s file download' % ebook.format,
+ attrib={'textformat':'06'}))
website_node.append(text_node("WebsiteLink", ebook.download_url)) #full content
supply_detail_node.append(text_node("ProductAvailability", '20')) #Available
- price_node = etree.SubElement(supply_detail_node,"Price")
+ price_node = sub_element(supply_detail_node, "Price")
price_node.append(text_node("PriceType", '01')) #retail excluding tax
price_node.append(text_node("PriceAmount", '0.00')) #retail excluding tax
price_node.append(text_node("CurrencyCode", 'USD')) #retail excluding tax
return product_node
-
\ No newline at end of file
diff --git a/api/opds.py b/api/opds.py
index 907f0efd5..7b514b561 100644
--- a/api/opds.py
+++ b/api/opds.py
@@ -1,32 +1,36 @@
+import datetime
from itertools import islice
+import logging
+from urllib.parse import urlparse, urlunparse
-from lxml import etree
-import datetime
-import urlparse
-from django.core.urlresolvers import reverse
+from bs4 import BeautifulSoup
+import pytz
+
+from django.core.cache import cache
+from django.urls import reverse
from django.utils.http import urlquote
-import pytz
-import logging
-logger = logging.getLogger(__name__)
from regluit.core import models, facets
import regluit.core.cc as cc
licenses = cc.LICENSE_LIST
+logger = logging.getLogger(__name__)
+soup = None
FORMAT_TO_MIMETYPE = {'pdf':"application/pdf",
'epub':"application/epub+zip",
'mobi':"application/x-mobipocket-ebook",
'html':"text/html",
'text':"text/html"}
-UNGLUEIT_URL= 'https://unglue.it'
-ACQUISITION = "application/atom+xml;profile=opds-catalog;kind=acquisition"
+UNGLUEIT_URL = 'https://unglue.it'
+ACQUISITION = "application/atom+xml; profile=opds-catalog ;kind=acquisition; charset=utf-8"
+NAVIGATION = "application/atom+xml; profile=opds-catalog; kind=navigation; charset=utf-8"
FACET_RELATION = "http://opds-spec.org/facet"
-old_facets= ["creative_commons","active_campaigns"]
+old_facets = ["creative_commons", "active_campaigns"]
def feeds():
@@ -40,50 +44,56 @@ def feeds():
def get_facet_class(name):
if name in old_facets:
return globals()[name]
- else:
- return get_facet_facet(name)
-
-
+ return get_facet_facet(name)
+
+
def text_node(tag, text):
- node = etree.Element(tag)
- node.text = text
+ node = soup.new_tag(tag)
+ if text:
+ node.string = text
return node
def html_node(tag, html):
node = text_node(tag, html)
- node.attrib.update({"{http://www.w3.org/2005/Atom}type":'html'})
+ node.attrs.update({"type":'html'})
return node
-
+
def add_query_component(url, qc):
"""
add component qc to the querystring of url
"""
- m = list(urlparse.urlparse(url))
- if len(m[4]):
- m[4] = "&".join([m[4],qc])
+ m = list(urlparse(url))
+ if m[4]:
+ m[4] = "&".join([m[4], qc])
else:
m[4] = qc
- return urlparse.urlunparse(m)
+ return urlunparse(m)
def isbn_node(isbn):
- node = etree.Element("{http://purl.org/dc/terms/}identifier")
- node.attrib.update({"{http://www.w3.org/2001/XMLSchema-instance}type":'dcterms:URI'})
- node.text = 'urn:ISBN:'+ isbn
+ node = soup.new_tag("dcterms:identifier")
+ node.attrs.update({"xsi:type":'dcterms:URI'})
+ node.string = 'urn:ISBN:'+ isbn
return node
def work_node(work, facet=None):
-
- node = etree.Element("entry")
+
+ node = soup.new_tag("entry")
# title
node.append(text_node("title", work.title))
-
+
# id
- node.append(text_node('id', "{base}{url}".format(base=UNGLUEIT_URL,url=reverse('work_identifier',kwargs={'work_id':work.id}))))
-
+ node.append(text_node(
+ 'id',
+ "{base}{url}".format(
+ base=UNGLUEIT_URL,
+ url=reverse('work_identifier', kwargs={'work_id': work.id})
+ )
+ ))
+
updated = None
-
+
# links for all ebooks
- ebooks = facet.filter_model("Ebook",work.ebooks()) if facet else work.ebooks()
+ ebooks = facet.filter_model("Ebook", work.ebooks()) if facet else work.ebooks()
versions = set()
for ebook in ebooks:
if updated is None:
@@ -92,78 +102,85 @@ def work_node(work, facet=None):
node.append(text_node('updated', updated))
if not ebook.version_label in versions:
versions.add(ebook.version_label)
- link_node = etree.Element("link")
-
+ link_node = soup.new_tag("link")
+
# ebook.download_url is an absolute URL with the protocol, domain, and path baked in
- link_rel = "http://opds-spec.org/acquisition/open-access"
- link_node.attrib.update({"href":add_query_component(ebook.download_url, "feed=opds"),
- "rel":link_rel,
- "{http://purl.org/dc/terms/}rights": str(ebook.rights)})
- if ebook.is_direct():
- link_node.attrib["type"] = FORMAT_TO_MIMETYPE.get(ebook.format, "")
+ link_rel = "http://opds-spec.org/acquisition/open-access"
+ link_node.attrs.update({
+ "href":add_query_component(ebook.download_url, "feed=opds"),
+ "rel":link_rel,
+ "dcterms:rights": str(ebook.rights)
+ })
+ if ebook.is_direct():
+ link_node["type"] = FORMAT_TO_MIMETYPE.get(ebook.format, "")
else:
- """ indirect acquisition, i.e. google books """
- link_node.attrib["type"] = "text/html"
- indirect = etree.Element("{http://opds-spec.org/}indirectAcquisition",)
- indirect.attrib["type"] = FORMAT_TO_MIMETYPE.get(ebook.format, "")
+ # indirect acquisition, i.e. google books
+ link_node["type"] = "text/html"
+ indirect = soup.new_tag("opds:indirectAcquisition",)
+ indirect["type"] = FORMAT_TO_MIMETYPE.get(ebook.format, "")
link_node.append(indirect)
if ebook.version_label:
- link_node.attrib.update({"{http://schema.org/}version": ebook.version_label})
+ link_node.attrs.update({"schema:version": ebook.version_label})
node.append(link_node)
-
+
# get the cover -- assume jpg?
-
- cover_node = etree.Element("link")
- cover_node.attrib.update({"href":work.cover_image_small(),
- "type":"image/"+work.cover_filetype(),
- "rel":"http://opds-spec.org/image/thumbnail"})
+
+ cover_node = soup.new_tag("link")
+ cover_node.attrs.update({
+ "href": work.cover_image_small(),
+ "type": "image/" + work.cover_filetype(),
+ "rel": "http://opds-spec.org/image/thumbnail"
+ })
node.append(cover_node)
- cover_node = etree.Element("link")
- cover_node.attrib.update({"href":work.cover_image_thumbnail(),
- "type":"image/"+work.cover_filetype(),
- "rel":"http://opds-spec.org/image"})
+ cover_node = soup.new_tag("link")
+ cover_node.attrs.update({
+ "href": work.cover_image_thumbnail(),
+ "type": "image/" + work.cover_filetype(),
+ "rel": "http://opds-spec.org/image"
+ })
node.append(cover_node)
-
-
+
+
# 2012
- node.append(text_node("{http://purl.org/dc/terms/}issued", work.publication_date))
-
+ node.append(text_node("dcterms:issued", work.publication_date))
+
# author
# TO DO: include all authors?
- author_node = etree.Element("author")
+ author_node = soup.new_tag("author")
author_node.append(text_node("name", work.author()))
node.append(author_node)
-
+
# publisher
#Open Book Publishers
- if len(work.publishers()):
+ if work.publishers().exists():
for publisher in work.publishers():
- node.append(text_node("{http://purl.org/dc/terms/}publisher", publisher.name.name))
-
+ node.append(text_node("dcterms:publisher", publisher.name.name))
+
# language
#en
- node.append(text_node("{http://purl.org/dc/terms/}language", work.language))
-
+ node.append(text_node("dcterms:language", work.language))
+
# description
- node.append(html_node("{http://www.w3.org/2005/Atom}content", work.description))
-
+ node.append(html_node("content", work.description))
+
# identifiers
if work.identifiers.filter(type='isbn'):
for isbn in work.identifiers.filter(type='isbn')[0:9]: #10 should be more than enough
node.append(isbn_node(isbn.value))
-
+
# subject tags
# [[subject.name for subject in work.subjects.all()] for work in ccworks if work.subjects.all()]
for subject in work.subjects.all():
if subject.is_visible:
- category_node = etree.Element("category")
+ category_node = soup.new_tag("category")
try:
- category_node.attrib["term"] = subject.name
+ category_node["term"] = subject.name
node.append(category_node)
try:
subject.works.filter(is_free=True)[1]
# only show feed if there's another work in it
- append_navlink(node, 'related', 'kw.'+ subject.name , 0, 'popular', title=subject.name)
+ node.append(navlink('related', 'kw.' + subject.name, 0,
+ 'popular', title=subject.name))
except:
pass
except ValueError:
@@ -172,48 +189,53 @@ def work_node(work, facet=None):
subject.delete()
# age level
- #
+ #
if work.age_level:
- category_node = etree.Element("category")
- category_node.attrib["scheme"] = 'http://schema.org/typicalAgeRange'
- category_node.attrib["term"] = work.age_level
- category_node.attrib["label"] = work.get_age_level_display()
+ category_node = soup.new_tag("category")
+ category_node["scheme"] = 'http://schema.org/typicalAgeRange'
+ category_node["term"] = work.age_level
+ category_node["label"] = work.get_age_level_display()
node.append(category_node)
-
-
- # rating
- rating_node = etree.Element("{http://schema.org/}Rating")
- rating_node.attrib.update({"{http://schema.org/}ratingValue":"{:}".format(work.priority())})
+
+
+ # rating
+ rating_node = soup.new_tag("schema:Rating")
+ rating_node.attrs.update({"schema:ratingValue":"{:}".format(work.priority())})
node.append(rating_node)
return node
class Facet:
title = ''
- works = None
+ works = models.Work.objects.none()
feed_path = ''
description = ''
-
+
def feed(self, page=None, order_by='newest'):
self.works = self.works.order_by(*facets.get_order_by(order_by))
return opds_feed_for_works(self, page=page, order_by=order_by)
-
+
def updated(self):
# return the creation date for most recently added item
- if not self.works:
+ key = f"{self.feed_path.replace(' ', '_')}_updated"
+ if not self.works.exists():
return pytz.utc.localize(datetime.datetime.utcnow()).isoformat()
- else:
- return pytz.utc.localize(self.works[0].created).isoformat()
+ value = cache.get(key)
+ if value is None:
+ value = pytz.utc.localize(self.works.latest('created').created).isoformat()
+ cache.set(key, value, 100000)
+ return value
def get_facet_facet(facet_path):
class Facet_Facet(Facet):
-
+
def __init__(self, facet_path=facet_path):
self.feed_path = facet_path
self.facet_object = facets.get_facet_object(facet_path)
self.title = "Unglue.it"
for facet in self.facet_object.facets():
self.title = self.title + " " + facet.title
- self.works = self.facet_object.get_query_set().distinct()
+ self.works = self.facet_object.get_query_set()
self.description = self.facet_object.description
return Facet_Facet
@@ -221,11 +243,14 @@ class creative_commons(Facet):
def __init__(self):
self.title = "Unglue.it Catalog: Creative Commons Books"
self.feed_path = "creative_commons"
- self.works = models.Work.objects.filter(editions__ebooks__isnull=False,
- editions__ebooks__rights__in=cc.LICENSE_LIST).distinct()
- self.description= "These Creative Commons licensed ebooks are free to read - the people who created them want you to read and share them."
+ self.works = models.Work.objects.filter(
+ editions__ebooks__isnull=False,
+ editions__ebooks__rights__in=cc.LICENSE_LIST
+ )
+ self.description = """These Creative Commons licensed ebooks are free to read - the people
+ who created them want you to read and share them."""
self.facet_object = facets.get_facet_object(self.feed_path)
-
+
class active_campaigns(Facet):
"""
return opds feed for works associated with active campaigns
@@ -233,115 +258,136 @@ class active_campaigns(Facet):
def __init__(self):
self.title = "Unglue.it Catalog: Books under Active Campaign"
self.feed_path = "active_campaigns"
- self.works = models.Work.objects.filter(campaigns__status='ACTIVE', is_free = True)
- self.description= "With your help we're raising money to make these books free to the world."
+ self.works = models.Work.objects.filter(campaigns__status='ACTIVE', is_free=True)
+ self.description = """With your help we're raising money
+ to make these books free to the world."""
self.facet_object = facets.get_facet_object(self.feed_path)
def opds_feed_for_work(work_id):
class single_work_facet:
def __init__(self, work_id):
try:
- works=models.Work.objects.filter(id=work_id)
+ works = models.Work.objects.filter(id=work_id)
except models.Work.DoesNotExist:
- works=models.Work.objects.none()
+ works = models.Work.objects.none()
except ValueError:
# not a valid work_id
- works=models.Work.objects.none()
- self.works=works
- self.title='Unglue.it work #%s' % work_id
- self.feed_path=''
- self.facet_object= facets.BaseFacet(None)
- return opds_feed_for_works( single_work_facet(work_id) )
+ works = models.Work.objects.none()
+ self.works = works
+ self.title = 'Unglue.it work #%s' % work_id
+ self.feed_path = ''
+ self.facet_object = facets.BaseFacet(None)
+ return opds_feed_for_works(single_work_facet(work_id))
def opds_feed_for_works(the_facet, page=None, order_by='newest'):
- works = the_facet.works
+ global soup
+ if not soup:
+ soup = BeautifulSoup('', 'lxml')
+ works = the_facet.works.distinct()
feed_path = the_facet.feed_path
title = the_facet.title
- feed_xml = """
+ """
-
- feed = etree.fromstring(feed_xml)
-
+ xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd
+ http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd">
+ """
+
+ yield feed_header
+
# add title
# TO DO: will need to calculate the number items and where in the feed we are
-
- feed.append(text_node('title', title + ' - sorted by ' + order_by))
-
- # id
-
- feed.append(text_node('id', "{url}/api/opds/{feed_path}/?order_by={order_by}".format(url=UNGLUEIT_URL,
- feed_path=urlquote(feed_path), order_by=order_by)))
-
+
+ yield text_node('title', title + ' - sorted by ' + order_by).prettify()
+
+ # id
+
+ feed = text_node(
+ 'id',
+ "{url}/api/opds/{feed_path}/?order_by={order_by}".format(
+ url=UNGLUEIT_URL,
+ feed_path=urlquote(feed_path),
+ order_by=order_by,
+ ),
+ )
+ yield feed.prettify()
+
# updated
# TO DO: fix time zone?
# also use our wrapped datetime code
-
- feed.append(text_node('updated',
- pytz.utc.localize(datetime.datetime.utcnow()).isoformat()))
-
+
+ feed = text_node('updated', pytz.utc.localize(datetime.datetime.utcnow()).isoformat())
+ yield feed.prettify()
+
# author
-
- author_node = etree.Element("author")
+
+ author_node = soup.new_tag("author")
author_node.append(text_node('name', 'unglue.it'))
author_node.append(text_node('uri', UNGLUEIT_URL))
- feed.append(author_node)
-
+ yield author_node.prettify()
+
# links: start, self, next/prev (depending what's necessary -- to start with put all CC books)
-
+
# start link
- append_navlink(feed, 'start', feed_path, None , order_by, title="First 10")
-
+ yield navlink('start', feed_path, None, order_by, title="First 10").prettify()
+
# next link
-
+
if not page:
- page =0
+ page = 0
else:
try:
- page=int(page)
+ page = int(page)
except TypeError:
- page=0
-
+ page = 0
+
try:
works[10 * page + 10]
- append_navlink(feed, 'next', feed_path, page+1 , order_by, title="Next 10")
+ yield navlink('next', feed_path, page+1, order_by, title="Next 10").prettify()
except IndexError:
pass
-
+
# sort facets
- append_navlink(feed, FACET_RELATION, feed_path, None, 'popular', group="Order", active = order_by=='popular', title="Sorted by popularity")
- append_navlink(feed, FACET_RELATION, feed_path, None, 'newest', group="Order", active = order_by=='newest', title="Sorted by newest")
-
+ yield navlink(FACET_RELATION, feed_path, None, 'popular', group="Order",
+ active=order_by == 'popular', title="Sorted by popularity").prettify()
+ yield navlink(FACET_RELATION, feed_path, None, 'newest', group="Order",
+ active=order_by == 'newest', title="Sorted by newest").prettify()
+
#other facets
if feed_path not in old_facets:
for other_group in the_facet.facet_object.get_other_groups():
for facet_object in other_group.get_facets():
- append_navlink(feed, FACET_RELATION, feed_path + '/' + facet_object.facet_name, None, order_by, group=other_group.title, title=facet_object.title)
-
- works = islice(works, 10 * page, 10 * page + 10)
+ yield navlink(FACET_RELATION, feed_path + '/' + facet_object.facet_name,
+ None, order_by, group=other_group.title,
+ title=facet_object.title).prettify()
+
+ works = islice(works, 10 * page, 10 * page + 10)
if page > 0:
- append_navlink(feed, 'previous', feed_path, page-1, order_by, title="Previous 10")
+ yield navlink('previous', feed_path, page-1, order_by, title="Previous 10").prettify()
+
for work in works:
- node = work_node(work, facet=the_facet.facet_object)
- feed.append(node)
-
- return etree.tostring(feed, pretty_print=True)
-
-def append_navlink(feed, rel, path, page, order_by, group=None, active=None , title=""):
- link = etree.Element("link")
- link.attrib.update({"rel":rel,
- "href": UNGLUEIT_URL + "/api/opds/" + urlquote(path) + '/?order_by=' + order_by + ('&page=' + unicode(page) if page!=None else ''),
- "type": ACQUISITION,
- "title": title,
- })
+ yield work_node(work, facet=the_facet.facet_object).prettify()
+
+ yield '''
+'''
+
+def navlink(rel, path, page, order_by, group=None, active=None, title=""):
+ link = soup.new_tag("link")
+ link.attrs.update({
+ "rel":rel,
+ "href": UNGLUEIT_URL + "/api/opds/" + urlquote(path) + '/?order_by=' + order_by + (
+ '&page=' + str(page) if page is not None else ''
+ ),
+ "type": ACQUISITION,
+ "title": title,
+ })
if rel == FACET_RELATION:
if group:
- link.attrib['{http://opds-spec.org/}facetGroup'] = group
+ link['opds:facetGroup'] = group
if active:
- link.attrib['{http://opds-spec.org/}activeFacet'] = 'true'
- feed.append(link)
\ No newline at end of file
+ link['opds:activeFacet'] = 'true'
+ return link
diff --git a/api/opds_json.py b/api/opds_json.py
index 59e218029..4007b99ae 100644
--- a/api/opds_json.py
+++ b/api/opds_json.py
@@ -1,26 +1,22 @@
-from itertools import islice
-
import datetime
-import urlparse
-from django.core.urlresolvers import reverse
-from django.utils.http import urlquote
+from itertools import islice
+import logging
import json
+
import pytz
-import logging
-logger = logging.getLogger(__name__)
+from django.urls import reverse
+from django.utils.http import urlquote
from regluit.core import models, facets
import regluit.core.cc as cc
+
from .opds import (
- feeds,
- get_facet_class,
add_query_component,
- Facet,
- get_facet_facet,
- opds_feed_for_work,
)
+logger = logging.getLogger(__name__)
+
licenses = cc.LICENSE_LIST
FORMAT_TO_MIMETYPE = {'pdf':"application/pdf",
@@ -29,7 +25,7 @@
'html':"text/html",
'text':"text/html"}
-UNGLUEIT_URL= 'https://unglue.it'
+UNGLUEIT_URL = 'https://unglue.it'
ACQUISITION = "application/opds+json"
FACET_RELATION = "opds:facet"
JSONCONTEXT = "http://opds-spec.org/opds.jsonld"
@@ -42,24 +38,22 @@ def feeds():
def get_facet_class(name):
return get_facet_facet(name)
-
+
def text_node(tag, text):
return {tag:text}
def html_node(tag, html):
return {tag:html}
-
+
def isbn_node(isbn):
return 'urn:ISBN:'+ isbn
def work_node(work, facet=None):
-
-
- metadata = {"@type": "http://schema.org/EBook",
+ metadata = {
+ "@type": "http://schema.org/EBook",
"id": "{base}{url}".format(
base=UNGLUEIT_URL,
- url=reverse('work_identifier',
- kwargs={'work_id':work.id})
+ url=reverse('work_identifier', kwargs={'work_id':work.id})
)
}
links = []
@@ -73,7 +67,7 @@ def work_node(work, facet=None):
}
# title
metadata["title"] = work.title
-
+
# id
links.append({
"rel": "self",
@@ -84,18 +78,18 @@ def work_node(work, facet=None):
),
"type": "application/opds-publication+json"
})
-
+
updated = None
-
+
# links for all ebooks
- ebooks = facet.filter_model("Ebook",work.ebooks()) if facet else work.ebooks()
+ ebooks = facet.filter_model("Ebook", work.ebooks()) if facet else work.ebooks()
versions = set()
for ebook in ebooks:
if updated is None:
# most recent ebook, first ebook in loop
updated = ebook.created.isoformat()
- metadata['updated'] = updated
+ metadata['updated'] = updated
if not ebook.version_label in versions:
versions.add(ebook.version_label)
# ebook.download_url is an absolute URL with the protocol, domain, and path baked in
@@ -104,19 +98,19 @@ def work_node(work, facet=None):
"href": add_query_component(ebook.download_url, "feed=opds"),
"rights": str(ebook.rights)
}
- if ebook.is_direct():
+ if ebook.is_direct():
acquire["type"] = FORMAT_TO_MIMETYPE.get(ebook.format, "")
else:
- """ indirect acquisition, i.e. google books """
+ # indirect acquisition, i.e. google books
acquire["type"] = "text/html"
acquire["indirectAcquisition"] = {
"type": FORMAT_TO_MIMETYPE.get(ebook.format)
}
if ebook.version_label:
acquire["version"] = ebook.version_label
-
+
acquires.append(acquire)
-
+
# get the cover -- assume jpg?
if work.cover_image_small():
cover_node = {
@@ -130,80 +124,85 @@ def work_node(work, facet=None):
"type": "image/"+work.cover_filetype(),
}
images.append(cover_node2)
-
-
+
+
# 2012
metadata["issued"] = work.publication_date
-
+
# author
# TO DO: include all authors?
- metadata["author"] = work.author()
-
+ metadata["author"] = work.author()
+
# publisher
#Open Book Publishers
- if len(work.publishers()):
- metadata["publishers"] = [{"publisher": publisher.name.name}
- for publisher in work.publishers()]
-
+ if work.publishers().exists():
+ metadata["publishers"] = [
+ {"publisher": publisher.name.name} for publisher in work.publishers()
+ ]
# language
metadata["language"] = work.language
-
+
# description
metadata["summary"] = work.description
-
+
# identifiers
if work.identifiers.filter(type='isbn'):
- metadata['identifiers'] = [isbn_node(isbn.value)
- for isbn in work.identifiers.filter(type='isbn')[0:9]] #10 should be more than enough
+ metadata['identifiers'] = [
+ isbn_node(isbn.value) for isbn in work.identifiers.filter(type='isbn')[0:9]
+ ] # 10 should be more than enough
-
# subject tags
- subjects = [subject.name for subject in work.subjects.all()]
+ subjects = [subject.name for subject in work.subjects.all()]
if subjects:
metadata["subjects"] = subjects
# age level
- #
+ #
if work.age_level:
age_level_node_attrib = {}
age_level_node = {"category": age_level_node_attrib}
- age_level_node_attrib["scheme"] = 'http://schema.org/typicalAgeRange'
- age_level_node_attrib["term"] = work.age_level
- age_level_node_attrib["label"] = work.get_age_level_display()
+ age_level_node_attrib["scheme"] = 'http://schema.org/typicalAgeRange'
+ age_level_node_attrib["term"] = work.age_level
+ age_level_node_attrib["label"] = work.get_age_level_display()
metadata.update(age_level_node)
-
-
- # rating
+
+
+ # rating
metadata["rating"] = {"ratingValue":"{:}".format(work.priority())}
return content
class Facet:
title = ''
- works = None
+ works = models.Work.objects.none()
feed_path = ''
description = ''
-
+
def feed(self, page=None, order_by='newest'):
self.works = self.works.order_by(*facets.get_order_by(order_by))
return opds_feed_for_works(self, page=page, order_by=order_by)
-
+
def updated(self):
# return the creation date for most recently added item
- if not self.works:
+ key = f"{self.feed_path.replace(' ', '_')}_updated"
+ if not self.works.exists():
return pytz.utc.localize(datetime.datetime.utcnow()).isoformat()
- else:
- return pytz.utc.localize(self.works[0].created).isoformat()
+ value = cache.get(key)
+ if value is None:
+ value = pytz.utc.localize(self.works.latest('created').created).isoformat()
+ cache.set(key, value, 100000)
+ return value
def get_facet_facet(facet_path):
class Facet_Facet(Facet):
-
+
def __init__(self, facet_path=facet_path):
self.feed_path = facet_path
self.facet_object = facets.get_facet_object(facet_path)
self.title = "Unglue.it"
for facet in self.facet_object.facets():
self.title = self.title + " " + facet.title
- self.works = self.facet_object.get_query_set().distinct()
+ self.works = self.facet_object.get_query_set()
self.description = self.facet_object.description
return Facet_Facet
@@ -214,38 +213,37 @@ class NullFacet(facets.BaseFacet):
def get_other_groups(self):
return[]
try:
- works=models.Work.objects.filter(id=work_id)
+ works = models.Work.objects.filter(id=work_id)
except models.Work.DoesNotExist:
- works=models.Work.objects.none()
+ works = models.Work.objects.none()
except ValueError:
# not a valid work_id
- works=models.Work.objects.none()
- self.works=works
- self.title='Unglue.it work #%s' % work_id
- self.feed_path=''
- self.facet_object= NullFacet(None)
- return opds_feed_for_works( single_work_facet(work_id) )
+ works = models.Work.objects.none()
+ self.works = works
+ self.title = 'Unglue.it work #%s' % work_id
+ self.feed_path = ''
+ self.facet_object = NullFacet(None)
+ return opds_feed_for_works(single_work_facet(work_id))
def opds_feed_for_works(the_facet, page=None, order_by='newest'):
if order_by == 'none':
books_per_page = 50000
+ order_by = 'newest'
else:
books_per_page = 50
- works = the_facet.works
+ works = the_facet.works.distinct()
feed_path = the_facet.feed_path
title = the_facet.title
metadata = {"title": title}
links = []
- feedlist = []
- feed = {"@context": JSONCONTEXT, "metadata": metadata, "links": links, "publications": feedlist}
-
+
# add title
# TO DO: will need to calculate the number items and where in the feed we are
-
+
metadata['title'] = title + ' - sorted by ' + order_by
-
+
# links: start, self, next/prev (depending what's necessary -- to start with put all CC books)
-
+
if not page:
page = 0
else:
@@ -255,37 +253,55 @@ def opds_feed_for_works(the_facet, page=None, order_by='newest'):
page = 0
# self link
- append_navlink(feed, 'self', feed_path, page , order_by, title="First {}".format(books_per_page))
-
- # next link
+ append_navlink(links, 'self', feed_path, page, order_by,
+ title="First {}".format(books_per_page))
+
+ # next link
try:
works[books_per_page * page + books_per_page]
- append_navlink(feed, 'next', feed_path, page+1 , order_by,
- title="Next {}".format(books_per_page))
+ append_navlink(links, 'next', feed_path, page+1, order_by,
+ title="Next {}".format(books_per_page))
except IndexError:
pass
-
+
# sort facets
- append_navlink(feed, FACET_RELATION, feed_path, None, 'popular', group="Order", active = order_by=='popular', title="Sorted by popularity")
- append_navlink(feed, FACET_RELATION, feed_path, None, 'newest', group="Order", active = order_by=='newest', title="Sorted by newest")
-
+ append_navlink(links, FACET_RELATION, feed_path, None, 'popular', group="Order",
+ active=order_by == 'popular', title="Sorted by popularity")
+ append_navlink(links, FACET_RELATION, feed_path, None, 'newest', group="Order",
+ active=order_by == 'newest', title="Sorted by newest")
+
#other facets
for other_group in the_facet.facet_object.get_other_groups():
for facet_object in other_group.get_facets():
- append_navlink(feed, FACET_RELATION, feed_path + '/' + facet_object.facet_name, None, order_by, group=other_group.title, title=facet_object.title)
-
- works = islice(works, books_per_page * page, books_per_page * page + books_per_page)
+ append_navlink(
+ links, FACET_RELATION,
+ feed_path + '/' + facet_object.facet_name, None, order_by,
+ group=other_group.title, title=facet_object.title
+ )
+
+ works = islice(works, books_per_page * page, books_per_page * page + books_per_page)
if page > 0:
- append_navlink(feed, 'previous', feed_path, page-1, order_by, title="Previous {}".format(books_per_page))
+ append_navlink(links, 'previous', feed_path, page-1, order_by,
+ title="Previous {}".format(books_per_page))
+
+ yield '{' + f"""
+"@context": {JSONCONTEXT},
+"metadata": {json.dumps(metadata, indent=2,)},
+"links": {json.dumps(links, indent=2,)},
+"publications":
+[
+"""
+
for work in works:
node = work_node(work, facet=the_facet.facet_object)
- feedlist.append(node)
- return json.dumps(feed,indent=2, separators=(',', ': '), sort_keys=False)
+ yield json.dumps(node, indent=2) + ',\r'
+ yield '\r]\r}'
-def append_navlink(feed, rel, path, page, order_by, group=None, active=None , title=""):
- link = {
+def append_navlink(links, rel, path, page, order_by, group=None, active=None, title=""):
+ link = {
"rel": rel,
- "href": UNGLUEIT_URL + "/api/opdsjson/" + urlquote(path) + '/?order_by=' + order_by + ('&page=' + unicode(page) ),
+ "href": UNGLUEIT_URL + "/api/opdsjson/" + urlquote(path) +
+ '/?order_by=' + order_by + '&page=' + str(page),
"type": ACQUISITION,
"title": title,
}
@@ -294,4 +310,4 @@ def append_navlink(feed, rel, path, page, order_by, group=None, active=None , ti
link['facetGroup'] = group
if active:
link['activeFacet'] = 'true'
- feed['links'].append(link)
\ No newline at end of file
+ links.append(link)
diff --git a/api/resources.py b/api/resources.py
index bab1fc0db..8dc250349 100755
--- a/api/resources.py
+++ b/api/resources.py
@@ -10,7 +10,7 @@
from django.conf.urls import url
from django.contrib import auth
from django.contrib.auth.models import User
-from django.core.urlresolvers import reverse
+from django.urls import reverse
from regluit.core import models
import regluit.core.isbn
@@ -20,7 +20,7 @@
class EditionResource(ModelResource):
work = fields.ForeignKey('regluit.api.resources.WorkResource', 'work')
- identifiers = fields.ToManyField('regluit.api.resources.IdentifierResource', 'identifiers')
+ identifiers = fields.ToManyField('regluit.api.resources.IdentifierResource', 'identifiers', full=True)
ebooks = fields.ToManyField('regluit.api.resources.EbookResource', 'ebooks')
class Meta:
authentication = ApiKeyAuthentication()
@@ -45,10 +45,11 @@ def build_filters(self, filters = None, **kwargs):
class IdentifierResource(ModelResource):
work = fields.ForeignKey('regluit.api.resources.WorkResource', 'work')
- edition = fields.ForeignKey('regluit.api.resources.EditionResource', 'edition')
+ edition = fields.ForeignKey('regluit.api.resources.EditionResource', 'edition', null=True)
class Meta:
authentication = ApiKeyAuthentication()
+ include_resource_uri = False
queryset = models.Identifier.objects.all()
resource_name = 'identifier'
filtering = {
@@ -59,7 +60,7 @@ class Meta:
class WorkResource(ModelResource):
editions = fields.ToManyField(EditionResource, 'editions')
- identifiers = fields.ToManyField(IdentifierResource, 'identifiers')
+ identifiers = fields.ToManyField(IdentifierResource, 'identifiers', full=True)
class Meta:
authentication = ApiKeyAuthentication()
@@ -170,6 +171,7 @@ def obj_get_list(self, bundle, **kwargs):
return models.Ebook.objects.none()
class Meta:
+ queryset = models.Ebook.objects.all()
authentication = ApiKeyAuthentication()
fields = [ 'provider', 'rights' ]
limit = 0
diff --git a/api/templates/api_help.html b/api/templates/api_help.html
index 97fd8239d..7189f0bba 100644
--- a/api/templates/api_help.html
+++ b/api/templates/api_help.html
@@ -55,45 +55,65 @@ Free Ebooks by ISBN
{% else %}
(Log in to see links)
{% endif %}
+
Identifier Resolution
- Here's how to get work/edition data for an isbn
+ Here's how to get use the api to find related identifiers:
+
+
+
+
+
{% if user.is_authenticated %}
Campaign info
Here's how to get data on all campaigns. if the user is logged in to Unglue.it, they can tell if the campaign book is on their fave list
- JSON: {{base_url}}/api/v1/campaign/?format=json&api_key={your_api_key}&username={your_username}<
XML: {{base_url}}/api/v1/campaign/?format=json&api_key={your_api_key}&username={your_username}
- Identifier Resolution
- Here's how to get work/edition data for an isbn
- JSON: {{base_url}}/api/v1/identifier/?format=json&api_key={your_api_key}&username={your_username}&type=isbn&value=9780441012039
- XML: {{base_url}}/api/v1/identifier/?format=xml&api_key={your_api_key}&username={your_username}&type=isbn&value=9780441012039
- In addition to isbn, you can use 'goog' if you have a google books id, and 'oclc' for oclc numbers.
- {% endif %}
-
+ JSON: {{base_url}}/api/v1/campaign/?format=json&api_key={your_api_key}&username={your_username}
XML: {{base_url}}/api/v1/campaign/?format=json&api_key={your_api_key}&username={your_username}
+ {% endif %}
OPDS Catalog Feeds
- We have a basic implementation of OPDS feeds. You don't need a key to use them. The starting point is {{base_url}}{% url 'opds' %}
+ We have a basic implementation of OPDS feeds. You don't need a key to use them. The starting point is {{base_url}}{% url 'opds' %}
. Use the page
parameter to page through the results.
Examples:
- filtered by format
- {{base_url}}{% url 'opds_acqusition' 'epub' %}
+ {{base_url}}{% url 'opds_acqusition' 'epub' %}
- filtered by license
- {{base_url}}{% url 'opds_acqusition' 'by-sa' %}
+ {{base_url}}{% url 'opds_acqusition' 'by-sa' %}
- filtered by title search
- {{base_url}}{% url 'opds_acqusition' 's.open' %}
+ {{base_url}}{% url 'opds_acqusition' 's.open' %}
- filtered by keyword
- {{base_url}}{% url 'opds_acqusition' 'kw.fiction' %}
+ {{base_url}}{% url 'opds_acqusition' 'kw.fiction' %}
- filtered by ungluer
- {{base_url}}{% url 'opds_acqusition' '@eric' %}
+ {{base_url}}{% url 'opds_acqusition' '@eric' %}
+ - filtered by having a Project Gutenberg or DOAB identifier (doab, gtbg)
+ {{base_url}}{% url 'opds_acqusition' 'doab/-gtbg' %}?page=1
There's also an OPDS record available for every work on unglue.it. For example, requesting, {{base_url}}{% url 'opds_acqusition' 'all'%}?work=13950
get you to the web page or opds record for A Christmas Carol.
ONIX Catalog Feeds
- There is an ONIX 3.0 feed corresponding to every facet of our free ebook lists. You don't need a key to use them. There is a maximum of 100 books per result you can change with the max parameter. For example, here are the first hundred CC BY-ND-ND licensed books available in EPUB.
+ There is an ONIX 3.0 feed corresponding to every facet of our free ebook lists. You don't need a key to use them. There is a maximum of 100 books per result you can change with the max
parameter. For example, here are the first twenty CC BY-ND-ND licensed books available in EPUB. Pages of 30 records each are available via the page
parameter. Here's the first page of books from the Directory of Open Access Books.
There's also an ONIX record available for every free ebook on unglue.it. For example, here is Issues in Open Research Data.
Identifiers with Content type negotiation
diff --git a/api/templates/editions.html b/api/templates/editions.html
index 71e815320..10fb0f8e0 100644
--- a/api/templates/editions.html
+++ b/api/templates/editions.html
@@ -8,7 +8,7 @@
- {% if editions %}
+ {% if editions.exists %}
{% for edition in editions %}
- {{edition.id}} | {{edition.title}} |
@@ -22,5 +22,3 @@