"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from bs4 import BeautifulSoup\n",
+ "from difflib import HtmlDiff\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "# Extract text content from HTML\n",
+ "soup_original = BeautifulSoup(original, 'html.parser')\n",
+ "soup_changed = BeautifulSoup(changed, 'html.parser')\n",
+ "\n",
+ "text_original = soup_original.get_text()\n",
+ "text_changed = soup_changed.get_text()\n",
+ "\n",
+ "# Compare the text content\n",
+ "diff_text = HtmlDiff().make_file(text_original.splitlines(), text_changed.splitlines())\n",
+ "\n",
+ "# Display the diff\n",
+ "display(HTML(diff_text))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. htmldiff2\n",
+ "\n",
+ "* Source: https://github.com/edsu/htmldiff2\n",
+ "* Install: `./pip install htmldiff2`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from htmldiff2 import render_html_diff\n",
+ "import re\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "original = ''''First Document
\\n\\n\\n\\n\\n\\t\\n\\t\\t\\n\\nLorem ipsum sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
\\n\\nLorem ipsum dolor
\\n\\nDuis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\\n\\nvulputate velit molestie consequat
\\n\\nDuis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\\n\\n\\n\\nLorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
\\n\\n\\n\\t\\n\\t\\n\\n\\n\\n'''\n",
+ "changed = ''''Second Document
\\n\\n\\n\\n\\n\\t\\n\\t\\t\\n\\nLorem ipsum sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
\\n\\nLorem ipsum dolor
\\n\\nDuis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\\n\\nvulputate velit molestie consequat
\\n\\nDuis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\\n\\n\\n\\nLorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
\\n\\n\\n\\t\\n\\t\\n\\n\\n\\n'''\n",
+ "# Remove the HTML comments (non-greedy match; DOTALL lets a comment span several lines)\n",
+ "original = re.sub(r'<!--.*?-->', '', original, flags=re.DOTALL)\n",
+ "changed = re.sub(r'<!--.*?-->', '', changed, flags=re.DOTALL)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "'
FirstSecond Document
\n",
+ "
Lorem ipsum sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
\n",
+ "
Lorem ipsum dolor
\n",
+ "
Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\n",
+ "
vulputate velit molestie consequat
\n",
+ "
Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.
\n",
+ "
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "diff = render_html_diff(original, changed)\n",
+ "# Render the diff markup inline; use print(diff) instead to inspect the raw HTML\n",
+ "display(HTML(diff))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "vpy38",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ },
+ "orig_nbformat": 2
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/versioning.md b/docs/versioning.md
new file mode 100644
index 000000000..115a4142e
--- /dev/null
+++ b/docs/versioning.md
@@ -0,0 +1,243 @@
+# Workflow
+
+## Introduction
+
+The key tools for content quality assurance are _workflow_ and _versioning_;
+both approaches go hand in hand in content production:
+
+* **workflow** ensures that the right person edits or reviews content at the right time, while
+* **versioning** ensures that the history of editing steps remains transparent and traceable.
+
+A workflow requires at least two _versions_ of a document:
+
+1. _Working version_ that is being edited, and
+2. _Published version_ (aka. _live version_).
+
+When a document enters the workflow, a copy of the currently published document version is created and serves as the _working version_. And when the document is published, the working version becomes the current _live version_ by irrevocably overwriting the former live version.
+
+To model a workflow, ZMS allows you to define state names for the content and program the transitions between these states. The following general principles apply:
+
+1. A workflow is the sequence of predefined state _transitions_ in a logical order - with the goal of document release.
+2. A workflow step requires a transition from one active workflow state (activity state) of the content object to another.
+3. Starting from the basic state, the workflow always starts automatically with the `TR_ENTER` transition, i.e. with the transfer of the object to the active workflow-specific initial state _Changed_.
+4. The workflow always ends with the `TR_LEAVE` transition to the target state `AC_COMMITTED` _Committed_ (corresponds to the empty basic state `None`).
+
+
+## Content States
+
+### Basic States (STATE)
+
+When an editor makes a content change and clicks the save button, the system records this change by assigning one of three basic states to the object.
+In addition, it is possible that no state is assigned at all. So the four possible basic states are:
+
+1. `STATE_NEW`
+2. `STATE_MODIFIED`
+3. `STATE_DELETED`
+4. `None` (no state assigned, means _committed_/published)
+
+
+These states are fundamental and operate independently of any activated workflow. Once the workflow is activated, _transitions_ become relevant to add further, workflow-specific state values to the content: if a content object is assigned one of the basic states, it automatically triggers a virtual transition to enter the workflow process, specifically the transition (tr) `TR_ENTER` for the PAGE container of the edited content object.
+As a result, the PAGE container, along with the affected content object, is assigned the initial workflow status, which is labeled as _changed_ by an activity (ac) status `AC_CHANGED`.
+
+
+### Activity States (AC)
+
+Activity states are induced by specific workflow transitions; so any _activity state_ can get changed to another activity state by a _transition_ method that will exactly perform this specific action.
+
+![Activity States](images/admin_wf_ac.gif)
+
+The workflow model above starts implicitly with the basic state "Changed" and will be left implicitly with the activity state "committed"; besides the implicit initial activity state `AC_CHANGED` (as a result of the basic state settings like new, modified or deleted), the user-performed _activity states_ of the workflow are:
+
+1. Commit requested
+2. Committed
+
+To perform the changes of the activity states two _transitions_ are needed:
+
+1. Request commit
+2. Commit
+
+## Transitions (TR)
+
+A _transition_ is the change of a document state from one to another executed by a transaction and fully described by these three elements:
+
+```
+State-A -----Transaction-----> State-B
+```
+To ensure a logical _flow_ of transitions, any ending state shall be the starting state of another transition. Otherwise the workflow may end prematurely and document changes cannot be published.
+
+```
+State-A -----Transaction-AB-----> State-B
+State-B -----Transaction-BC-----> State-C
+```
+
+This is a linear flow from _State-A_ to _State-C_. But how does a document get to _State-A_? And when does the flow end, when will the document be published? That is why two preset transitions for _entering_ (TR_ENTER) and _leaving_ (TR_LEAVE) the workflow are needed:
+
+```
+ Enter Workflow-----> State-A
+State-A -----Transaction-AB-----> State-B
+State-B -----Transaction-BC-----> State-C
+State-C -----Leave Workflow
+```
+
+The ZMS-UI allows you to model this stepwise: First you define a set of activity states beginning with `AC_CHANGED` (_Changed_). Then you add a set of _transitions_ starting with `TR_ENTER` and ending with `TR_LEAVE`. Any transition has one or more states where it can start from and exactly one state where it ends.
+The visualisation of a very simple workflow may look like this:
+
+![Simple Workflow Model](images/admin_wf_minimal.gif)
+
+_Screen image: Simple workflow with two major transitions: 1. requesting a commit and 2. committing_
+
+Besides the transitions from one state to another, the screen image shows two more important aspects for designing a workflow:
+
+1. A transition can get started from more than one state, e.g. "Commit" can be performed from "Changed" (like a shortcut for faster publication) and from "Commit requested" (to get the approval first)
+2. A transition can be performed by certain user roles
+
+This very simple workflow can be made more flexible by adding more _transitions_, e.g. a transition for rejecting a request for document commit or a transition for rolling back all document changes. A _rollback_ would leave the workflow just as a _commit_ does. And you can add an "Express Commit" transition for instant publishing.
+
+![Extended Workflow Model](images/admin_wf_extended.gif)
+
+_Screen image: The simple workflow has got some more transitions to cover variants in the workflow and to make it more flexible_
+
+
+## Selective workflow
+
+The content nodes that follow the workflow on publishing a document can be assigned individually. The assignment works recursively. So if just the root node is assigned, the workflow applies to the whole content tree.
+
+# Versioning
+
+## Introduction
+
+Each content block object (being a set of attributes) can be stored in its own version.
+This object has a unique id and this id is referenced by the `ZMSCustom`-container.
+Any object is designed to exist in two versions; its container-object aggregates these two versions by id-linking to the corresponding content object:
+* `version_live_id` for the current published live-version
+* `version_work_id` for the current version in progress
+
+Because a document is massively fragmented into small block objects, a useful aggregate is the committable container-object. Thus committing a container-object (document) will be equivalent to tagging a changeset.
+
+The following example shows that if the blocks are only versioned atomically (here two blocks change), the sequence of changes cannot be resolved as long as there is no change documentation for the container document, i.e., the sum of the blocks:
+
+```
+document-e1: 0.0.1
+ |---block-e2: 0.0.1
+ |---block-e3: 0.0.2
+ |---block-e4: 0.0.1
+
+document-e1: 0.0.1
+ |---block-e2: 0.0.1
+ |---block-e3: 0.0.3
+ |---block-e4: 0.0.3
+```
+
+The document container must keep a log and increment its version with each child change, as if it were changed itself. This is the only way to historically trace the changes.
+
+
+```
+document-e1: - 0.0.1 {e2:0.0.1, e3:0.0.1, e4:0.0.1}
+ | - 0.0.2 {e2:0.0.1, e3:0.0.2, e4:0.0.1}
+ | - 0.0.3 {e2:0.0.1, e3:0.0.3, e4:0.0.1}
+ | - 0.0.4 {e2:0.0.1, e3:0.0.3, e4:0.0.2}
+ | - 0.0.5 {e2:0.0.1, e3:0.0.3, e4:0.0.3}
+ |---block-e2: [0.0.1]
+ |---block-e3: [0.0.1, 0.0.2, 0.0.3]
+ |---block-e4: [0.0.1, 0.0.2, 0.0.3]
+```
+
+### Implementation (DRAFT)
+
+To implement the versioning system for a container object that contains an arbitrary number of sub-objects, each individually versioned on any changes, consider implementing a versioning vector that captures the state of the entire container and its sub-objects. Here is a proposed solution:
+
+1. **Version Vector Structure**: Use a version vector that includes the version of the container object itself and the versions of all its sub-objects. This vector should be updated whenever any sub-object or the container object changes.
+
+2. **Composite Versioning**: Maintain a composite version for the container object that reflects the versions of all its sub-objects. This composite version can be a hash or a concatenation of the individual versions.
+
+3. **Change Log**: Keep a detailed change log for the container object that records changes to both the container and its sub-objects. This log should include timestamps and the specific changes made.
+
+4. **Incremental Updates**: Increment the version of the container object whenever any sub-object changes. This ensures that the container's version always reflects the latest state of its contents.
+
+5. **Efficient Storage**: Store the version vector in a way that minimizes redundancy and allows for efficient retrieval and comparison of versions.
+
+
+_**Example Implementation:**_
+
+```py
+class VersionedObject:  # container whose version counter advances whenever update_version() runs
+ def __init__(self, id):
+ self.id = id
+ self.version = 0
+ self.sub_objects = {}  # maps sub-object id -> sub-object
+ self.change_log = []  # one version-vector snapshot per update_version() call
+
+ def add_sub_object(self, sub_object):  # register under sub_object.id (replacing any previous entry), then bump version
+ self.sub_objects[sub_object.id] = sub_object
+ self.update_version()
+
+ def update_sub_object(self, sub_object_id, new_version):  # set the version of an already registered sub-object
+ if sub_object_id in self.sub_objects:
+ self.sub_objects[sub_object_id].version = new_version
+ self.update_version()
+
+ def update_version(self):  # increment the container version and log the resulting version vector
+ self.version += 1
+ self.change_log.append(self.get_version_vector())
+
+ def get_version_vector(self):  # returns {container id: version, sub-object id: version, ...}
+ version_vector = {self.id: self.version}
+ for sub_object_id, sub_object in self.sub_objects.items():
+ version_vector[sub_object_id] = sub_object.version  # a sub-object id equal to self.id overwrites the container entry
+ return version_vector
+
+class SubObject:  # minimal versioned child: an id plus a version that starts at 0
+ def __init__(self, id):
+ self.id = id
+ self.version = 0
+
+# Example usage
+container = VersionedObject('container')
+block1 = SubObject('block1')
+block2 = SubObject('block2')
+
+container.add_sub_object(block1)  # container version 0 -> 1
+container.add_sub_object(block2)  # container version 1 -> 2
+
+container.update_sub_object('block1', 1)  # container version 2 -> 3
+container.update_sub_object('block2', 2)  # container version 3 -> 4
+
+print(container.get_version_vector())  # {'container': 4, 'block1': 1, 'block2': 2}
+```
+
+_**Explanation:**_
+
+* **VersionedObject Class**: Represents the container object. It maintains a version, a dictionary of sub-objects, and a change log.
+* **SubObject Class**: Represents a sub-object with its own version.
+* **add_sub_object Method**: Adds a sub-object to the container and updates the container's version.
+* **update_sub_object Method**: Updates the version of a sub-object and increments the container's version.
+* **update_version Method**: Increments the container's version and logs the current version vector.
+* **get_version_vector Method**: Returns the current version vector, which includes the versions of the container and all its sub-objects.
+
+This approach ensures that the container's version always reflects the latest state of its sub-objects, providing a clear and traceable version history.
+
+
+
+## Numbering
+
+The version numbering follows the scheme:
+
+```
+major.minor.patch
+```
+
+* **major**: Significant changes, possibly incompatible with previous versions (aka. _master_ version).
+* **minor**: Minor feature additions, backward-compatible.
+* **patch**: Bug fixes and minor changes, backward-compatible.
+
+## Versioning with activated workflow
+
+When the workflow is activated, versioning integrates seamlessly with the workflow states and transitions. Each state change or transition can trigger the creation of a new _patch_ version, ensuring that every step in the workflow is documented and traceable. Every commit of a document creates a new _minor_ version and all atomic _patch_ versions during the workflow cycle are omitted.
+This integration provides a robust mechanism for content management, combining the benefits of both versioning and workflow to maintain high content quality and accountability.
+
+## Versioning without workflow
+
+In scenarios where the workflow is not activated, versioning still ensures that changes are tracked and can be reverted if necessary. Each save action creates a new _minor_ version of the content. Users can manually switch between versions or restore previous versions as needed.
+
+In both cases ZMS does not implicitly create a _major_ version (unlike _patch_ and _minor_ versions); this has to be done explicitly by a user interaction ("Create Major Version"). Important note: Creating a _major_ version omits all _minor_ and _patch_ versions, and thus helps to reduce the amount of data.
+
diff --git a/requirements-full.txt b/requirements-full.txt
index 8a320d6c2..24f1fec69 100644
--- a/requirements-full.txt
+++ b/requirements-full.txt
@@ -23,6 +23,7 @@ Markdown
pyScss
ftfy
pdfminer.six
+htmldiff2
# OpenSearch
opensearch-py