From 491f9af1d1250b570f6c8ce75e712b2efe8c9879 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Sun, 11 Aug 2024 11:55:00 +0100
Subject: [PATCH 1/7] FEAT: Commons IP schema validation

- removed expansions of status enums; and
- added a `ruleId` alias for validation `rule_id`.
---
 eark_validator/model/constants.py         | 8 ++++----
 eark_validator/model/validation_report.py | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/eark_validator/model/constants.py b/eark_validator/model/constants.py
index c412eca..a1c70c5 100644
--- a/eark_validator/model/constants.py
+++ b/eark_validator/model/constants.py
@@ -34,9 +34,9 @@
 SHOULD = 'SHOULD'
 MUST = 'MUST'
 UNKNOWN = 'Unknown'
-INFORMATION = 'Information'
-WARNING = 'Warning'
+INFORMATION = 'Info'
+WARNING = 'Warn'
 ERROR = 'Error'
-NOTWELLFORMED = 'Not Well Formed'
-WELLFORMED = 'Well Formed'
+NOTWELLFORMED = 'NotWellFormed'
+WELLFORMED = 'WellFormed'
 PACKAGE = 'Package'
diff --git a/eark_validator/model/validation_report.py b/eark_validator/model/validation_report.py
index 7ff4675..11e6096 100644
--- a/eark_validator/model/validation_report.py
+++ b/eark_validator/model/validation_report.py
@@ -33,7 +33,7 @@
 from typing import List, Optional
 import uuid
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from .package_details import InformationPackage
 from .specifications import Level
@@ -84,14 +84,14 @@ class Location(BaseModel):
     description: str = ''
 
 class Result(BaseModel):
-    rule_id: str | None
+    rule_id: Optional[str] = Field(default=None, alias='ruleId')
     severity: Severity = Severity.UNKNOWN
     location: Location | None
     message: str | None
 
 @unique
 class StructureStatus(str, Enum):
-    """Enum covering information package validation statuses."""
+    """Enum for information package structure status values."""
     UNKNOWN = UNKNOWN
     # Package has basic parse / structure problems and can't be validated
     NOTWELLFORMED = NOTWELLFORMED

From 3f8ad916148649229d28247e41c38dc7d7921583 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Tue, 27 Aug 2024 12:20:25 +0100
Subject: [PATCH 2/7] MAINT: Merge integration into current branch.

---
 eark_validator/mets.py                    | 450 +++++++++++-----------
 eark_validator/model/validation_report.py |   8 +-
 eark_validator/rules.py                   |   8 +-
 eark_validator/structure.py               |  30 +-
 4 files changed, 233 insertions(+), 263 deletions(-)

diff --git a/eark_validator/mets.py b/eark_validator/mets.py
index 56a24ff..6aaa870 100644
--- a/eark_validator/mets.py
+++ b/eark_validator/mets.py
@@ -1,228 +1,224 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
-#
-# E-ARK Validation
-# Copyright (C) 2019
-# All rights reserved.
-#
-# Licensed to the E-ARK project under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The E-ARK project licenses
-# this file to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-"""METS Schema validation."""
-import os
-from pathlib import Path
-from typing import Dict, List
-
-from lxml import etree
-
-from eark_validator.ipxml.schema import IP_SCHEMA
-from eark_validator.ipxml.namespaces import Namespaces
-from eark_validator.model.checksum import Checksum, ChecksumAlg
-from eark_validator.model.metadata import FileEntry, MetsFile, MetsRoot
-from eark_validator.model.validation_report import Location, Result
-from eark_validator.utils import get_path
-from eark_validator.const import NOT_FILE, NOT_VALID_FILE
-
-NAMESPACES : str = 'namespaces'
-OBJID: str = 'objid'
-LABEL: str = 'label'
-TYPE: str = 'type'
-PROFILE: str = 'profile'
-OTHERTYPE: str = 'OTHERTYPE'
-
-START_ELE: str = 'start'
-START_NS: str = 'start-ns'
-
-class MetsFiles():
-    @staticmethod
-    def details_from_mets_root(namespaces: dict[str,str], root_element: etree.Element) -> MetsRoot:
-        return MetsRoot.model_validate({
-            NAMESPACES: namespaces,
-            OBJID: root_element.get(OBJID.upper(), ''),
-            LABEL: root_element.get(LABEL.upper(), ''),
-            TYPE: root_element.get(TYPE.upper(), ''),
-            PROFILE: root_element.get(PROFILE.upper(), '')
-            })
-
-    @staticmethod
-    def from_file(mets_file: Path | str) -> MetsFile:
-        path: Path = get_path(mets_file, True)
-        if not path.is_file():
-            raise ValueError(NOT_FILE.format(mets_file))
-        ns: dict[str, str] = {}
-        entries: list[FileEntry] = []
-        othertype = contentinformationtype = oaispackagetype = mets_root = ''
-        try:
-            parsed_mets = etree.iterparse(mets_file, events=[START_ELE, START_NS])
-            for event, element in parsed_mets:
-                if event == START_NS:
-                    prefix = element[0]
-                    ns_uri = element[1]
-                    ns[prefix] = ns_uri
-                if event == 'start':
-                    if element.tag == Namespaces.METS.qualify('mets'):
-                        mets_root: MetsRoot = MetsFiles.details_from_mets_root(ns, element)
-                        othertype = element.get(Namespaces.CSIP.qualify(OTHERTYPE), '')
-                        contentinformationtype = element.get(
-                            Namespaces.CSIP.qualify('CONTENTINFORMATIONTYPE'),
-                            ''
-                        )
-                    elif element.tag == Namespaces.METS.qualify('metsHdr'):
-                        oaispackagetype = element.get(
-                            Namespaces.CSIP.qualify('OAISPACKAGETYPE'), ''
-                        )
-                    elif element.tag in [
-                            Namespaces.METS.qualify('file'),
-                            Namespaces.METS.qualify('mdRef')
-                        ]:
-                        entries.append(_parse_file_entry(element))
-        except etree.XMLSyntaxError as ex:
-            raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) from ex
-        return MetsFile.model_validate({
-            'root': mets_root,
-            'oaispackagetype': oaispackagetype,
-            'othertype': othertype,
-            'contentinformationtype': contentinformationtype,
-            'file_entries': entries
-            })
-
-class MetsValidator():
-    """Encapsulates METS schema validation."""
-    def __init__(self, root: str):
-        self._validation_errors: List[Result] = []
-        self._package_root: str = root
-        self._reps_mets: Dict[str , str] = {}
-        self._file_refs: List[FileEntry] = []
-
-    @property
-    def root(self) -> str:
-        return self._package_root
-
-    @property
-    def validation_errors(self) -> List[Result]:
-        return self._validation_errors
-
-    @property
-    def representations(self) -> List[str]:
-        return self._reps_mets.keys()
-
-    @property
-    def representation_mets(self) -> List[str]:
-        return self._reps_mets.values()
-
-    @property
-    def file_references(self) -> List[FileEntry]:
-        return self._file_refs
-
-    @property
-    def is_valid(self) -> bool:
-        return len(self._validation_errors) == 0
-
-    def get_mets_path(self, rep_name: str) -> str:
-        return self._reps_mets[rep_name]
-
-    def validate_mets(self, mets: str) -> bool:
-        '''
-        Validates a Mets file. The Mets file is parsed with etree.iterparse(),
-        which allows event-driven parsing of large files. On certain events/conditions
-        actions are taken, like file validation or adding Mets files found inside
-        representations to a list so that they will be evaluated later on.
-
-        @param mets:    Path leading to a Mets file that will be evaluated.
-        @return:        Boolean validation result.
-        '''
-        # Handle relative package paths for representation METS files.
-        self._package_root, mets = _handle_rel_paths(self._package_root, mets)
-        try:
-            parsed_mets = etree.iterparse(mets, schema=IP_SCHEMA.get('csip'))
-            for _, element in parsed_mets:
-                self._process_element(element)
-        except etree.XMLSyntaxError as synt_err:
-            self._validation_errors.append(
-                Result.model_validate({
-                    'rule_id': 'XML-1',
-                    'location': Location.model_validate({
-                                        'context': synt_err.filename,
-                                        'test': str(synt_err.lineno),
-                                        'description': str(synt_err.offset)
-                                }),
-                    'message': f'File {mets} is not valid XML. {synt_err.msg}',
-                    'severity': 'Error'
-                    })
-            )
-        return len(self._validation_errors) == 0
-
-    def _process_element(self, element: etree.Element) -> None:
-        # Define what to do with specific tags.
-        if element.tag == Namespaces.METS.qualify('div') and \
-            element.attrib['LABEL'].lower().startswith('representations/'):
-            self._process_rep_div(element)
-            return
-        if element.tag in [ Namespaces.METS.qualify('file'), Namespaces.METS.qualify('mdRef') ]:
-            self._file_refs.append(_parse_file_entry(element))
-
-    def _process_rep_div(self, element: etree.Element) -> None:
-        rep = element.attrib['LABEL'].rsplit('/', 1)[1]
-        for child in element.getchildren():
-            if child.tag == Namespaces.METS.qualify('mptr'):
-                self._reps_mets.update({
-                    rep:  child.attrib[Namespaces.XLINK.qualify('href')]
-                })
-
-def _parse_file_entry(element: etree.Element) -> FileEntry:
-    """Create a FileItem from an etree element."""
-    return FileEntry.model_validate({
-        'path': _path_from_xml_element(element),
-        'size': int(element.attrib['SIZE']),
-        'checksum': _checksum_from_mets_element(element),
-        'mimetype': element.attrib.get('MIMETYPE') or ''
-        })
-
-def _path_from_xml_element(element: etree.Element) -> str:
-    loc_ele: etree.Element = element
-    if element.tag in [ Namespaces.METS.qualify('file'), 'file' ]:
-        tag: str = Namespaces.METS.qualify('FLocat') if hasattr(element, 'nsmap') else 'FLocat'
-        loc_ele = element.find(tag)
-    if element.tag in [
-        Namespaces.METS.qualify('file'),
-        'file', Namespaces.METS.qualify('mdRef'),
-        'mdRef'
-        ]:
-        return  _get_path_attrib(loc_ele)
-    raise ValueError(f'Element {element.tag} is not a METS:file or METS:mdRef element.')
-
-def _get_path_attrib(element: etree.Element) -> str:
-    """Get the path attribute from an etree element."""
-    attrib_name = Namespaces.XLINK.qualify('href') if hasattr(element, 'nsmap') else 'href'
-    return element.attrib.get(attrib_name) or ''
-
-def _checksum_from_mets_element(element: etree.Element) -> Checksum:
-    """Create a Checksum from an etree element."""
-    # Get the child flocat element and grab the href attribute.
-    return Checksum.model_validate({
-        'algorithm': ChecksumAlg.from_string(element.attrib['CHECKSUMTYPE']),
-        'value': element.attrib['CHECKSUM']},
-            strict=True)
-
-def _handle_rel_paths(rootpath: str, metspath: str) -> tuple[str, str]:
-    if metspath.startswith('file:///') or os.path.isabs(metspath):
-        return metspath.rsplit('/', 1)[0], metspath
-    if metspath.startswith('file://./'):
-        relpath = os.path.join(rootpath, metspath[9:])
-    else:
-        relpath = os.path.join(rootpath, metspath)
-    return relpath.rsplit('/', 1)[0], relpath
+#
+# E-ARK Validation
+# Copyright (C) 2019
+# All rights reserved.
+#
+# Licensed to the E-ARK project under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The E-ARK project licenses
+# this file to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+"""METS Schema validation."""
+import os
+from pathlib import Path
+from typing import Dict, List
+
+from lxml import etree
+
+from eark_validator.ipxml.schema import IP_SCHEMA
+from eark_validator.ipxml.namespaces import Namespaces
+from eark_validator.model.checksum import Checksum, ChecksumAlg
+from eark_validator.model.metadata import FileEntry, MetsFile, MetsRoot
+from eark_validator.model.validation_report import Result
+from eark_validator.utils import get_path
+from eark_validator.const import NOT_FILE, NOT_VALID_FILE
+
+NAMESPACES : str = 'namespaces'
+OBJID: str = 'objid'
+LABEL: str = 'label'
+TYPE: str = 'type'
+PROFILE: str = 'profile'
+OTHERTYPE: str = 'OTHERTYPE'
+
+START_ELE: str = 'start'
+START_NS: str = 'start-ns'
+
+class MetsFiles():
+    @staticmethod
+    def details_from_mets_root(namespaces: dict[str,str], root_element: etree.Element) -> MetsRoot:
+        return MetsRoot.model_validate({
+            NAMESPACES: namespaces,
+            OBJID: root_element.get(OBJID.upper(), ''),
+            LABEL: root_element.get(LABEL.upper(), ''),
+            TYPE: root_element.get(TYPE.upper(), ''),
+            PROFILE: root_element.get(PROFILE.upper(), '')
+            })
+
+    @staticmethod
+    def from_file(mets_file: Path | str) -> MetsFile:
+        path: Path = get_path(mets_file, True)
+        if not path.is_file():
+            raise ValueError(NOT_FILE.format(mets_file))
+        ns: dict[str, str] = {}
+        entries: list[FileEntry] = []
+        othertype = contentinformationtype = oaispackagetype = mets_root = ''
+        try:
+            parsed_mets = etree.iterparse(mets_file, events=[START_ELE, START_NS])
+            for event, element in parsed_mets:
+                if event == START_NS:
+                    prefix = element[0]
+                    ns_uri = element[1]
+                    ns[prefix] = ns_uri
+                if event == 'start':
+                    if element.tag == Namespaces.METS.qualify('mets'):
+                        mets_root: MetsRoot = MetsFiles.details_from_mets_root(ns, element)
+                        othertype = element.get(Namespaces.CSIP.qualify(OTHERTYPE), '')
+                        contentinformationtype = element.get(
+                            Namespaces.CSIP.qualify('CONTENTINFORMATIONTYPE'),
+                            ''
+                        )
+                    elif element.tag == Namespaces.METS.qualify('metsHdr'):
+                        oaispackagetype = element.get(
+                            Namespaces.CSIP.qualify('OAISPACKAGETYPE'), ''
+                        )
+                    elif element.tag in [
+                            Namespaces.METS.qualify('file'),
+                            Namespaces.METS.qualify('mdRef')
+                        ]:
+                        entries.append(_parse_file_entry(element))
+        except etree.XMLSyntaxError as ex:
+            raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) from ex
+        return MetsFile.model_validate({
+            'root': mets_root,
+            'oaispackagetype': oaispackagetype,
+            'othertype': othertype,
+            'contentinformationtype': contentinformationtype,
+            'file_entries': entries
+            })
+
+class MetsValidator():
+    """Encapsulates METS schema validation."""
+    def __init__(self, root: str):
+        self._validation_errors: List[Result] = []
+        self._package_root: str = root
+        self._reps_mets: Dict[str , str] = {}
+        self._file_refs: List[FileEntry] = []
+
+    @property
+    def root(self) -> str:
+        return self._package_root
+
+    @property
+    def validation_errors(self) -> List[Result]:
+        return self._validation_errors
+
+    @property
+    def representations(self) -> List[str]:
+        return self._reps_mets.keys()
+
+    @property
+    def representation_mets(self) -> List[str]:
+        return self._reps_mets.values()
+
+    @property
+    def file_references(self) -> List[FileEntry]:
+        return self._file_refs
+
+    @property
+    def is_valid(self) -> bool:
+        return len(self._validation_errors) == 0
+
+    def get_mets_path(self, rep_name: str) -> str:
+        return self._reps_mets[rep_name]
+
+    def validate_mets(self, mets: str) -> bool:
+        '''
+        Validates a Mets file. The Mets file is parsed with etree.iterparse(),
+        which allows event-driven parsing of large files. On certain events/conditions
+        actions are taken, like file validation or adding Mets files found inside
+        representations to a list so that they will be evaluated later on.
+
+        @param mets:    Path leading to a Mets file that will be evaluated.
+        @return:        Boolean validation result.
+        '''
+        # Handle relative package paths for representation METS files.
+        self._package_root, mets = _handle_rel_paths(self._package_root, mets)
+        try:
+            parsed_mets = etree.iterparse(mets, schema=IP_SCHEMA.get('csip'))
+            for _, element in parsed_mets:
+                self._process_element(element)
+        except etree.XMLSyntaxError as synt_err:
+            self._validation_errors.append(
+                Result.model_validate({
+                    'rule_id': 'XML-1',
+                    'location': synt_err.filename + str(synt_err.lineno) + str(synt_err.offset),
+                    'message': f'File {mets} is not valid XML. {synt_err.msg}',
+                    'severity': 'Error'
+                    })
+            )
+        return len(self._validation_errors) == 0
+
+    def _process_element(self, element: etree.Element) -> None:
+        # Define what to do with specific tags.
+        if element.tag == Namespaces.METS.qualify('div') and \
+            element.attrib['LABEL'].lower().startswith('representations/'):
+            self._process_rep_div(element)
+            return
+        if element.tag in [ Namespaces.METS.qualify('file'), Namespaces.METS.qualify('mdRef') ]:
+            self._file_refs.append(_parse_file_entry(element))
+
+    def _process_rep_div(self, element: etree.Element) -> None:
+        rep = element.attrib['LABEL'].rsplit('/', 1)[1]
+        for child in element.getchildren():
+            if child.tag == Namespaces.METS.qualify('mptr'):
+                self._reps_mets.update({
+                    rep:  child.attrib[Namespaces.XLINK.qualify('href')]
+                })
+
+def _parse_file_entry(element: etree.Element) -> FileEntry:
+    """Create a FileItem from an etree element."""
+    return FileEntry.model_validate({
+        'path': _path_from_xml_element(element),
+        'size': int(element.attrib['SIZE']),
+        'checksum': _checksum_from_mets_element(element),
+        'mimetype': element.attrib.get('MIMETYPE') or ''
+        })
+
+def _path_from_xml_element(element: etree.Element) -> str:
+    loc_ele: etree.Element = element
+    if element.tag in [ Namespaces.METS.qualify('file'), 'file' ]:
+        tag: str = Namespaces.METS.qualify('FLocat') if hasattr(element, 'nsmap') else 'FLocat'
+        loc_ele = element.find(tag)
+    if element.tag in [
+        Namespaces.METS.qualify('file'),
+        'file', Namespaces.METS.qualify('mdRef'),
+        'mdRef'
+        ]:
+        return  _get_path_attrib(loc_ele)
+    raise ValueError(f'Element {element.tag} is not a METS:file or METS:mdRef element.')
+
+def _get_path_attrib(element: etree.Element) -> str:
+    """Get the path attribute from an etree element."""
+    attrib_name = Namespaces.XLINK.qualify('href') if hasattr(element, 'nsmap') else 'href'
+    return element.attrib.get(attrib_name) or ''
+
+def _checksum_from_mets_element(element: etree.Element) -> Checksum:
+    """Create a Checksum from an etree element."""
+    # Get the child flocat element and grab the href attribute.
+    return Checksum.model_validate({
+        'algorithm': ChecksumAlg.from_string(element.attrib['CHECKSUMTYPE']),
+        'value': element.attrib['CHECKSUM']},
+            strict=True)
+
+def _handle_rel_paths(rootpath: str, metspath: str) -> tuple[str, str]:
+    if metspath.startswith('file:///') or os.path.isabs(metspath):
+        return metspath.rsplit('/', 1)[0], metspath
+    if metspath.startswith('file://./'):
+        relpath = os.path.join(rootpath, metspath[9:])
+    else:
+        relpath = os.path.join(rootpath, metspath)
+    return relpath.rsplit('/', 1)[0], relpath
diff --git a/eark_validator/model/validation_report.py b/eark_validator/model/validation_report.py
index 11e6096..eebc6ba 100644
--- a/eark_validator/model/validation_report.py
+++ b/eark_validator/model/validation_report.py
@@ -77,16 +77,10 @@ def from_level(cls, level: Level) -> 'Severity':
             return Severity.WARNING
         return Severity.INFORMATION
 
-class Location(BaseModel):
-    """All details of the location of an error."""
-    context: str = ''
-    test: str = ''
-    description: str = ''
-
 class Result(BaseModel):
     rule_id: Optional[str] = Field(default=None, alias='ruleId')
     severity: Severity = Severity.UNKNOWN
-    location: Location | None
+    location: str | None
     message: str | None
 
 @unique
diff --git a/eark_validator/rules.py b/eark_validator/rules.py
index 5382752..92c82b7 100644
--- a/eark_validator/rules.py
+++ b/eark_validator/rules.py
@@ -29,7 +29,7 @@
 from lxml import etree as ET
 
 from eark_validator.ipxml.schematron import SchematronRuleset, SVRL_NS, get_schematron_path
-from eark_validator.model.validation_report import Location, Result
+from eark_validator.model.validation_report import Result
 from eark_validator.specifications.specification import EarkSpecification, Specification, SpecificationType, SpecificationVersion
 from eark_validator.const import NO_PATH, NOT_FILE
 from eark_validator.model import Severity
@@ -112,11 +112,7 @@ def from_element(rule: ET.Element, failed_assert: ET.Element) -> Result:
         severity = Severity.from_role(failed_assert.get('role', Severity.ERROR))
         location = failed_assert.get('location')
         message = failed_assert.find(SVRL_NS + 'text').text
-        location = Location.model_validate({
-            'context':context,
-            'test':test,
-            'description': location
-        })
+        location = context + test + location
         return Result.model_validate({
             'rule_id': rule_id, 'location':location, 'message':message, 'severity':severity
         })
diff --git a/eark_validator/structure.py b/eark_validator/structure.py
index 8bbecb9..31762cf 100644
--- a/eark_validator/structure.py
+++ b/eark_validator/structure.py
@@ -26,7 +26,6 @@
 import os
 from pathlib import Path
 from typing import Dict, List, Optional, Set, Tuple
-from eark_validator.model.validation_report import Location
 
 from eark_validator.specifications.struct_reqs import REQUIREMENTS
 from eark_validator.infopacks.package_handler import PackageHandler, PackageError
@@ -134,10 +133,7 @@ def get_test_results(self) -> StructResults:
         results: List[Result] = self.get_root_results()
         results = results + self.get_package_results()
         for name, tests in self.representations.items():
-            location = Location.model_validate({
-                'context': str(name),
-                'description': 'representation'
-                })
+            location = str(name) + ' representation'
             if not tests.has_data():
                 results.append(test_result_from_id(11, location))
             if not tests.has_mets():
@@ -149,18 +145,15 @@ def get_test_results(self) -> StructResults:
             'messages': results
             })
 
-    def get_representations(self) -> List[Representation]:
-        reps: List[Representation] = []
+    def get_representations(self) -> Dict[str, str]:
+        reps: Dict[str, str] = {}
         for rep in self.representations: # pylint: disable=C0201
             reps.append(Representation.model_validate({ 'name': rep }))
         return reps
 
     def get_root_results(self) -> List[Result]:
         results: List[Result] = []
-        location: Location = Location.model_validate({
-            'context': 'root',
-            'description': self.name
-            })
+        location: str = 'root ' + self.name
         if not self.parser.is_archive:
             results.append(test_result_from_id(3, location))
         if not self.parser.has_mets():
@@ -201,19 +194,13 @@ def _get_schema_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_schemas():
                 return None
-        return test_result_from_id(15, Location.model_validate({
-            'context': 'root',
-            'description': self.name
-            }))
+        return test_result_from_id(15, 'root ' + self.name)
 
     def _get_dox_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_documentation():
                 return None
-        return test_result_from_id(16, Location.model_validate({
-            'context': 'root',
-            'description': self.name
-            }))
+        return test_result_from_id(16, 'root ' + self.name)
 
     @classmethod
     def get_status(cls, results: List[Result]) -> StructureStatus:
@@ -252,10 +239,7 @@ def get_bad_path_results(path) -> StructResults:
         })
 
 def _get_str1_result_list(name: str) -> List[Result]:
-    return [ test_result_from_id(1, Location.model_validate({
-            'context': 'root',
-            'description': str(name)
-            })) ]
+    return [ test_result_from_id(1, 'root ' + str(name)) ]
 
 def validate(to_validate) -> Tuple[bool, StructResults]:
     try:

From 834fdfa400a23129777c863aaff72f065125fd8e Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Tue, 27 Aug 2024 20:59:09 +0100
Subject: [PATCH 3/7] FIX: Deserialisation of rule_id

- added a `model_config` parameter to allow both `rule_id` and `ruleId` as keys in Pydantic model validation;
- fixes issue with deserialisation of `rule_id` in `Result` model.
---
 eark_validator/model/validation_report.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/eark_validator/model/validation_report.py b/eark_validator/model/validation_report.py
index eebc6ba..34a94f5 100644
--- a/eark_validator/model/validation_report.py
+++ b/eark_validator/model/validation_report.py
@@ -33,7 +33,7 @@
 from typing import List, Optional
 import uuid
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from .package_details import InformationPackage
 from .specifications import Level
@@ -78,11 +78,13 @@ def from_level(cls, level: Level) -> 'Severity':
         return Severity.INFORMATION
 
 class Result(BaseModel):
-    rule_id: Optional[str] = Field(default=None, alias='ruleId')
+    model_config = ConfigDict(populate_by_name=True)
+    rule_id: Optional[str] = Field(validation_alias='ruleId')
     severity: Severity = Severity.UNKNOWN
     location: str | None
     message: str | None
 
+
 @unique
 class StructureStatus(str, Enum):
     """Enum for information package structure status values."""

From 2935fdb8caebfe02d910aff7ab1522075347a899 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Tue, 27 Aug 2024 21:05:57 +0100
Subject: [PATCH 4/7] FEAT: Convert commons-ip representations

- added a `model_validator` to the `InformationPackage` class to convert incoming JSON dictionary to a `List` for now;
- added Pydantic config to `Result` to allow multiple names for `rule_id` during validation;
- reverted change to dictionary as it was not necessary;
- added tests and data for deserialisation of commons-ip types; and
- fixed minor compiler warnings.
---
 eark_validator/model/package_details.py     | 18 +++++++-
 eark_validator/packages.py                  |  2 +-
 eark_validator/structure.py                 | 13 +++---
 tests/resources/json/__init__.py            | 30 +++++++++++++
 tests/resources/json/commons-ip-report.json | 49 +++++++++++++++++++++
 tests/rules_test.py                         | 12 ++++-
 tests/specification_test.py                 | 13 +++---
 7 files changed, 121 insertions(+), 16 deletions(-)
 create mode 100644 tests/resources/json/__init__.py
 create mode 100644 tests/resources/json/commons-ip-report.json

diff --git a/eark_validator/model/package_details.py b/eark_validator/model/package_details.py
index af724c9..853a52b 100644
--- a/eark_validator/model/package_details.py
+++ b/eark_validator/model/package_details.py
@@ -27,13 +27,14 @@
 E-ARK : Information Package Validation
         Information Package Package Details type
 """
-from typing import List, Optional
+from typing import Any, List, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationInfo, model_validator
 
 from .checksum import Checksum
 from .metadata import MetsFile
 
+
 class PackageDetails(BaseModel):
     label: str = ''
     oaispackagetype: str = ''
@@ -50,3 +51,16 @@ class InformationPackage(BaseModel):
     mets: Optional[MetsFile] = None
     package: Optional[PackageDetails] = None
     representations: List[Representation] = []
+
+    @model_validator(mode='before')
+    @classmethod
+    def convert_dict(cls, data: Any) -> list[Representation]:
+        representations = data.get('representations')
+        if isinstance(representations, dict):
+            # If this is a dict type then it's a commons-ip type, coerce to list
+            reps : list[Representation] = []
+            for k, v in representations.items():
+                reps.append(Representation(name=v,))
+            data['representations'] = reps
+        # Return the reps for further validation.
+        return data
diff --git a/eark_validator/packages.py b/eark_validator/packages.py
index 0143d29..404bbcb 100644
--- a/eark_validator/packages.py
+++ b/eark_validator/packages.py
@@ -86,7 +86,7 @@ def version(self) -> SpecificationVersion:
         return self._version
 
     @classmethod
-    def validate(self, version: SpecificationVersion, to_validate: Path) -> ValidationReport:
+    def validate(cls, version: SpecificationVersion, to_validate: Path) -> ValidationReport:
         """Returns the validation report that results from validating the path
         to_validate as a folder. The method does not validate archive files."""
         is_struct_valid, struct_results = structure.validate(to_validate)
diff --git a/eark_validator/structure.py b/eark_validator/structure.py
index 31762cf..04e4f6b 100644
--- a/eark_validator/structure.py
+++ b/eark_validator/structure.py
@@ -39,6 +39,7 @@
 
 METS_NAME = 'METS.xml'
 STR_REQ_PREFIX = 'CSIPSTR'
+ROOT = 'root '
 DIR_NAMES = {
     'DATA': 'data',
     'DESC': 'descriptive',
@@ -145,15 +146,15 @@ def get_test_results(self) -> StructResults:
             'messages': results
             })
 
-    def get_representations(self) -> Dict[str, str]:
-        reps: Dict[str, str] = {}
+    def get_representations(self) -> List[Representation]:
+        reps: List[Representation] = []
         for rep in self.representations: # pylint: disable=C0201
             reps.append(Representation.model_validate({ 'name': rep }))
         return reps
 
     def get_root_results(self) -> List[Result]:
         results: List[Result] = []
-        location: str = 'root ' + self.name
+        location: str = ROOT + self.name
         if not self.parser.is_archive:
             results.append(test_result_from_id(3, location))
         if not self.parser.has_mets():
@@ -194,13 +195,13 @@ def _get_schema_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_schemas():
                 return None
-        return test_result_from_id(15, 'root ' + self.name)
+        return test_result_from_id(15, ROOT + self.name)
 
     def _get_dox_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_documentation():
                 return None
-        return test_result_from_id(16, 'root ' + self.name)
+        return test_result_from_id(16, ROOT + self.name)
 
     @classmethod
     def get_status(cls, results: List[Result]) -> StructureStatus:
@@ -239,7 +240,7 @@ def get_bad_path_results(path) -> StructResults:
         })
 
 def _get_str1_result_list(name: str) -> List[Result]:
-    return [ test_result_from_id(1, 'root ' + str(name)) ]
+    return [ test_result_from_id(1, ROOT + str(name)) ]
 
 def validate(to_validate) -> Tuple[bool, StructResults]:
     try:
diff --git a/tests/resources/json/__init__.py b/tests/resources/json/__init__.py
new file mode 100644
index 0000000..d9d1168
--- /dev/null
+++ b/tests/resources/json/__init__.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa
+# -*- coding: utf-8 -*-
+#
+# E-ARK Validation
+# Copyright (C) 2019
+# All rights reserved.
+#
+# Licensed to the E-ARK project under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The E-ARK project licenses
+# this file to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+"""
+E-ARK : Information Package Validation
+        JSON report representation
+"""
diff --git a/tests/resources/json/commons-ip-report.json b/tests/resources/json/commons-ip-report.json
new file mode 100644
index 0000000..815d66a
--- /dev/null
+++ b/tests/resources/json/commons-ip-report.json
@@ -0,0 +1,49 @@
+{
+  "package" : {
+    "details" : {
+      "name" : "733dc055-34be-4260-85c7-5549a7083031.zip",
+      "checksums" : [ {
+        "algorithm" : "SHA1",
+        "value" : "B8EFAE7679EF63CDB9EF80B643672EE31E1C2898"
+      } ]
+    },
+    "profile" : {
+      "type" : null,
+      "name" : "733dc055-34be-4260-85c7-5549a7083031.zip",
+      "version" : "CSIP-"
+    },
+    "representations" : {
+      "name" : "733dc055-34be-4260-85c7-5549a7083031.zip"
+    }
+  },
+  "uid" : "d991e991-8abc-4ad3-984c-c936de6c53d7",
+  "structure" : {
+    "status" : "WellFormed",
+    "messages" : [ {
+      "ruleId" : "CSIPSTR16",
+      "location" : "",
+      "message" : "There is no documentation folder in the representation folder 733dc055-34be-4260-85c7-5549a7083031/representations/rep1. ",
+      "severity" : "Warn"
+    } ]
+  },
+  "metadata" : {
+    "schemaResults" : {
+      "status" : "VALID",
+      "messages" : [ ]
+    },
+    "schematronResults" : {
+      "status" : "VALID",
+      "messages" : [ {
+        "ruleId" : "CSIP17",
+        "location" : "mets/dmdSec",
+        "message" : "Doesn't have files in metadata/descriptive folder but have dmdSec in 733dc055-34be-4260-85c7-5549a7083031/representations/rep1/METS.xml; Put the files under metadata folder ",
+        "severity" : "Warn"
+      }, {
+        "ruleId" : "CSIP45",
+        "location" : "mets/amdSec/rightsMD",
+        "message" : "Individual representations should state their specific rights in their representation METS file (733dc055-34be-4260-85c7-5549a7083031/representations/rep1/METS.xml) Individual representations should state their specific rights in their representation METS file (Root METS.xml) ",
+        "severity" : "Info"
+      } ]
+    }
+  }
+}
diff --git a/tests/rules_test.py b/tests/rules_test.py
index cf68634..3d3f852 100644
--- a/tests/rules_test.py
+++ b/tests/rules_test.py
@@ -31,13 +31,16 @@
 from pydantic import ValidationError
 
 from eark_validator import rules as SC
-from eark_validator.model.validation_report import Severity, Result
+from eark_validator.model.validation_report import Severity, Result, ValidationReport
 from eark_validator.specifications.specification import SpecificationType, SpecificationVersion
 import tests.resources.schematron as SCHEMATRON
 import tests.resources.xml as XML
+import tests.resources.json as JSON
 
 TEST_RES = 'tests.resources'
 TEST_RES_XML = TEST_RES + '.xml'
+TEST_RES_JSON = TEST_RES + '.json'
+COMMONS_IP_JSON = str(files(JSON).joinpath('commons-ip-report.json'))
 PERSON_PATH = str(files(SCHEMATRON).joinpath('person.xml'))
 NOT_FOUND_PATH = str(files(SCHEMATRON).joinpath('not-found.xml'))
 EMPTY_FILE_PATH = str(files(TEST_RES).joinpath('empty.file'))
@@ -225,6 +228,13 @@ def test_get_bad_key(self):
         result = profile.get_result('badkey')
         self.assertIsNone(result)
 
+    def test_deserialise_commons_ip_report(self):
+        file_name = COMMONS_IP_JSON
+        with open(file_name, 'r', encoding='utf-8') as _f:
+            contents = _f.read()
+        result: ValidationReport = ValidationReport.model_validate_json(contents)
+        self.assertIsNotNone(result)
+
 class SeverityTest(str, Enum):
     NOT_SEV = 'NOT_SEV'
 
diff --git a/tests/specification_test.py b/tests/specification_test.py
index 785163e..cfef0a0 100644
--- a/tests/specification_test.py
+++ b/tests/specification_test.py
@@ -23,6 +23,7 @@
 # under the License.
 #
 
+from typing import Optional
 import unittest
 
 from lxml import etree as ET
@@ -72,12 +73,12 @@ def test_from_rule_no(self):
 
 class SpecificationTypeTest(unittest.TestCase):
     def test_value(self):
-        type = SpecificationType.CSIP
-        self.assertEqual(type.value, 'E-ARK-CSIP')
-        type = SpecificationType.SIP
-        self.assertEqual(type.value, 'E-ARK-SIP')
-        type = SpecificationType.DIP
-        self.assertEqual(type.value, 'E-ARK-DIP')
+        spec_type: SpecificationType = SpecificationType.CSIP
+        self.assertEqual(spec_type.value, 'E-ARK-CSIP')
+        spec_type = SpecificationType.SIP
+        self.assertEqual(spec_type.value, 'E-ARK-SIP')
+        spec_type = SpecificationType.DIP
+        self.assertEqual(spec_type.value, 'E-ARK-DIP')
 
 class SpecificationVersionTest(unittest.TestCase):
     def test_value(self):

From 56deacc7b733571babcc3d550815163df165fec5 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Wed, 28 Aug 2024 14:29:58 +0100
Subject: [PATCH 5/7] FEAT: Final commons-ip compatibility tweaks

- refactored `MetadataResults` to match `commons-ip`, it's probably better as well;
- moved `name` from `InformationPackage` to `PackageDetails` class;
- renamed `InformationPackage.package` to `InformationPackage.details`;
- renamed existing `InformationPackage.convert_dict` validator to `InformationPackage.convert_representations_dict` (more explicit);
- added a second validator, `InformationPackage.convert_checksum_ids`, to convert `commons-ip` checksum ids to `eark_validator` hyphenated form;
- added `is_valid` convenience property to `ValidationReport` class;
- added string constants for 'VALID' and 'INVALID'; and
- fixed tests to accommodate.
---
 .../infopacks/information_package.py          |  4 +--
 eark_validator/model/constants.py             |  2 ++
 eark_validator/model/package_details.py       | 22 ++++++++++++--
 eark_validator/model/validation_report.py     | 29 +++++++++++++++----
 eark_validator/packages.py                    | 15 ++++++----
 tests/ips_test.py                             |  2 +-
 6 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/eark_validator/infopacks/information_package.py b/eark_validator/infopacks/information_package.py
index 7f04aca..b88e247 100644
--- a/eark_validator/infopacks/information_package.py
+++ b/eark_validator/infopacks/information_package.py
@@ -70,6 +70,7 @@ def details_from_mets_file(mets_file: Path) -> PackageDetails:
         except (etree.XMLSyntaxError, AttributeError) as ex:
             raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) from ex
         return PackageDetails.model_validate({
+            'name': mets_file.parent.stem,
             'label': label,
             'othertype': othertype,
             CONTENTINFORMATIONTYPE: contentinformationtype,
@@ -87,9 +88,8 @@ def from_path(package_path: Path) -> InformationPackage:
             raise ValueError('No METS file found in package')
         mets: MetsFile = MetsFiles.from_file(to_parse.joinpath(METS_FILE))
         return InformationPackage.model_validate({
-            'name': to_parse.stem,
             METS: mets,
-            'package': InformationPackages.details_from_mets_file(to_parse.joinpath(METS_FILE))
+            'details': InformationPackages.details_from_mets_file(to_parse.joinpath(METS_FILE))
         })
 
     @staticmethod
diff --git a/eark_validator/model/constants.py b/eark_validator/model/constants.py
index a1c70c5..05b0211 100644
--- a/eark_validator/model/constants.py
+++ b/eark_validator/model/constants.py
@@ -40,3 +40,5 @@
 NOTWELLFORMED = 'NotWellFormed'
 WELLFORMED = 'WellFormed'
 PACKAGE = 'Package'
+VALID = 'VALID'
+INVALID = 'INVALID'
diff --git a/eark_validator/model/package_details.py b/eark_validator/model/package_details.py
index 853a52b..23ca48a 100644
--- a/eark_validator/model/package_details.py
+++ b/eark_validator/model/package_details.py
@@ -36,6 +36,7 @@
 
 
 class PackageDetails(BaseModel):
+    name: str = ''
     label: str = ''
     oaispackagetype: str = ''
     othertype: str = ''
@@ -47,14 +48,13 @@ class Representation(BaseModel):
     name: Optional[str] = ''
 
 class InformationPackage(BaseModel):
-    name: str = ''
     mets: Optional[MetsFile] = None
-    package: Optional[PackageDetails] = None
+    details: Optional[PackageDetails] = None
     representations: List[Representation] = []
 
     @model_validator(mode='before')
     @classmethod
-    def convert_dict(cls, data: Any) -> list[Representation]:
+    def convert_representations_dict(cls, data: Any) -> list[Representation]:
         representations = data.get('representations')
         if isinstance(representations, dict):
             # If this is a dict type then it's a commons-ip type, coerce to list
@@ -64,3 +64,19 @@ def convert_dict(cls, data: Any) -> list[Representation]:
             data['representations'] = reps
         # Return the reps for further validation.
         return data
+
+    @model_validator(mode='before')
+    @classmethod
+    def convert_checksum_ids(cls, data: Any) -> list[Representation]:
+        details = data.get('details', {})
+        if isinstance(details, dict):
+            incoming_checksums = details.get('checksums', [])
+            checksums : list[Checksum] = []
+            for checksum in incoming_checksums:
+                alg_name = checksum.get('algorithm')
+                if alg_name and alg_name.startswith('SHA') and '-' not in alg_name:
+                    alg_name = f'{alg_name[:3]}-{alg_name[3:]}'
+                checksums.append(Checksum(algorithm=alg_name, value=checksum.get('value')))
+            data['details']['checksums'] = checksums
+        # Return the reps for further validation.
+        return data
diff --git a/eark_validator/model/validation_report.py b/eark_validator/model/validation_report.py
index 34a94f5..05aa2a2 100644
--- a/eark_validator/model/validation_report.py
+++ b/eark_validator/model/validation_report.py
@@ -38,7 +38,7 @@
 from .package_details import InformationPackage
 from .specifications import Level
 from .constants import (
-    UNKNOWN, INFORMATION, WARNING, ERROR, WELLFORMED, NOTWELLFORMED)
+    UNKNOWN, INFORMATION, WARNING, ERROR, WELLFORMED, NOTWELLFORMED, VALID, INVALID)
 
 @unique
 class Severity(str, Enum):
@@ -110,12 +110,31 @@ def warnings(self) -> List[Result]:
     def infos(self) -> List[Result]:
         return [m for m in self.messages if m.severity == Severity.INFORMATION]
 
-class MetatdataResults(BaseModel):
-    schema_results: List[Result] = []
-    schematron_results: List[Result] = []
+@unique
+class MetadataStatus(str, Enum):
+    """Enum for information package metadata status values."""
+    UNKNOWN = UNKNOWN
+    # Package metadata is valid according to the schema/schematron rules
+    VALID = VALID
+    # Package metadata is invalid according to the schema/schematron rules
+    INVALID = INVALID
+
+class MetadataResults(BaseModel):
+    status: MetadataStatus = MetadataStatus.UNKNOWN
+    messages: List[Result] = []
+
+class MetatdataResultSet(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+    schema_results: MetadataResults = Field(validation_alias='schemaResults')
+    # One model_config suffices: it accepts both field names and the commons-ip camelCase aliases
+    schematron_results: MetadataResults = Field(validation_alias='schematronResults')
 
 class ValidationReport(BaseModel):
     uid: uuid.UUID = uuid.uuid4()
     structure: Optional[StructResults] = None
-    metadata: Optional[MetatdataResults] = None
+    metadata: Optional[MetatdataResultSet] = None
     package: Optional[InformationPackage] = None
+
+    @property
+    def is_valid(self) -> bool:  # False (not AttributeError) when structure or metadata results are absent
+        return self.structure is not None and self.metadata is not None and self.structure.status == StructureStatus.WELLFORMED and self.metadata.schema_results.status == MetadataStatus.VALID and self.metadata.schematron_results.status == MetadataStatus.VALID
diff --git a/eark_validator/packages.py b/eark_validator/packages.py
index 404bbcb..6bb28fd 100644
--- a/eark_validator/packages.py
+++ b/eark_validator/packages.py
@@ -35,7 +35,7 @@
 from eark_validator.mets import MetsValidator
 from eark_validator.model import ValidationReport
 from eark_validator.model.package_details import InformationPackage
-from eark_validator.model.validation_report import MetatdataResults
+from eark_validator.model.validation_report import MetadataResults, MetadataStatus, MetatdataResultSet, Result, Severity
 from eark_validator.specifications.specification import SpecificationType, SpecificationVersion
 
 METS: str = 'METS.xml'
@@ -100,14 +100,14 @@ def validate(cls, version: SpecificationVersion, to_validate: Path) -> Validatio
         results = csip_profile.get_all_results()
 
         package: InformationPackage = InformationPackages.from_path(to_validate)
-        if package.package.oaispackagetype in ['SIP', 'DIP']:
-            profile = SC.ValidationProfile(SpecificationType.from_string(package.package.oaispackagetype), version)
+        if package.details.oaispackagetype in ['SIP', 'DIP']:
+            profile = SC.ValidationProfile(SpecificationType.from_string(package.details.oaispackagetype), version)
             profile.validate(to_validate.joinpath(METS))
             results.extend(profile.get_all_results())
 
-        metadata: MetatdataResults = MetatdataResults.model_validate({
-            'schema_results': validator.validation_errors,
-            'schematron_results': results
+        metadata: MetatdataResultSet = MetatdataResultSet.model_validate({
+            'schema_results': MetadataResults.model_validate({ 'status': _validity_from_messages(validator.validation_errors), 'messages': validator.validation_errors }),
+            'schematron_results': MetadataResults.model_validate({ 'status': _validity_from_messages(results), 'messages': results })
             })
         return ValidationReport.model_validate({
             'structure': struct_results,
@@ -115,6 +115,9 @@ def validate(cls, version: SpecificationVersion, to_validate: Path) -> Validatio
             'metadata': metadata
             })
 
+def _validity_from_messages(messages: list[Result]) -> MetadataStatus:  # INVALID as soon as any message is an error
+    return MetadataStatus.INVALID if any(res.severity == Severity.ERROR for res in messages) else MetadataStatus.VALID
+
 def _report_from_bad_path(package_path: Path) -> ValidationReport:
     struct_results = structure.get_bad_path_results(package_path)
     return ValidationReport.model_validate({ 'structure': struct_results })
diff --git a/tests/ips_test.py b/tests/ips_test.py
index b3849da..e7d7592 100644
--- a/tests/ips_test.py
+++ b/tests/ips_test.py
@@ -86,7 +86,7 @@ def test_from_path_dir_no_mets(self):
 
     def test_from_path_dir(self):
         ip: InformationPackage = InformationPackages.from_path(Path(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')))
-        self.assertEqual(ip.name, '733dc055-34be-4260-85c7-5549a7083031')
+        self.assertEqual(ip.details.name, '733dc055-34be-4260-85c7-5549a7083031')
 
 class SchemaTest(unittest.TestCase):
     def test_schema(self):

From 3d3ce76b78ef7f9a480ddfb75c2be758c82d4290 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Thu, 29 Aug 2024 10:24:25 +0100
Subject: [PATCH 6/7] FIX: Support commons-ip use of NOTVALID

- added `convert_status` validator to `MetadataResults` class to convert commons-ip `NOTVALID` status to `INVALID`;
- moved checksum algorithm ID validation to the `PackageDetails` class;
- added test and test data for status conversion; and
- fixed type hinting for validation methods to `Any`.
---
 eark_validator/model/package_details.py      | 38 +++++-----
 eark_validator/model/validation_report.py    | 13 +++-
 tests/resources/json/commons-ip-invalid.json | 79 ++++++++++++++++++++
 tests/rules_test.py                          |  8 ++
 4 files changed, 119 insertions(+), 19 deletions(-)
 create mode 100644 tests/resources/json/commons-ip-invalid.json

diff --git a/eark_validator/model/package_details.py b/eark_validator/model/package_details.py
index 23ca48a..84aaeb8 100644
--- a/eark_validator/model/package_details.py
+++ b/eark_validator/model/package_details.py
@@ -43,6 +43,25 @@ class PackageDetails(BaseModel):
     contentinformationtype: str = ''
     checksums: List[Checksum] = []
 
+    # Validator to add a hyphen to the SHA checksum algorithm IDs generated by commons-ip
+    @model_validator(mode='before')
+    @classmethod
+    def convert_checksum_ids(cls, data: Any) -> Any:
+        incoming_checksums = data.get('checksums', [])
+        if isinstance(incoming_checksums, list):
+            # Only normalise the algorithm IDs when the incoming checksums are a list
+            checksums : list[Checksum] = []
+            # Loop through the checksums
+            for checksum in incoming_checksums:
+                alg_name = checksum.get('algorithm')
+                if alg_name and alg_name.startswith('SHA') and '-' not in alg_name:
+                    # If it's a SHA checksum alg ID without a hyphen, add one
+                    alg_name = f'{alg_name[:3]}-{alg_name[3:]}'
+                checksums.append(Checksum(algorithm=alg_name, value=checksum.get('value')))
+            data['checksums'] = checksums
+        # Return the data for further validation.
+        return data
+
 class Representation(BaseModel):
     mets: Optional[MetsFile] = None
     name: Optional[str] = ''
@@ -52,9 +71,10 @@ class InformationPackage(BaseModel):
     details: Optional[PackageDetails] = None
     representations: List[Representation] = []
 
+    # Validator to convert the commons-ip representations dict to a list of representations
     @model_validator(mode='before')
     @classmethod
-    def convert_representations_dict(cls, data: Any) -> list[Representation]:
+    def convert_representations_dict(cls, data: Any) -> Any:
         representations = data.get('representations')
         if isinstance(representations, dict):
             # If this is a dict type then it's a commons-ip type, coerce to list
@@ -64,19 +84,3 @@ def convert_representations_dict(cls, data: Any) -> list[Representation]:
             data['representations'] = reps
         # Return the reps for further validation.
         return data
-
-    @model_validator(mode='before')
-    @classmethod
-    def convert_checksum_ids(cls, data: Any) -> list[Representation]:
-        details = data.get('details', {})
-        if isinstance(details, dict):
-            incoming_checksums = details.get('checksums', [])
-            checksums : list[Checksum] = []
-            for checksum in incoming_checksums:
-                alg_name = checksum.get('algorithm')
-                if alg_name and alg_name.startswith('SHA') and '-' not in alg_name:
-                    alg_name = f'{alg_name[:3]}-{alg_name[3:]}'
-                checksums.append(Checksum(algorithm=alg_name, value=checksum.get('value')))
-            data['details']['checksums'] = checksums
-        # Return the reps for further validation.
-        return data
diff --git a/eark_validator/model/validation_report.py b/eark_validator/model/validation_report.py
index 05aa2a2..5ff102d 100644
--- a/eark_validator/model/validation_report.py
+++ b/eark_validator/model/validation_report.py
@@ -30,10 +30,10 @@
 """
 
 from enum import Enum, unique
-from typing import List, Optional
+from typing import Any, List, Optional
 import uuid
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from .package_details import InformationPackage
 from .specifications import Level
@@ -123,6 +123,15 @@ class MetadataResults(BaseModel):
     status: MetadataStatus = MetadataStatus.UNKNOWN
     messages: List[Result] = []
 
+    # Validator to convert commons-ip status from NOTVALID to INVALID
+    @model_validator(mode='before')
+    @classmethod
+    def convert_status(cls, data: Any) -> Any:
+        # A 'before' validator sees the raw input, so only rewrite plain dicts
+        if isinstance(data, dict) and data.get('status') == 'NOTVALID':
+            data['status'] = 'INVALID'
+        return data
+
 class MetatdataResultSet(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
     schema_results: MetadataResults = Field(validation_alias='schemaResults')
diff --git a/tests/resources/json/commons-ip-invalid.json b/tests/resources/json/commons-ip-invalid.json
new file mode 100644
index 0000000..f60419d
--- /dev/null
+++ b/tests/resources/json/commons-ip-invalid.json
@@ -0,0 +1,79 @@
+{
+  "package" : {
+    "details" : {
+      "name" : "minimal_IP_with_schemas.zip",
+      "checksums" : [ {
+        "algorithm" : "SHA1",
+        "value" : "54BBE654FE332B51569BAF21338BC811CAD2AF66"
+      } ]
+    },
+    "profile" : {
+      "type" : "CSIP",
+      "name" : "minimal_IP_with_schemas.zip",
+      "version" : "CSIP-"
+    },
+    "representations" : {
+      "name" : "minimal_IP_with_schemas.zip"
+    }
+  },
+  "uid" : "e11da9f2-e59e-4f23-b7f8-5ac86632231f",
+  "structure" : {
+    "status" : "WellFormed",
+    "messages" : [ {
+      "ruleId" : "CSIPSTR7",
+      "location" : "",
+      "message" : "If descriptive metadata are available should include inside metadata/descriptive ",
+      "severity" : "Warn"
+    }, {
+      "ruleId" : "CSIPSTR8",
+      "location" : "",
+      "message" : "If any other metadata are available, they MAY be included in separate sub-folders, for example an additional folder named other. ",
+      "severity" : "Info"
+    }, {
+      "ruleId" : "CSIPSTR12",
+      "location" : "",
+      "message" : "The recommended best practice is to always have a METS.xml in the representation folder. ",
+      "severity" : "Warn"
+    }, {
+      "ruleId" : "CSIPSTR13",
+      "location" : "",
+      "message" : "The representation folder SHOULD include a sub-folder named metadata which MAY include all metadata about the specific representation. ",
+      "severity" : "Warn"
+    }, {
+      "ruleId" : "CSIPSTR16",
+      "location" : "",
+      "message" : "There is no documentation folder in the representation folder minimal_IP_with_schemas/representations/rep1. ",
+      "severity" : "Warn"
+    } ]
+  },
+  "metadata" : {
+    "schemaResults" : {
+      "status" : "VALID",
+      "messages" : [ ]
+    },
+    "schematronResults" : {
+      "status" : "NOTVALID",
+      "messages" : [ {
+        "ruleId" : "CSIP31",
+        "location" : "mets/amdSec",
+        "message" : "You have administrative files in the metadata/folder, you must have mets/amdSec in Root METS.xml ",
+        "severity" : "Warn"
+      }, {
+        "ruleId" : "CSIP45",
+        "location" : "mets/amdSec/rightsMD",
+        "message" : "Individual representations should state their specific rights in their representation METS file (Root METS.xml) ",
+        "severity" : "Info"
+      }, {
+        "ruleId" : "CSIP66",
+        "location" : "mets/fileSec/fileGrp/file",
+        "message" : "You have files in SIP that are not referenced in Root METS.xml ",
+        "severity" : "Error"
+      }, {
+        "ruleId" : "CSIP80",
+        "location" : "mets/structMap",
+        "message" : "Must have one structMap with the mets/structMap[@LABEL='CSIP'] in Root METS.xml doens't appear mets/structMap[@LABEL='CSIP']. ",
+        "severity" : "Error"
+      } ]
+    }
+  }
+}
diff --git a/tests/rules_test.py b/tests/rules_test.py
index 3d3f852..0de3b04 100644
--- a/tests/rules_test.py
+++ b/tests/rules_test.py
@@ -41,6 +41,7 @@
 TEST_RES_XML = TEST_RES + '.xml'
 TEST_RES_JSON = TEST_RES + '.json'
 COMMONS_IP_JSON = str(files(JSON).joinpath('commons-ip-report.json'))
+COMMONS_IP_INVALID_JSON = str(files(JSON).joinpath('commons-ip-invalid.json'))
 PERSON_PATH = str(files(SCHEMATRON).joinpath('person.xml'))
 NOT_FOUND_PATH = str(files(SCHEMATRON).joinpath('not-found.xml'))
 EMPTY_FILE_PATH = str(files(TEST_RES).joinpath('empty.file'))
@@ -235,6 +236,13 @@ def test_deserialise_commons_ip_report(self):
         result: ValidationReport = ValidationReport.model_validate_json(contents)
         self.assertIsNotNone(result)
 
+    def test_deserialise_commons_ip_invalid(self):
+        file_name = COMMONS_IP_INVALID_JSON
+        with open(file_name, 'r', encoding='utf-8') as _f:
+            contents = _f.read()
+        result: ValidationReport = ValidationReport.model_validate_json(contents)
+        self.assertIsNotNone(result)
+
 class SeverityTest(str, Enum):
     NOT_SEV = 'NOT_SEV'
 

From 1c65fe9d452a42c9f675479292932d540ebbe64f Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Thu, 29 Aug 2024 13:34:21 +0100
Subject: [PATCH 7/7] REV: Tidier use of ROOT in structure.py

As suggested in this [review comment](https://github.com/E-ARK-Software/eark-validator/pull/60#discussion_r1735205478), thanks.
---
 eark_validator/structure.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/eark_validator/structure.py b/eark_validator/structure.py
index 04e4f6b..0d78d59 100644
--- a/eark_validator/structure.py
+++ b/eark_validator/structure.py
@@ -39,7 +39,7 @@
 
 METS_NAME = 'METS.xml'
 STR_REQ_PREFIX = 'CSIPSTR'
-ROOT = 'root '
+ROOT = 'root'
 DIR_NAMES = {
     'DATA': 'data',
     'DESC': 'descriptive',
@@ -154,7 +154,7 @@ def get_representations(self) -> List[Representation]:
 
     def get_root_results(self) -> List[Result]:
         results: List[Result] = []
-        location: str = ROOT + self.name
+        location: str = _root_loc(self.name)
         if not self.parser.is_archive:
             results.append(test_result_from_id(3, location))
         if not self.parser.has_mets():
@@ -195,13 +195,13 @@ def _get_schema_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_schemas():
                 return None
-        return test_result_from_id(15, ROOT + self.name)
+        return test_result_from_id(15, _root_loc(self.name))
 
     def _get_dox_results(self) -> Optional[Result]:
         for tests in self.representations.values():
             if tests.has_documentation():
                 return None
-        return test_result_from_id(16, ROOT + self.name)
+        return test_result_from_id(16, _root_loc(self.name))
 
     @classmethod
     def get_status(cls, results: List[Result]) -> StructureStatus:
@@ -240,7 +240,10 @@ def get_bad_path_results(path) -> StructResults:
         })
 
 def _get_str1_result_list(name: str) -> List[Result]:
-    return [ test_result_from_id(1, ROOT + str(name)) ]
+    return [ test_result_from_id(1, _root_loc(name)) ]
+
+def _root_loc(name: str) -> str:
+    return f'{ROOT} {name}'
 
 def validate(to_validate) -> Tuple[bool, StructResults]:
     try: