diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 7e14347..691235f 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -20,6 +20,8 @@ jobs: steps: - uses: actions/checkout@v4.1.1 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v4.7.1 with: @@ -27,6 +29,24 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install ".[testing]" + - name: Install python package + run: | + pip install --editable ".[testing]" + - name: Static Pylint code QA + run: | + pylint --errors-only eark_validator + - name: Run pre-commit tests + run: pre-commit run --all-files --verbose + - name: Test with pytest + run: | + pytest + - name: Test setuptools-git-versioning versioning + run: | + python -m pip install setuptools_git_versioning + python -m setuptools_git_versioning + - name: Install build utils + run: | pip install build - name: Build package run: python -m build diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 0dff4bd..b1a45a1 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -20,13 +20,16 @@ jobs: build: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["pypy3.10", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/README.md b/README.md index c48c45d..c98cbdb 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,10 @@ website of the Digital Information LifeCycle Interoperability Standards Board (D ### Pre-requisites -You must be running either a Debian/Ubuntu Linux distribution or Windows Subsystem for Linux on Windows to follow 
these commands. +Python 3.10 or later is required to run the E-ARK Python Information Package Validator. +You must be running either a Debian/Ubuntu Linux distribution or Windows Subsystem for Linux on Windows to follow these commands. If you are running a different Linux distribution you must change the apt commands to your package manager. - For getting Windows Subsystem for Linux up and running, please follow the guide further down and then come back to this step. ### Getting up and running with the E-ARK Python Information Package Validator @@ -88,7 +88,7 @@ pip install -U pip pip install . ``` -You are now able to run the application "ip-check". It will validate an Information Package for you. +You are now able to run the application "eark-validator". It will validate an Information Package for you. #### Testing a valid package. @@ -111,10 +111,10 @@ Delete the .zip-file you just downloaded: rm mets-xml_metsHdr_agent_TYPE_exist.zip ``` -Run the ip-check: +Run the eark-validator: ```shell -ip-check mets-xml_metsHdr_agent_TYPE_exist/ +eark-validator mets-xml_metsHdr_agent_TYPE_exist/ ``` Result: @@ -146,7 +146,7 @@ user@machine:~$ tree input If you do not have Linux and have not previously used WSL please perform the following steps. You must either be logged in as Administrator on the machine or as a user with Administrator rights on the machine. -Start er command prompt (cmd.exe) and then enter the following command: +Start a command prompt (cmd.exe) and then enter the following command: ```shell wsl --install @@ -199,4 +199,4 @@ pip install --editable ".[testing]" ### Running tests -You can run unit tests from the project root: `pytest ./tests/`, or generate test coverage figures by: `pytest --cov=ip_validation ./tests/`. If you want to see which parts of your code aren't tested then: `pytest --cov=ip_validation --cov-report=html ./tests/`. 
After this you can open the file [`/htmlcov/index.html`](./htmlcov/index.html) in your browser and survey the gory details. +You can run unit tests from the project root: `pytest ./tests/`, or generate test coverage figures by: `pytest --cov=eark_validator ./tests/`. If you want to see which parts of your code aren't tested then: `pytest --cov=eark_validator --cov-report=html ./tests/`. After this you can open the file [`/htmlcov/index.html`](./htmlcov/index.html) in your browser and survey the gory details. diff --git a/VERSION b/VERSION deleted file mode 100644 index 0d91a54..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.3.0 diff --git a/eark_validator/__init__.py b/eark_validator/__init__.py index ebc8f9e..c9c1932 100644 --- a/eark_validator/__init__.py +++ b/eark_validator/__init__.py @@ -26,5 +26,3 @@ E-ARK : Python information package validation """ - -__version__ = '1.1.1' diff --git a/eark_validator/cli/app.py b/eark_validator/cli/app.py index 9601589..11a3c40 100644 --- a/eark_validator/cli/app.py +++ b/eark_validator/cli/app.py @@ -26,30 +26,39 @@ E-ARK : Information package validation Command line validation application """ -import argparse -from pprint import pprint +import json import os.path +from pathlib import Path import sys +from typing import Optional, Tuple +import importlib.metadata + +import argparse -import eark_validator.structure as STRUCT +from eark_validator.model import ValidationReport +import eark_validator.packages as PACKAGES +from eark_validator.infopacks.package_handler import PackageHandler +from eark_validator.specifications.specification import SpecificationVersion -__version__ = '0.1.0' +__version__ = importlib.metadata.version('eark_validator') defaults = { - 'description': """E-ARK Information Package validation (ip-check). -ip-check is a command-line tool to analyse and validate the structure and + 'description': """E-ARK Information Package validation (eark-validator). 
+eark-validator is a command-line tool to analyse and validate the structure and metadata against the E-ARK Information Package specifications. It is designed for simple integration into automated work-flows.""", 'epilog': """ DILCIS Board (http://dilcis.eu) See LICENSE for license information. -GitHub: https://github.com/E-ARK-Software/py-rest-ip-validator -Author: Carl Wilson (OPF), 2020-2023 -Maintainer: Carl Wilson (OPF), 2020-2023""" +GitHub: https://github.com/E-ARK-Software/eark-validator +Author: Carl Wilson (OPF), 2020-2024 +Maintainer: Carl Wilson (OPF), 2020-2024""" } # Create PARSER -PARSER = argparse.ArgumentParser(description=defaults['description'], epilog=defaults['epilog']) +PARSER = argparse.ArgumentParser(prog='eark-validator', + description=defaults['description'], + epilog=defaults['epilog']) def parse_command_line(): """Parse command line arguments.""" @@ -57,18 +66,35 @@ def parse_command_line(): PARSER.add_argument('-r', '--recurse', action='store_true', dest='inputRecursiveFlag', - default=True, + default=False, help='When analysing an information package recurse into representations.') PARSER.add_argument('-c', '--checksum', action='store_true', dest='inputChecksumFlag', default=False, - help='Calculate and verify file checksums in packages.') + help='Calculate and verify package checksums.') + PARSER.add_argument('-m', '--manifest', + action='store_true', + dest='inputManifestFlag', + default=False, + help='Display package manifest information.') PARSER.add_argument('-v', '--verbose', action='store_true', dest='outputVerboseFlag', default=False, - help='report results in verbose format') + help='Verbose reporting for selected output options.') + PARSER.add_argument('--schema', + action='store_true', + dest='output_schema', + default=False, + help='Request display of the JSON schema of the output report.') + PARSER.add_argument('-s', '--specification_version', + nargs='?', + dest='specification_version', + 
default=SpecificationVersion.V2_1_0, + type=SpecificationVersion, + choices=list(SpecificationVersion), + help='Specification version used for validation. Default is %(default)s.') PARSER.add_argument('--version', action='version', version=__version__) @@ -89,37 +115,48 @@ def main(): # Get input from command line args = parse_command_line() # If no target files or folders specified then print usage and exit - if not args.files: + if _is_show_help(args): PARSER.print_help() + if args.output_schema: + print(json.dumps(ValidationReport.model_json_schema(), indent=2)) + sys.exit(0) + # Iterate the file arguments for file_arg in args.files: - _loop_exit, _ = _validate_ip(file_arg) + _loop_exit, _ = _validate_ip(file_arg, args.specification_version) _exit = _loop_exit if (_loop_exit > 0) else _exit sys.exit(_exit) -def _validate_ip(info_pack): - ret_stat = _check_path(info_pack) - struct_details = STRUCT.validate_package_structure(info_pack) - pprint('Path {}, struct result is: {}'.format(info_pack, - struct_details.status)) - for error in struct_details.errors: - pprint(error.to_json()) +def _validate_ip(path: str, version: SpecificationVersion) -> Tuple[int, Optional[ValidationReport]]: + ret_stat, checked_path = _check_path(path) + if ret_stat > 0: + return ret_stat, None + report = PACKAGES.PackageValidator(checked_path, version).validation_report + print(f'Path {checked_path}, struct result is: {report.structure.status.value}') + # for message in report.structure.messages: + print(report.model_dump_json()) - return ret_stat, struct_details + return ret_stat, report -def _check_path(path): +def _check_path(path: str) -> Tuple[int, Optional[Path]]: if not os.path.exists(path): # Skip files that don't exist - pprint('Path {} does not exist'.format(path)) - return 1 + print(_format_check_path_message(path, 'does not exist')) + return 1, None if os.path.isfile(path): # Check if file is a archive format - if not STRUCT.ArchivePackageHandler.is_archive(path): + if not 
PackageHandler.is_archive(path): # If not we can't process so report and iterate - pprint('Path {} is not a file we can process.'.format(path)) - return 2 - return 0 + print(_format_check_path_message(path, 'is not an archive file or directory')) + return 2, None + return 0, Path(path) + +def _format_check_path_message(path: Path, message: str) -> str: + return f'Processing terminated, path: {path} {message}.' + +def _is_show_help(args) -> bool: + return not args.files and not args.output_schema # def _test_case_schema_checks(): if __name__ == '__main__': diff --git a/eark_validator/const.py b/eark_validator/const.py index cd674b5..a9c9169 100644 --- a/eark_validator/const.py +++ b/eark_validator/const.py @@ -28,7 +28,7 @@ E-ARK (https://e-ark4all.eu/) Open Preservation Foundation (http://www.openpreservation.org) See LICENSE for license information. -Author: Carl Wilson (OPF), 2016-17 +Author: Carl Wilson (OPF), 2016-24 This work was funded by the European commission project funded as grant number LC-01390244 CEF-TC-2019-3 E-ARK3 under CONNECTING EUROPE FACILITY (CEF) - TELECOMMUNICATIONS SECTOR diff --git a/eark_validator/infopacks/information_package.py b/eark_validator/infopacks/information_package.py index 561c087..b88e247 100644 --- a/eark_validator/infopacks/information_package.py +++ b/eark_validator/infopacks/information_package.py @@ -23,115 +23,82 @@ # under the License. 
# """Module covering information package structure validation and navigation.""" -import os +from pathlib import Path from lxml import etree from eark_validator.const import NO_PATH, NOT_FILE, NOT_VALID_FILE +from eark_validator.mets import MetsFiles, MetsFile from eark_validator.ipxml.namespaces import Namespaces -from eark_validator.infopacks.manifest import Manifest - -class PackageDetails: - - def __init__( - self: str, - objid: str, - label: str, - type: str, - othertype: str, - contentinformationtype: str, - profile: str, - oaispackagetype: str, - ns: str): - self._objid = objid - self._label = label - self._type = type - self._othertype = othertype - self._contentinformationtype = contentinformationtype - self._profile = profile - self._oaispackagetype = oaispackagetype - self._ns = ns - - @property - def objid(self) -> str: - return self._objid - - @property - def label(self) -> str: - return self._label - - @property - def type(self) -> str: - return self._type - - @property - def othertype(self) -> str: - return self._othertype - - @property - def contentinformationtype(self) -> str: - return self._contentinformationtype - - @property - def profile(self) -> str: - return self._profile - - @property - def oaispackagetype(self) -> str: - return self._oaispackagetype - - @property - def namespaces(self) -> str: - return self._ns - - @classmethod - def from_mets_file(cls, mets_file: str) -> 'PackageDetails': - if (not os.path.exists(mets_file)): +from eark_validator.model import PackageDetails +from eark_validator.model.package_details import InformationPackage +from eark_validator.model.validation_report import Result +from .package_handler import PackageHandler + +CONTENTINFORMATIONTYPE = 'contentinformationtype' +QUAL_CONTENTINFORMATIONTYPE = Namespaces.CSIP.qualify(CONTENTINFORMATIONTYPE.upper()) +QUAL_OTHERTYPE = Namespaces.CSIP.qualify('OTHERTYPE') +QUAL_OAISPACKAGETYPE = Namespaces.CSIP.qualify('OAISPACKAGETYPE') +METS = 'mets' +METS_FILE = 'METS.xml' 
+QUAL_METS = Namespaces.METS.qualify(METS) +QUAL_METSHDR = Namespaces.METS.qualify('metsHdr') + +class InformationPackages: + + @staticmethod + def details_from_mets_file(mets_file: Path) -> PackageDetails: + if not mets_file.exists(): raise FileNotFoundError(NO_PATH.format(mets_file)) - if (not os.path.isfile(mets_file)): + if not mets_file.is_file(): raise ValueError(NOT_FILE.format(mets_file)) ns = {} - objid = label = ptype = othertype = contentinformationtype = profile = oaispackagetype = '' + label = othertype = contentinformationtype = oaispackagetype = '' try: parsed_mets = etree.iterparse(mets_file, events=['start', 'start-ns']) for event, element in parsed_mets: if event == 'start-ns': - prefix = element[0] - ns_uri = element[1] - ns[prefix] = ns_uri + # Add namespace id to the dictionary + ns[element[1]] = element[0] if event == 'start': - if element.tag == Namespaces.METS.qualify('mets'): - objid = element.get('OBJID', '') + if element.tag == QUAL_METS: label = element.get('LABEL', '') - ptype = element.get('TYPE', '') - othertype = element.get(Namespaces.CSIP.qualify('OTHERTYPE'), '') - contentinformationtype = element.get(Namespaces.CSIP.qualify('CONTENTINFORMATIONTYPE'), '') - profile = element.get('PROFILE', '') - oaispackagetype = element.find(Namespaces.METS.qualify('metsHdr')).get(Namespaces.CSIP.qualify('OAISPACKAGETYPE'), '') - elif element.tag == Namespaces.METS.qualify('metsHdr'): + othertype = element.get(QUAL_OTHERTYPE, '') + contentinformationtype = element.get(QUAL_CONTENTINFORMATIONTYPE, '') + oaispackagetype = element.find(QUAL_METSHDR).get(QUAL_OAISPACKAGETYPE, '') + else: break - except etree.XMLSyntaxError: - raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) - return cls(objid, label, ptype, othertype, contentinformationtype, profile, oaispackagetype, ns) - - -class InformationPackage: - """Stores the vital facts and figures about a package.""" - def __init__(self, path: str, details: PackageDetails, manifest: Manifest=None): 
- self._path = path - self._details = details - self._manifest = manifest if manifest else Manifest.from_directory(path) - - @property - def path(self) -> str: - """Get the specification of the package.""" - return self._path - - @property - def details(self) -> PackageDetails: - """Get the package details.""" - return self._details - - @property - def manifest(self) -> Manifest: - """Return the package manifest.""" - return self._manifest + except (etree.XMLSyntaxError, AttributeError) as ex: + raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) from ex + return PackageDetails.model_validate({ + 'name': mets_file.parent.stem, + 'label': label, + 'othertype': othertype, + CONTENTINFORMATIONTYPE: contentinformationtype, + 'oaispackagetype': oaispackagetype + }) + + @staticmethod + def from_path(package_path: Path) -> InformationPackage: + if not package_path.exists(): + raise FileNotFoundError(NO_PATH.format(package_path)) + handler: PackageHandler = PackageHandler() + to_parse:Path = handler.prepare_package(package_path) + mets_path: Path = to_parse.joinpath(METS_FILE) + if not mets_path.is_file(): + raise ValueError('No METS file found in package') + mets: MetsFile = MetsFiles.from_file(to_parse.joinpath(METS_FILE)) + return InformationPackage.model_validate({ + METS: mets, + 'details': InformationPackages.details_from_mets_file(to_parse.joinpath(METS_FILE)) + }) + + @staticmethod + def validate(package_path: Path) -> Result: + if not package_path.exists(): + raise FileNotFoundError(NO_PATH.format(package_path)) + handler: PackageHandler = PackageHandler() + to_parse:Path = handler.prepare_package(package_path) + mets_path: Path = to_parse.joinpath(METS_FILE) + if not mets_path.is_file(): + raise ValueError('No METS file found in package') + return True diff --git a/eark_validator/infopacks/manifest.py b/eark_validator/infopacks/manifest.py index 164ae67..7e695c8 100644 --- a/eark_validator/infopacks/manifest.py +++ b/eark_validator/infopacks/manifest.py @@ 
-23,242 +23,183 @@ # under the License. # """Information Package manifests.""" -from enum import Enum, unique -import hashlib import os +import pickle +from pathlib import Path +from typing import Optional -import lxml.etree as ET - -from eark_validator.ipxml.schema import Namespaces from eark_validator.const import NO_PATH, NOT_DIR, NOT_FILE -@unique -class HashAlgorithms(Enum): - """Enum covering information package validation statuses.""" - MD5 = 'MD5' - SHA1 = 'SHA-1' - SHA256 = 'SHA-256' - SHA384 = 'SHA-384' - SHA512 = 'SHA-512' +from eark_validator.mets import MetsFiles +from eark_validator.model import Checksum, ChecksumAlg, Manifest, ManifestEntry +from eark_validator.model.manifest import SourceType +from eark_validator.model.metadata import FileEntry +from eark_validator.utils import get_path - def hash_file(self, path: str) -> 'Checksum': - if (not os.path.exists(path)): - raise FileNotFoundError(NO_PATH.format(path)) - if (not os.path.isfile(path)): - raise ValueError(NOT_FILE.format(path)) - implemenation = self.get_implementation(self) - with open(path, 'rb') as file: - for chunk in iter(lambda: file.read(4096), b''): - implemenation.update(chunk) - return Checksum(self, implemenation.hexdigest()) - - @classmethod - def from_string(cls, value: str) -> 'HashAlgorithms': - search_value = value.upper() if hasattr(value, 'upper') else value - for algorithm in cls: - if (algorithm.value == search_value) or (algorithm.name == search_value) or (algorithm == value): - return algorithm - return None - @classmethod - def get_implementation(cls, algorithm: 'HashAlgorithms'): - if algorithm not in cls: - algorithm = cls.from_string(algorithm) - if algorithm is None: - raise ValueError('Algorithm {} not supported.'.format(algorithm)) - algorithms = { - cls.MD5: hashlib.md5(), - cls.SHA1: hashlib.sha1(), - cls.SHA256: hashlib.sha256(), - cls.SHA384: hashlib.sha384(), - cls.SHA512: hashlib.sha512() - } - return algorithms.get(algorithm) - - -class Checksum: - def 
__init__(self, algorithm: HashAlgorithms, value: str): - self._algorithm = algorithm - self._value = value.lower() +class Checksummer: + def __init__(self, algorithm: ChecksumAlg | str): + if isinstance(algorithm, ChecksumAlg): + self._algorithm: ChecksumAlg = algorithm + else: + self._algorithm: ChecksumAlg = ChecksumAlg.from_string(algorithm) @property - def algorithm(self) -> HashAlgorithms: - """Get the algorithm.""" + def algorithm(self) -> ChecksumAlg: + """Return the checksum algorithm used by this checksummer.""" return self._algorithm - @property - def value(self) -> str: - """Get the value.""" - return self._value - - def is_value(self, value: 'Checksum') -> bool: - """Check if the checksum value is equal to the given value.""" - if isinstance(value, Checksum): - return (self._value == value.value) and (self._algorithm == value.algorithm) - return self._value == value.lower() - - @classmethod - def from_mets_element(cls, element: ET.Element) -> 'Checksum': - """Create a Checksum from an etree element.""" - # Get the child flocat element and grab the href attribute. - algorithm = HashAlgorithms.from_string(element.attrib['CHECKSUMTYPE']) - value = element.attrib['CHECKSUM'] - return cls(algorithm, value) - - @classmethod - def from_file(cls, path: str, algorithm: 'Checksum') -> 'Checksum': - """Create a Checksum from an etree element.""" - # Get the child flocat element and grab the href attribute. - algorithm = HashAlgorithms.from_string(algorithm) - return algorithm.hash_file(path) + def hash_file(self, path: Path) -> 'Checksum': + """Calculate the checksum of a file. + Args: + path (Path): A path to a file to checksum. 
-class FileItem: - def __init__(self, path: str, size: int, checksum: Checksum, mime: str): - self._path = path - self._size = size - self._checksum = checksum - self._mime = mime - - @property - def path(self) -> str: - """Get the path.""" - return self._path - - @property - def name(self) -> str: - """Get the name.""" - return os.path.basename(self._path) + Raises: + FileNotFoundError: If the path parameter is not found. + ValueError: If the path parameter resolves to a directory. - @property - def size(self) -> int: - """Get the size.""" - return self._size - - @property - def checksum(self) -> Checksum: - """Get the checksum value.""" - return self._checksum - - @property - def mime(self) -> str: - """Get the mime type.""" - return self._mime - - @classmethod - def path_from_file_element(cls, element: ET.Element) -> str: - return element.find(Namespaces.METS.qualify('FLocat'), namespaces=element.nsmap).attrib[Namespaces.XLINK.qualify('href')] if hasattr(element, 'nsmap') else element.find('FLocat').attrib['href'] + Returns: + Checksum: A Checksum object containing the Hexadecimal digest of the file. + """ + if not path.exists(): + raise FileNotFoundError(NO_PATH.format(path)) + if not path.is_file(): + raise ValueError(NOT_FILE.format(path)) + implemenation: ChecksumAlg = ChecksumAlg.get_implementation(self._algorithm) + with open(path, 'rb') as file: + for chunk in iter(lambda: file.read(4096), b''): + implemenation.update(chunk) + return Checksum.model_validate({ + 'algorithm': self._algorithm, + 'value': implemenation.hexdigest() + }, strict=True + ) @classmethod - def path_from_mdref_element(cls, element: ET.Element) -> 'FileItem': - """Create a FileItem from a METS:mdRef etree element.""" + def from_file(cls, path: Path, algorithm: 'ChecksumAlg') -> 'Checksum': + """Create a Checksum by hashing the file at path with the given algorithm.""" # Get the child flocat element and grab the href attribute. 
- return element.attrib[Namespaces.XLINK.qualify('href')] if hasattr(element, 'nsmap') else element.find('FLocat').attrib['href'] + return Checksummer(algorithm).hash_file(path) - @classmethod - def from_element(cls, element: ET.Element) -> 'FileItem': - """Create a FileItem from an etree element.""" - path = '' - if element.tag in [Namespaces.METS.qualify('file'), 'file']: - path = cls.path_from_file_element(element) - elif element.tag in [Namespaces.METS.qualify('mdRef'), 'mdRef']: - path = cls.path_from_mdref_element(element) +class ManifestEntries: + @staticmethod + def from_file_path(root: Path, entry_path: Path, + checksum_algorithm: ChecksumAlg | str=None) -> ManifestEntry: + """Create a ManifestEntry from a file path.""" + abs_path: Path = root.joinpath(entry_path).absolute() + if not os.path.exists(abs_path): + raise FileNotFoundError(NO_PATH.format(abs_path)) + if not os.path.isfile(abs_path): + raise ValueError(f'Path {abs_path} is not a file.') + if isinstance(checksum_algorithm, ChecksumAlg): + algorithm: ChecksumAlg = checksum_algorithm else: - raise ValueError('Element {} is not a METS:file or METS:mdRef element.'.format(element.tag)) - size = int(element.attrib['SIZE']) - mime = element.attrib['MIMETYPE'] - checksum = Checksum.from_mets_element(element) - return cls(path, size, checksum, mime) + algorithm: ChecksumAlg = ChecksumAlg.from_string(checksum_algorithm) + checksums = [ Checksummer.from_file(abs_path, algorithm) ] if checksum_algorithm else [] + return ManifestEntry.model_validate({ + 'path': entry_path, + 'size': os.path.getsize(abs_path), + 'checksums': checksums + }) + @staticmethod + def from_file_entry(entry: FileEntry) -> ManifestEntry: + """Create a ManifestEntry from a FileEntry.""" + return ManifestEntry.model_validate({ + 'path': entry.path, + 'size': entry.size, + 'checksums': [ entry.checksum ] + }) + +class Manifests: @classmethod - def from_file_path(cls, path: str, mime:str=None, checksum_algorithm:HashAlgorithms=None) -> 'FileItem': - 
"""Create a FileItem from a file path.""" - if (not os.path.exists(path)): - raise FileNotFoundError(NO_PATH.format(path)) - if (not os.path.isfile(path)): - raise ValueError('Path {} is not a file.'.format(path)) - size = os.path.getsize(path) - mimetype = mime or 'application/octet-stream' - checksum = Checksum.from_file(path, checksum_algorithm) if checksum_algorithm else None - return cls(path, size, checksum, mimetype) - -class Manifest: - def __init__(self, root_path: str, file_items: dict[str, FileItem] or list[FileItem] = None): - if (not os.path.exists(root_path)): - raise FileNotFoundError(NO_PATH.format(root_path)) - if (not os.path.isdir(root_path)): - raise ValueError(NOT_DIR.format(root_path)) - self._root_path = root_path - self._file_items = file_items if isinstance(file_items, dict) else self._list_to_dict(root_path, file_items) - - @property - def root_path(self) -> str: - """Get the root path.""" - return self._root_path - - @property - def file_count(self) -> int: - """Get the number of files.""" - return len(self._file_items) - - @property - def size(self) -> int: - """Get the total file size in bytes.""" - return sum([item.size for item in self._file_items.values()]) - - @property - def items(self) -> dict[str, FileItem]: - """Get the file items.""" - return self._file_items - - def get_item(self, path: str) -> FileItem or None: - """Get a file item by path.""" - search_path = self._relative_path(self._root_path, path) - return self._file_items.get(search_path) - - def check_integrity(self) -> tuple[bool, list[str]]: + def validate_manifest(cls, manifest: Manifest, + alt_root: Optional[Path] = None) -> tuple[bool, list[str]]: """Check the integrity of the manifest.""" - is_valid = True - issues = [] - for item in self._file_items.values(): - abs_path = os.path.join(self._root_path, item.path) - if (not os.path.isfile(abs_path)): - is_valid = False - issues.append('File {} is missing.'.format(item.path)) + issues: list[str] = [] + root = 
alt_root if alt_root else _resolve_manifest_root(manifest) + for entry in manifest.entries: + abs_path = Path(os.path.join(root, entry.path)) + if not abs_path.is_file(): + issues.append(f'File {abs_path} is missing.') continue - if (item.size != os.path.getsize(abs_path)): - issues.append('File {} manifest size {}, filesystem size {}.'.format(item.path, item.size, os.path.getsize(abs_path))) - is_valid = False - calced_checksum = item.checksum.algorithm.hash_file(abs_path) - if (not item.checksum.is_value(calced_checksum)): - issues.append('File {} manifest checksum {}, calculated checksum {}.'.format(item.path, item.checksum, calced_checksum)) - is_valid = False - return is_valid, issues + if entry.size != os.path.getsize(abs_path): + size = os.path.getsize(abs_path) + issues.append(f'File {entry.path} manifest size {entry.size}, file size {size}.') + check_issues: list[str] = _test_checksums(abs_path, entry.checksums) + if not bool(check_issues): + issues.extend(check_issues) + return (not bool(issues)), issues @staticmethod - def _relative_path(root_path: str, path: str) -> str: - return path if not os.path.isabs(path) else os.path.relpath(path, root_path) + def from_source(source: Path | str, checksum_algorithm: ChecksumAlg=None) -> Manifest: + path = get_path(source, True) + if path.is_file(): + return Manifests.from_mets_file(path) + if path.is_dir(): + return Manifests.from_directory(path, checksum_algorithm=checksum_algorithm) + raise ValueError(f'Path {source} is neither a file nor a directory.') - @classmethod - def from_directory(cls, root_path: str, checksum_algorithm: HashAlgorithms=None) -> 'Manifest': - if (not os.path.exists(root_path)): - raise FileNotFoundError(NO_PATH.format(root_path)) - if (not os.path.isdir(root_path)): - raise ValueError(NOT_DIR.format(root_path)) - items = [] - for subdir, dirs, files in os.walk(root_path): - for file in files: - file_path = os.path.join(subdir, file) - items.append(FileItem.from_file_path(file_path, 
checksum_algorithm=checksum_algorithm)) - return cls(root_path, items) + @staticmethod + def to_file(manifest: Manifest, path: Path | str) -> None: + path = get_path(path, False) + with open(path, 'wb') as file: + pickle.dump(manifest, file) - @classmethod - def from_file_items(cls, root_path: str, file_items: dict[str, FileItem] or list[FileItem]) -> 'Manifest': - if (not os.path.exists(root_path)): - raise FileNotFoundError(NO_PATH.format(root_path)) - if (not os.path.isdir(root_path)): - raise ValueError(NOT_DIR.format(root_path)) - return cls(root_path, file_items) + @staticmethod + def from_file(path: Path | str) -> Manifest: + path = get_path(path, False) + with open(path, 'rb') as file: + return pickle.load(file) - @classmethod - def _list_to_dict(cls, root_path: str, file_items: list[FileItem]) -> dict[str, FileItem]: - return {cls._relative_path(root_path, item.path): FileItem(cls._relative_path(root_path, item.path), item.size, item.checksum, item.mime) for item in file_items} + @staticmethod + def from_directory(source: Path | str, checksum_algorithm: ChecksumAlg=None) -> Manifest: + path = get_path(source, True) + if not path.is_dir(): + raise ValueError(NOT_DIR.format(source)) + entries = [] + for subdir, _, files in os.walk(source): + for file in files: + root = Path(os.path.join(subdir, file)) + entry_path = root.relative_to(path) + entries.append( + ManifestEntries.from_file_path(path, + entry_path, + checksum_algorithm=checksum_algorithm)) + return Manifest.model_validate({ + 'root': path, + 'source': SourceType.PACKAGE, + 'summary': None, + 'entries': entries + }) + + @staticmethod + def from_mets_file(source: Path | str) -> Manifest: + path: Path = get_path(source, True) + if not path.is_file(): + raise ValueError(NOT_FILE.format(source)) + mets_file = MetsFiles.from_file(path) + entries: list[ManifestEntry] = list(map(ManifestEntries.from_file_entry, + mets_file.file_entries)) + return Manifest.model_validate({ + 'root': path, + 'source': 
SourceType.METS, + 'summary': None, + 'entries': entries + }) + +def _test_checksums(path: Path, checksums: list[Checksum]) -> list[str]: + issues: list[str] = [] + for checksum in checksums: + calced_checksum = Checksummer(checksum.algorithm).hash_file(path) + if not checksum == calced_checksum: + issues.append(f'File {path} manifest checksum {checksum.value},' + + f'calculated checksum {calced_checksum}.') + return issues + +def _resolve_manifest_root(manifest: Manifest) -> Path: + if manifest.source == SourceType.PACKAGE: + return manifest.root + if manifest.source == SourceType.METS: + return manifest.root.parent + raise ValueError(f'Unknown source type {manifest.source}') diff --git a/eark_validator/infopacks/package_handler.py b/eark_validator/infopacks/package_handler.py new file mode 100644 index 0000000..0230016 --- /dev/null +++ b/eark_validator/infopacks/package_handler.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +Factory methods for the package classes. 
+""" +import os +from pathlib import Path +import tarfile +import tempfile +import zipfile +from eark_validator.infopacks.manifest import Checksummer +SUB_MESS_NOT_EXIST = 'Path {} does not exist' +SUB_MESS_NOT_ARCH = 'Parameter "to_unpack": {} does not reference' + \ + 'a file of known archive format (zip or tar).' + +class PackageError(Exception): + """Exception used to mark validation error when unpacking archive.""" + +class PackageHandler(): + """Class to handle archive / compressed information packages.""" + def __init__(self, unpack_root: Path=Path(tempfile.gettempdir())): + self._unpack_root : Path = unpack_root + + @property + def unpack_root(self) -> Path: + """Returns the root directory for archive unpacking.""" + return self._unpack_root + + def prepare_package(self, to_prepare: Path, dest: Path=None) -> Path: + if not os.path.exists(to_prepare): + raise ValueError(SUB_MESS_NOT_EXIST.format(to_prepare)) + if os.path.isdir(to_prepare): + return to_prepare + return self.unpack_package(to_prepare, dest) + + def unpack_package(self, to_unpack: Path, dest: Path=None) -> Path: + """Unpack an archived package to a destination (defaults to tempdir). 
+ returns the destination folder.""" + if not os.path.isfile(to_unpack) or not self.is_archive(to_unpack): + raise ValueError(SUB_MESS_NOT_ARCH.format(to_unpack)) + sha1 = Checksummer('SHA-1').hash_file(to_unpack) + dest_root = dest if dest else self.unpack_root + destination = os.path.join(dest_root, sha1.value) + self._unpack(to_unpack, destination) + + children = [] + for path in Path(destination).iterdir(): + children.append(path) + if len(children) != 1: + # Dir unpacks to more than a single folder + raise PackageError('Unpacking archive yields' + f'{len(children)} children.') + if not os.path.isdir(children[0]): + raise PackageError('Unpacking archive yields' + f'a single file child {children[0]}.') + return children[0].absolute() + + @staticmethod + def _unpack(to_unpack: Path, destination: Path) -> None: + if zipfile.is_zipfile(to_unpack): + with zipfile.ZipFile(to_unpack) as zip_ip: + zip_ip.extractall(path=destination) + elif tarfile.is_tarfile(to_unpack): + with tarfile.open(to_unpack) as tar_ip: + tar_ip.extractall(path=destination) + + @staticmethod + def is_archive(to_test: Path) -> bool: + """Return True if the file is a recognised archive type, False otherwise.""" + if os.path.isfile(to_test): + if zipfile.is_zipfile(to_test): + return True + return tarfile.is_tarfile(to_test) + return False diff --git a/eark_validator/ipxml/__init__.py b/eark_validator/ipxml/__init__.py index 5c09bad..4b7b0f4 100644 --- a/eark_validator/ipxml/__init__.py +++ b/eark_validator/ipxml/__init__.py @@ -23,9 +23,6 @@ # under the License. 
# """ -E-ARK : Information package validation - Information Package modules +E-ARK : Information Package Validation + Information Package XML module """ -from .resources import profiles as PROFILES -from .resources import schema as SCHEMA -from .resources import schematron as SCHEMATRON diff --git a/eark_validator/ipxml/namespaces.py b/eark_validator/ipxml/namespaces.py index 9a530d7..c7195d8 100644 --- a/eark_validator/ipxml/namespaces.py +++ b/eark_validator/ipxml/namespaces.py @@ -24,31 +24,30 @@ # """ E-ARK : Information package validation - Information Package modules + METS/E-ARK namespaces. """ from enum import Enum, unique -from lxml import etree -from importlib_resources import files @unique -class Namespaces(Enum): +class Namespaces(str, Enum): + """Enumeration of the common namespace prefixes and URIs used in METS and E-ARK.""" METS = 'http://www.loc.gov/METS/' CSIP = 'https://DILCIS.eu/XML/METS/CSIPExtensionMETS' SIP = 'https://DILCIS.eu/XML/METS/SIPExtensionMETS' XML = 'http://www.w3.org/XML/1998/namespace' XHTML = 'http://www.w3.org/1999/xhtml' XLINK = 'http://www.w3.org/1999/xlink' - PROFILE = 'http://www.loc.gov/METS_Profile/v2', + PROFILE = 'http://www.loc.gov/METS_Profile/v2' XSI = 'http://www.w3.org/2001/XMLSchema-instance' def __init__(self, value: str): - self._id = value - self._qualifier = '{{{}}}'.format(value) + self._uri = value + self._qualifier = f'{{{value}}}' self._prefix = self.name.lower() @property - def id(self) -> str: - return self._id + def uri(self) -> str: + return self._uri @property def prefix(self) -> str: @@ -62,18 +61,27 @@ def qualify(self, value: str) -> str: return _qualify(self.qualifier, value) @classmethod - def from_id(cls, id: str) -> 'Namespaces': + def from_uri(cls, uri: str) -> 'Namespaces': for namespace in cls: - if namespace.id == id: + if namespace.uri == uri: return namespace return cls.METS @classmethod def from_prefix(cls, prefix: str) -> 'Namespaces': + """Request a namespace instance by prefix. 
+ + Args: + prefix (str): the prefix of the namespace to be returned. + + Returns: + Namespaces: The namespace instance with the given prefix. + """ + search: str = prefix.lower() if prefix else '' for namespace in cls: - if namespace.prefix == prefix.lower(): + if namespace.prefix == search: return namespace return cls.METS def _qualify(_ns: str, _v: str) -> str: - return '{}{}'.format(_ns, _v) + return f'{_ns}{_v}' diff --git a/eark_validator/ipxml/resources/__init__.py b/eark_validator/ipxml/resources/__init__.py index eebb199..94353b5 100644 --- a/eark_validator/ipxml/resources/__init__.py +++ b/eark_validator/ipxml/resources/__init__.py @@ -23,9 +23,6 @@ # under the License. # """ -E-ARK : Information package validation - Information Package modules +E-ARK : Information Package Validation + Information Package XML vocabularies """ -from . import profiles as PROFILES -from . import schema as SCHEMA -from . import schematron as SCHEMATRON diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-CSIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-CSIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-CSIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-CSIP.xml diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-DIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-DIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-DIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-DIP.xml diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-SIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-SIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-SIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-SIP.xml diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml new file mode 100644 index 
0000000..5cdf14a --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml @@ -0,0 +1,1729 @@ + + + + + https://earkcsip.dilcis.eu/profile/E-ARK-CSIP.xml + E-ARK CSIP METS Profile + This base profile describes the Common Specification for Information Packages (CSIP) and the implementation of METS for packaging OAIS conformant Information Packages. The profile is accompanied by a text document explaining the details of use of this profile. + This will enable repository interoperability and assist in the management of the preservation of digital content. + This profile is a base profile which is extended with E-ARK implementation of SIP, AIP and DIP. + The profile can be used as is, but it is recommended that the supplied extending implementations are used. Alternatively, an own extension fulfilling the extending needs of the implementer can be created. + 2021-10-01T09:00:00 + + DILCIS Board + 
http://dilcis.eu/
+ info@dilcis.eu +
+ This profile has no related profiles + + + Principles for a package conforming to the Common Specification for Information Packages (CSIP) +

CSIP Principles

+
+
+ + E-ARK CSIP METS Extension + http://earkcsip.dilcis.eu/schema/DILCISExtensionMETS.xsd + XML-schema for the attributes added by CSIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is identified using the namespace prefix csip.

+
+
+ + PREMIS + http://www.loc.gov/standards/premis/ + Used for preservation metadata + +

A rule set for use with this profile is under development.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + Content information type specification + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyContentInformationType.xml + Values for `@csip:CONTENTINFORMATIONTYPE` + +

Lists the names of specific E-ARK content information type specifications supported or maintained in this METS profile.

+
+
+ + Content Category + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyContentCategory.xml + Values for `mets/@type` + +

Declares the categorical classification of package content.

+
+
+ + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Values for `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+ + Note type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyNoteType.xml + Values for `@csip:NOTETYPE` + +

Provides values for the type of a note for an agent.

+
+
+ + Other agent type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyAgentOtherType.xml + Values for `metsHdr/agent/@OTHERTYPE` + +

Describes the other agent types supported by the profile.

+
+
+ + Identifier type + Library of Congress + http://id.loc.gov/vocabulary/identifiers.html + Values for `metsHdr/altRecordID/@TYPE` + +

Describes the type of the identifier.

+
+
+ + dmdSec status + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStatus.xml + Values for `dmdSec/@STATUS` + +

Describes the status of the descriptive metadata section (dmdSec) which is supported by the profile.

+
+
+ + IANA media types + IANA + https://www.iana.org/assignments/media-types/media-types.xhtml + Values for `@MIMETYPE` + 

Valid values for the mime types of referenced files.

+
+
+ + File group names + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyFileGrpAndStructMapDivisionLabel.xml + Values for `fileGrp/@USE` + +

Describes the uses of the file group `<fileGrp>` that are supported by the profile.

+

Own names should be placed in an own extending vocabulary.

+
+
+ + Structural map typing + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStructMapType.xml + Values for `structMap/@TYPE` + +

Describes the type of the structural map `<structMap>` that is supported by the profile.

+

Own types should be placed in an own extending vocabulary.

+
+
+ + Structural map label + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStructMapLabel.xml + Values for `structMap/@LABEL` + +

Describes the label of the structural map that is supported by the profile.

+

Own labels should be placed in an own extending vocabulary.

+
+
+
+ + + + + Package Identifier +

The `mets/@OBJID` attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder.

+

For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.

+
+
METS XPath
mets/@OBJID
+
Cardinality
1..1
+
+
+
+ + + Content Category +

The `mets/@TYPE` attribute MUST be used to declare the category of the content held in the package, e.g. "Datasets", "Websites", "Mixed", "Other", etc. Legal values are defined in a fixed vocabulary. When the content category used falls outside of the defined vocabulary the `mets/@TYPE` value must be set to "OTHER" and the specific value declared in `mets/@csip:OTHERTYPE`. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced.

+
+
METS XPath
mets/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Other Content Category +

When the `mets/@TYPE` attribute has the value "OTHER" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "OTHER" or any other string that is not present in the vocabulary used in the `mets/@TYPE` attribute.

+
+
METS XPath
mets[@TYPE='OTHER']/@csip:OTHERTYPE
+
Cardinality
0..1
+
+
+
+ + + Content Information Type Specification +

Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. The vocabulary will evolve under the care of the DILCIS Board as additional Content Information Type Specifications are developed.

+
+
METS XPath
mets/@csip:CONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Other Content Information Type Specification +

When the `mets/@csip:CONTENTINFORMATIONTYPE` has the value "OTHER" the `mets/@csip:OTHERCONTENTINFORMATIONTYPE` must state the content information type.

+
+
METS XPath
mets[@csip:CONTENTINFORMATIONTYPE='OTHER']/@csip:OTHERCONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + METS Profile +

The URL of the METS profile that the information package conforms with.

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + Package header +

General element for describing the package.

+
+
METS XPath
mets/metsHdr
+
Cardinality
1..1
+
+
+
+ + + Package creation datetime +

`mets/metsHdr/@CREATEDATE` records the date and time the package was created.

+
+
METS XPath
mets/metsHdr/@CREATEDATE
+
Cardinality
1..1
+
+
+
+ + + Package last modification datetime +

`mets/metsHdr/@LASTMODDATE` records the date and time the package was modified and is mandatory when the package has been modified.

+
+
METS XPath
mets/metsHdr/@LASTMODDATE
+
Cardinality
0..1
+
+
+
+ + + OAIS Package type information +

`mets/metsHdr/@csip:OAISPACKAGETYPE` is an additional CSIP attribute that declares the type of the IP.

+
+
METS XPath
mets/metsHdr/@csip:OAISPACKAGETYPE
+
Cardinality
1..1
+
+
+
+ + + Agent +

A mandatory agent element records the software used to create the package. Other uses of agents may be described in any local implementations that extend the profile.

+
+
METS XPath
mets/metsHdr/agent
+
Cardinality
1..n
+
+
+
+ + + Agent role +

The mandatory agent element MUST have a `@ROLE` attribute with the value “CREATOR”.

+
+
METS XPath
mets/metsHdr/agent[@ROLE='CREATOR']
+
Cardinality
1..1
+
+
+
+ + + Agent type +

The mandatory agent element MUST have a `@TYPE` attribute with the value “OTHER”.

+
+
METS XPath
mets/metsHdr/agent[@TYPE='OTHER']
+
Cardinality
1..1
+
+
+
+ + + Agent other type +

The mandatory agent element MUST have a `@OTHERTYPE` attribute with the value “SOFTWARE”.

+
+
METS XPath
mets/metsHdr/agent[@OTHERTYPE='SOFTWARE']
+
Cardinality
1..1
+
+
+
+ + + Agent name +

The mandatory agent's name element records the name of the software tool used to create the IP.

+
+
METS XPath
mets/metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Agent additional information +

The mandatory agent's note element records the version of the tool used to create the IP.

+
+
METS XPath
mets/metsHdr/agent/note
+
Cardinality
1..1
+
+
+
+ + + Classification of the agent additional information +

The mandatory agent element's note child has a `@csip:NOTETYPE` attribute with a fixed value of "SOFTWARE VERSION".

+
+
METS XPath
mets/metsHdr/agent/note[@csip:NOTETYPE='SOFTWARE VERSION']
+
Cardinality
1..1
+
+
+
+
+ + + + Descriptive metadata +

Must be used if descriptive metadata for the package content is available. Each descriptive metadata section (`<dmdSec>`) contains a single description and must be repeated for multiple descriptions, when available.

+

It is possible to transfer metadata in a package using just the descriptive metadata section and/or administrative metadata section.

+
+
METS XPath
mets/dmdSec
+
Cardinality
0..n
+
+
+
+ + + Descriptive metadata identifier +

An `xml:id` identifier for the descriptive metadata section (`<dmdSec>`) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/dmdSec/@ID
+
Cardinality
1..1
+
+
+
+ + + Descriptive metadata creation datetime +

Creation date and time of the descriptive metadata in this section.

+
+
METS XPath
mets/dmdSec/@CREATED
+
Cardinality
1..1
+
+
+
+ + + Status of the descriptive metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/dmdSec/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the descriptive metadata +

Reference to the descriptive metadata file located in the “metadata” section of the IP.

+
+
METS XPath
mets/dmdSec/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/dmdSec/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/dmdSec/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. This specification recommends recording a URL type filepath in this attribute.

+
+
METS XPath
mets/dmdSec/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.

+
+
METS XPath
mets/dmdSec/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/dmdSec/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

The creation date and time of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+
+ + + + Administrative metadata +

If administrative / preservation metadata is available, it must be described using the administrative metadata section (`<amdSec>`) element.

+

All administrative metadata is present in a single `<amdSec>` element.

+

It is possible to transfer metadata in a package using just the descriptive metadata section and/or administrative metadata section.

+
+
METS XPath
mets/amdSec
+
Cardinality
0..1
+
+
+
+ + + Digital provenance metadata +

For recording information about preservation the standard PREMIS is used. It is mandatory to include one `<digiprovMD>` element for each piece of PREMIS metadata.

+

The use of PREMIS in METS follows the recommendations in the 2017 version of the PREMIS in METS Guidelines.

+
+
METS XPath
mets/amdSec/digiprovMD
+
Cardinality
0..n
+
+
+
+ + + Digital provenance metadata identifier +

An `xml:id` identifier for the digital provenance metadata section `mets/amdSec/digiprovMD` used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/amdSec/digiprovMD/@ID
+
Cardinality
1..1
+
+
+
+ + + Status of the digital provenance metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/amdSec/digiprovMD/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the digital provenance metadata +

Reference to the digital provenance metadata file stored in the “metadata” section of the IP.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. This specification recommends recording a URL type filepath within this attribute.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+ + + Rights metadata +

A simple rights statement may be used to describe general permissions for the package. Individual representations should state their specific rights in their representation METS file.

+

Available standards include RightsStatements.org, Europeana rights statements info, METS Rights Schema created and maintained by the METS Board, the rights part of PREMIS as well as own local rights statements in use.

+
+
METS XPath
mets/amdSec/rightsMD
+
Cardinality
0..n
+
+
+
+ + + Rights metadata identifier +

An `xml:id` identifier for the rights metadata section (`<rightsMD>`) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/amdSec/rightsMD/@ID
+
Cardinality
1..1
+
+
+
+ + + Status of the rights metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/amdSec/rightsMD/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the rights metadata +

Reference to the rights metadata file stored in the “metadata” section of the IP.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link + 

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Value is taken from the list provided by the METS.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+
+ + + + File section +

References to all transferred content SHOULD be placed in the file section in the different file group elements, described in other requirements.

+

Only a single file section (`<fileSec>`) element should be present.

+

In the case that a package only contains metadata updates, i.e. exclusively metadata files, then no file references need to be added to this section.

+
+
METS XPath
mets/fileSec
+
Cardinality
0..1
+
+
+
+ + + File section identifier +

An `xml:id` identifier for the file section used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/fileSec/@ID
+
Cardinality
1..1
+
+
+
+ + + Documentation file group +

All documentation pertaining to the transferred content is placed in one or more file group elements with `mets/fileSec/fileGrp/@USE` attribute value "Documentation".

+
+
METS XPath
mets/fileSec/fileGrp[@USE='Documentation']
+
Cardinality
1..n
+
+
+
+ + + Schema file group +

All XML schemas used in the information package must be referenced from one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value "Schemas".

+
+
METS XPath
mets/fileSec/fileGrp[@USE='Schemas']
+
Cardinality
1..n
+
+
+
+ + + Representations file group +

A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value starting with "Representations" followed by the path to the folder where the representation level METS document is placed. For example "Representations/submission" and "Representations/ingest".

+
+
METS XPath
mets/fileSec/fileGrp[@USE=[starts-with('Representations')]]
+
Cardinality
1..n
+
+
+
+ + + Reference to administrative metadata +

If administrative metadata has been provided at file group `mets/fileSec/fileGrp` level this attribute refers to its administrative metadata section by ID.

+
+
METS XPath
mets/fileSec/fileGrp/@ADMID
+
Cardinality
0..1
+
+
+
+ + + Content Information Type Specification +

An added attribute which states the name of the content information type specification used to create the package.

+

The vocabulary will evolve under the curation of the DILCIS Board as additional content information type specifications are developed.

+

When the element "Content Information Type Specification" (CSIP4) has the value "MIXED" or the file group describes a representation, then this element states the content information type specification used for the file group.

+

When the file group describes a representation, i.e. its `mets/fileSec/fileGrp/@USE` attribute value starts with "Representations" (see the element "Representations file group", CSIP114), then this element must state the content information type specification used for the representation.

+
+
METS XPath
mets/@csip:CONTENTINFORMATIONTYPE="MIXED"|mets/fileSec/fileGrp[@USE=[starts-with('Representations')]]/@csip:CONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Other Content Information Type Specification +

When the `mets/fileSec/fileGrp/@csip:CONTENTINFORMATIONTYPE` attribute has the value "OTHER" the `mets/fileSec/fileGrp/@csip:OTHERCONTENTINFORMATIONTYPE` must state a value for the Content Information Type Specification used.

+
+
METS XPath
mets/fileSec/fileGrp[@csip:CONTENTINFORMATIONTYPE='OTHER']/@csip:OTHERCONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Description of the use of the file group +

The value in the `mets/fileSec/fileGrp/@USE` is the name of the whole folder structure to the data, e.g., "Documentation", "Schemas", "Representations/preingest" or "Representations/submission/data".

+
+
METS XPath
mets/fileSec/fileGrp/@USE
+
Cardinality
1..1
+
+
+
+ + + File group identifier +

An `xml:id` identifier for the file group used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/fileSec/fileGrp/@ID
+
Cardinality
1..1
+
+
+
+ + + File +

The file group (`<fileGrp>`) contains the file elements which describe the file objects.

+
+
METS XPath
mets/fileSec/fileGrp/file
+
Cardinality
1..n
+
+
+
+ + + File identifier +

A unique `xml:id` identifier for this file across the package.

+
+
METS XPath
mets/fileSec/fileGrp/file/@ID
+
Cardinality
1..1
+
+
+
+ + + File mimetype +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/fileSec/fileGrp/file/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+ + + File original identification +

If an identifier for the file was supplied by the owner it can be recorded in this attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/@OWNERID
+
Cardinality
0..1
+
+
+
+ + + File reference to administrative metadata +

If administrative metadata has been provided for the file this attribute refers to the file's administrative metadata by ID.

+
+
METS XPath
mets/fileSec/fileGrp/file/@ADMID
+
Cardinality
0..1
+
+
+
+ + + File reference to descriptive metadata +

If descriptive metadata has been provided per file this attribute refers to the file's descriptive metadata by ID.

+
+
METS XPath
mets/fileSec/fileGrp/file/@DMDID
+
Cardinality
0..1
+
+
+
+ + + File locator reference +

The location of each external file must be defined by the file location `<FLocat>` element using the same rules as references for metadata files. All references to files should be made using the XLink href attribute and the file protocol using the relative location of the file.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat
+
Cardinality
1..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat/@xlink:href
+
Cardinality
1..1
+
+
+
+
+ + + + Structural description of the package +

The structural map `<structMap>` element is the only mandatory element in the METS.

+

The `<structMap>` in the CSIP describes the highest logical structure of the IP.

+

Every CSIP compliant METS file must include ONE structural map `<structMap>` element used exactly as described in this section of requirements.

+

Institutions can add their own additional custom structural maps as separate `<structMap>` sections following their own requirements.

+
+
METS XPath
mets/structMap
+
Cardinality
1..n
+
+
+
+ + + Type of structural description +

The `mets/structMap/@TYPE` attribute must take the value “PHYSICAL” from the vocabulary.

+
+
METS XPath
mets/structMap[@TYPE='PHYSICAL']
+
Cardinality
1..1
+
+
+
+ + + Name of the structural description +

The `mets/structMap/@LABEL` attribute value is set to “CSIP” from the vocabulary.

+

This requirement identifies the CSIP compliant structural map `<structMap>` element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']
+
Cardinality
1..1
+
+
+
+ + + Structural description identifier +

An `xml:id` identifier for the structural description (structMap) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/@ID
+
Cardinality
1..1
+
+
+
+ + + Main structural division +

The structural map comprises a single division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div
+
Cardinality
1..1
+
+
+
+ + + Main structural division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/@ID
+
Cardinality
1..1
+
+
+
+ + + Metadata division +

The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division.

+

When the transfer consists of only administrative and/or descriptive metadata this is the only sub division that occurs.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']
+
Cardinality
1..1
+
+
+
+ + + Metadata division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@ID
+
Cardinality
1..1
+
+
+
+ + + Metadata division label +

The metadata division `<div>` element's `@LABEL` attribute value must be "Metadata".

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']
+
Cardinality
1..1
+
+
+
+ + + Metadata division references administrative metadata +

The administrative metadata division should reference all current administrative metadata sections.

+

All `<amdSec>`s with `@STATUS='CURRENT'` SHOULD be referenced by their identifier, @ID.

+

The current `<amdSec>` @IDs are recorded in the `div[@LABEL='Metadata']/@ADMID` attribute in a space delimited list.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@ADMID
+
Cardinality
0..1
+
+
+
+ + + Metadata division references descriptive metadata +

The descriptive metadata division should reference all current descriptive metadata sections.

+

All `<dmdSec>`s with `@STATUS='CURRENT'` SHOULD be referenced by their identifier, @ID.

+

The current `<dmdSec>` @IDs are recorded in the `div[@LABEL='Metadata']/@DMDID` attribute in a space delimited list.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@DMDID
+
Cardinality
0..1
+
+
+
+ + + Documentation division +

The documentation referenced in the file section file groups is described in the structural map with one sub division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']
+
Cardinality
0..1
+
+
+
+ + + Documentation division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/@ID
+
Cardinality
1..1
+
+
+
+ + + Documentation division label +

The documentation division `<div>` element in the package uses the value "Documentation" from the vocabulary as the value for the `@LABEL` attribute.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']
+
Cardinality
1..1
+
+
+
+ + + Documentation file references +

All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per `<fptr>` element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/fptr
+
Cardinality
0..n
+
+
+
+ + + Documentation file group reference pointer +

A reference, by ID, to the "Documentation" file group.

+

Related to the requirements CSIP60 which describes the "Documentation" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Schema division +

The schemas referenced in the file section file groups are described in the structural map within a single sub-division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']
+
Cardinality
0..1
+
+
+
+ + + Schema division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/@ID
+
Cardinality
1..1
+
+
+
+ + + Schema division label +

The schema division `<div>` element's `@LABEL` attribute has the value "Schemas" from the vocabulary.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']
+
Cardinality
1..1
+
+
+
+ + + Schema file reference +

All file groups containing schemas described in the package are referenced via the relevant file group identifiers. One file group reference per fptr-element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/fptr
+
Cardinality
0..n
+
+
+
+ + + Schema file group reference +

The pointer to the identifier for the "Schema" file group.

+

Related to the requirements CSIP113 which describes the "Schema" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Content division +

When no representations are present the content referenced in the file section file group with `@USE` attribute value "Representations" is described in the structural map as a single sub division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']
+
Cardinality
0..1
+
+
+
+ + + Content division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/@ID
+
Cardinality
1..1
+
+
+
+ + + Content division label +

The package's content division `<div>` element must have the `@LABEL` attribute value "Representations", taken from the vocabulary.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']
+
Cardinality
1..1
+
+
+
+ + + Content division file references +

All file groups containing content described in the package are referenced via the relevant file group identifiers. One file group reference per fptr-element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/fptr
+
Cardinality
0..n
+
+
+
+ + + Content division file group references +

The pointer to the identifier for the "Representations" file group.

+

Related to the requirements CSIP114 which describes the "Representations" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Representation division +

When a package consists of multiple representations, each described by a representation level METS.xml document, there should be a discrete representation div element for each representation.

+

Each representation div references the representation level METS.xml document, documenting the structure of the package and its constituent representations.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div
+
Cardinality
0..n
+
+
+
+ + + Representations division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/@ID
+
Cardinality
1..1
+
+
+
+ + + Representations division label +

The package's representation division `<div>` element `@LABEL` attribute value must be the path to the representation level METS document, starting with the value "Representations" followed by the main folder name, for example "Representations/submission" and "Representations/ingest".

+

This requirement gives the same value to be used as the requirement named "Description of the use of the file group" (CSIP64)

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/@LABEL
+
Cardinality
1..1
+
+
+
+ + + Representations division file references +

The file group containing the files described in the package is referenced via the relevant file group identifier.

+

Related to the requirements CSIP114 which describes the "Representations" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/mptr/@xlink:title
+
Cardinality
1..1
+
+
+
+ + + Representation METS pointer +

The division `<div>` of the specific representation includes one occurrence of the METS pointer `<mptr>` element, pointing to the appropriate representation METS file.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/mptr
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/structMap/div/div/mptr/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/structMap/div/div/mptr[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/structMap/div/div/mptr[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+
+ + + + structLink +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+
+ + + + behaviorSec +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the behaviour section is found in the METS Primer

+
+
+
+
+ + + + +

Requirements not stated in CSIP

+
+
+
+ + + +

Requirements not stated in CSIP

+
+
+
+ + + +

Requirements not stated in CSIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml new file mode 100644 index 0000000..40a411f --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml @@ -0,0 +1,408 @@ + + + + + https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml + E-ARK DIP METS Profile + This is the extension of the E-ARK CSIP profile for creation of an E-ARK DIP. The profile describes the Dissemination Information Package (DIP) specification and the implementation of METS for packaging OAIS conformant Information Packages. The profile is accompanied by a textual document explaining the details of use of this profile. + This will enable repository interoperability and assist in the management of the preservation of digital content. + 2021-10-15T09:00:00 + + DILCIS Board +
http://dilcis.eu/
+ info@dilcis.eu +
+ E-ARK CSIP METS Profile 2.1 + + + E-ARK DIP profile +

This profile together with the E-ARK SIP document describes a DIP conforming to the E-ARK SIP.

+ Principles for a package conforming to the Common Specification for Information Packages (CSIP) +

CSIP Principles

+
+
+ + E-ARK SIP METS Extension + http://earksip.dilcis.eu/schema/DILCISExtensionSIPMETS.xsd + XML-schema for the attributes added by SIP and reused in the DIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is used with a namespace prefix of sip.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Values for `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+ + dmdSec status + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStatus.xml + Values for `dmdSec/@STATUS` + +

Describes the status of the descriptive metadata section (dmdSec) which is supported by the profile.

+
+
+
+ + + + + Package Identifier +

Note that the value of the `mets/@OBJID` attribute for the DIP is expected to be different from the SIP and AIP to reflect the creation of a new package.

+
+
METS XPath
mets/@OBJID
+
Cardinality
1..1
+
+
+
+ + + METS Profile +

The value is set to "https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml".

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + OAIS Package type information +

The attribute `@csip:OAISPACKAGETYPE`, added in CSIP, is used with the value "DIP".

+
+
METS XPath
metsHdr[@csip:OAISPACKAGETYPE='DIP']
+
Cardinality
1..1
+
+
+
+
+ + + + Status of the descriptive metadata +

Indicates the status of the package using a fixed vocabulary. In a DIP the status SHOULD be set to "CURRENT".

+
+
METS XPath
dmdSec/@STATUS
+
Cardinality
0..1
+
+
+
+
+ + + + Administrative metadata +

The DIP <amdSec> element should comply with amdSec requirements in the CSIP profile.

+
+
+
+ + + + File section +

The DIP fileSec element should comply with fileSec requirements in the CSIP profile.

+
+
+
+ + + + Structural description of the package +

The DIP structMap element should comply with structMap requirements in the CSIP profile.

+
+
+
+ + + + structLink +

Section not defined or used in CSIP or DIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+ + + + behaviorSec +

Section not defined or used in CSIP or DIP, additional own uses may occur.

+

Information regarding the behavior section is found in the METS Primer

+
+
+
+
+ + + + +

Requirements not stated in CSIP or DIP

+
+
+
+ + + +

Requirements not stated in CSIP or DIP

+
+
+
+ + + +

Requirements not stated in CSIP or DIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml new file mode 100644 index 0000000..e8e3b2d --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml @@ -0,0 +1,832 @@ + + + + + https://earksip.dilcis.eu/profile/E-ARK-SIP.xml + E-ARK SIP METS Profile 2.1 + This is the extension of the E-ARK CSIP profile for creation of an E-ARK SIP. + 2021-10-15T09:00:00 + + DILCIS Board +
http://dilcis.eu/
+ info@dilcis.eu +
+ E-ARK CSIP METS Profile 2.1 + + + E-ARK SIP profile +

This profile together with the E-ARK SIP document describes an SIP conforming to the E-ARK SIP.

+
+
+ + E-ARK SIP METS Extension + https://earksip.dilcis.eu/schema/DILCISExtensionSIPMETS.xsd + XML-schema for the attributes added by SIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is used with a namespace prefix of sip.

+
+
+ + E-ARK CSIP METS Extension + http://earkcsip.dilcis.eu/schema/DILCISExtensionMETS.xsd + XML-schema for the attributes added by CSIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is identified using the namespace prefix csip.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + Package status + DILCIS Board + http://earksip.dilcis.eu/schema/SIPVocabularyRecordStatus.xml + Used in `@RECORDSTATUS` + +

Describes the status of the package.

+
+
+ + Alternative record IDs + DILCIS Board + http://earksip.dilcis.eu/schema/SIPVocabularyRecordIDType.xml + Used in `altRecordID/@TYPE` + +

Describes the type of the alternative record ID.

+
+
+ + Note type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyNoteType.xml + Used in `@csip:NOTETYPE` + +

Describes the type of a note for an agent.

+
+
+ + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Used in `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+
+ + + + + Package name +

An optional short text describing the contents of the package, e.g. "Accounting records of 2017".

+
+
METS XPath
mets/@LABEL
+
Cardinality
0..1
+
+
+
+ + + METS Profile +

The value is set to "https://earksip.dilcis.eu/profile/E-ARK-SIP.xml".

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + Package status +

A way of indicating the status of the package and to instruct the OAIS on how to properly handle the package. If not set, the expected behaviour is equal to NEW.

+
+
METS XPath
metsHdr/@RECORDSTATUS
+
Cardinality
0..1
+
+
+
+ + + OAIS Package type information +

`@csip:OAISPACKAGETYPE` is used with the value "SIP".

+
+
METS XPath
metsHdr/@csip:OAISPACKAGETYPE
+
Cardinality
1..1
+
+
+
+ + + Submission agreement +

A reference to the Submission Agreement associated with the package.

+

`@TYPE` is used with the value "SUBMISSIONAGREEMENT".

+

Example: RA 13-2011/5329; 2012-04-12

+

Example: http://submissionagreement.kb.se/dnr331-1144-2011/20120711/

+

Note: It is recommended to use a machine-readable format for a better description of a submission agreement.

+

For example, the submission agreement developed by Docuteam GmbH http://www.loc.gov/standards/mets/profiles/00000041.xml

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..1
+
+
+
+ + + Previous Submission agreement +

An optional reference to previous submission agreement(s) to which the information may have belonged.

+

`@TYPE` is used with the value "PREVIOUSSUBMISSIONAGREEMENT".

+

Example: FM 12-2387/12726, 2007-09-19

+

Example: http://submissionagreement.kb.se/dnr331-1144-2011/20120711/

+

Note: It is recommended to use a machine-readable format for a better description of a submission agreement.

+

For example, the submission agreement developed by Docuteam GmbH http://www.loc.gov/standards/mets/profiles/00000041.xml

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..*
+
+
+
+ + + Archival reference code +

An optional reference code indicating where in the archival hierarchy the package shall be placed in the OAIS.

+

`@TYPE` is used with the value "REFERENCECODE".

+

Example: FM 12-2387/12726, 2007-09-19

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..1
+
+
+
+ + + Previous archival reference code +

In cases where the SIP originates from other institutions maintaining a reference code structure, this element can be used to record these reference codes and therefore support the provenance of the package when a whole archival description is not submitted with the submission.

+

`@TYPE` is used with the value "PREVIOUSREFERENCECODE".

+

Example: SE/FM/123/123.1/123.1.3

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..*
+
+
+
+ + + Archival creator agent +

A wrapper element used to encode the name of the organisation or person that originally created the data being transferred. Please note that this might be different from the organisation which has been charged with preparing and sending the SIP to the archives.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..1
+
+
+
+ + + Archival creator agent role +

The role of the person(s) or institution(s) responsible for the document/collection.

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Archival creator agent type +

The type of the archival creator agent is "ORGANIZATION" or "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Archival creator agent name +

The name of the organisation(s) that originally created the data being transferred.

+

Please note that this might be different from the organisation which has been charged with preparing and sending the SIP to the archives.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
0..*
+
+
+
+ + + Archival creator agent additional information +

The archival creator agent has a note providing a unique identification code for the archival creator.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the archival creator agent additional information +

The archival creator agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent +

The name of the organisation or person submitting the package to the archive.

+
+
METS XPath
metsHdr/agent
+
Cardinality
1..1
+
+
+
+ + + Submitting agent role +

The role of the person(s) or institution(s) responsible for creating and/or submitting the package.

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent type +

The type of the submitting agent is "ORGANIZATION" or "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent name +

Name of the organisation submitting the package to the archive.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Submitting agent additional information +

The submitting agent has a note providing a unique identification code for the submitting agent.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the submitting agent additional information +

The submitting agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent +

Contact person for the submission.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..*
+
+
+
+ + + Contact person agent role +

The role of the contact person is "CREATOR".

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent type +

The type of the contact person agent is "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent name +

Name of the contact person.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Contact person agent additional information +

The contact person agent has one or more notes giving the contact information.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..*
+
+
+
+ + + Preservation agent +

The organisation or person that preserves the package.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..1
+
+
+
+ + + Preservation agent role +

The role of the preservation agent is "PRESERVATION".

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Preservation agent type +

The type of the preservation agent is "ORGANIZATION".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Preservation agent name +

Name of the organisation preserving the package.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Preservation agent additional information +

The preservation agent has a note providing a unique identification code for the preservation agent.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the preservation agent additional information +

The preservation agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+
+ + + + Descriptive metadata +

The SIP dmdSec element should comply with dmdSec requirements in the CSIP profile.

+
+
+
+ + + + Administrative metadata +

The SIP amdSec element should comply with amdSec requirements in the CSIP profile.

+
+
+
+ + + + File format name +

An optional attribute may be used if the MIMETYPE is not sufficient for the purposes of processing the information package.

+

Example: "Extensible Markup Language"

+

Example: "PDF/A"

+

Example: "ISO/IEC 26300:2006"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATNAME
+
Cardinality
0..1
+
+
+
+ + + File format version +

The version of the file format when the use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "1.0"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATVERSION
+
Cardinality
0..1
+
+
+
+ + + File format registry +

The name of the format registry used to identify the file format when the use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "PRONOM"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATREGISTRY
+
Cardinality
0..1
+
+
+
+ + + File format registry key +

Key of the file format in the registry when use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "fmt/101"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATKEY
+
Cardinality
0..1
+
+
+
+
+ + + + Structural description of the package +

The SIP structMap element should comply with structMap requirements in the CSIP profile.

+
+
+
+ + + + structLink +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+
+ + + + behaviorSec +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the behavior section is found in the METS Primer

+
+
+
+
+ + + + +

Requirements not stated in CSIP or SIP

+
+
+
+ + + +

Requirements not stated in CSIP or SIP

+
+
+
+ + + +

Requirements not stated in CSIP or SIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + The Swedish health agency + VAT:SE201345098701 + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + The archives + ID:1234567 + + http://submissionagreement.kb.se/dnr331-1144-2011/20120711/ + FM 12-2387/12726, 2007-09-19 + SE/RA/123456/24/P + SE/FM/123/123.1/123.1.3 + + + + + The Swedish health agency + VAT:SE201345098701 + + + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + + + The archives + ID:1234567 + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + The Swedish health agency + VAT:SE201345098701 + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + The archives + ID:1234567 + + + http://submissionagreement.kb.se/dnr331-1144-2011/20120711/ + + + FM 12-2387/12726, 2007-09-19 + + + SE/RA/123456/24/P + + + SE/FM/123/123.1/123.1.3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/profiles/__init__.py b/eark_validator/ipxml/resources/profiles/__init__.py index e69de29..66b57be 100644 --- a/eark_validator/ipxml/resources/profiles/__init__.py +++ b/eark_validator/ipxml/resources/profiles/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package validation + Information Package XML METS Profiles +""" diff --git a/eark_validator/ipxml/resources/schema/__init__.py b/eark_validator/ipxml/resources/schema/__init__.py index e69de29..fb885e7 100644 --- a/eark_validator/ipxml/resources/schema/__init__.py +++ b/eark_validator/ipxml/resources/schema/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package XML schema +""" diff --git a/eark_validator/ipxml/resources/schema/mets.csip.local.v2-0.xsd b/eark_validator/ipxml/resources/schema/mets.csip.local.v2-0.xsd index c5f7c89..fad30c4 100644 --- a/eark_validator/ipxml/resources/schema/mets.csip.local.v2-0.xsd +++ b/eark_validator/ipxml/resources/schema/mets.csip.local.v2-0.xsd @@ -4,5 +4,4 @@ - diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_amdSec_rules.xml similarity index 86% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_amdSec_rules.xml index 16cf669..faef768 100644 --- a/eark_validator/ipxml/resources/schematron/CSIP/mets_amdSec_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_amdSec_rules.xml @@ -10,7 +10,8 @@ Mandatory, unique id for the digital provenance. - Should be used to indicate the status of the package. + SHOULD be used to indicated the status of the package. + SHOULD be used to indicated the status of the package. Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. @@ -18,7 +19,7 @@ Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. 
MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. MUST record the type of metadata at the referenced location. - MUST record the MIME type of the referenced file. + MUST record the MIME type of the referenced file. MUST record the size in bytes of the referenced file. MUST record the date the referenced file was created. MUST record the checksum of the referenced file. @@ -26,7 +27,8 @@ Mandatory, unique id for the rights metadata. - Should be used to indicate the status of the package. + SHOULD be used to indicated the status of the package. + SHOULD be used to indicated the status of the package. Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_dmdSec_rules.xml similarity index 79% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_dmdSec_rules.xml index 70f7603..24b6d83 100644 --- a/eark_validator/ipxml/resources/schematron/CSIP/mets_dmdSec_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_dmdSec_rules.xml @@ -7,15 +7,16 @@ Mandatory, identifier must be unique within the package. Mandatory, creation date of the descriptive metadata in this section. - SHOULD be used to indicated the status of the package. 
- SHOULD provide a reference to the descriptive metadata file located in the “metadata” section of the IP.. + SHOULD be used to indicated the status of the package. + SHOULD be used to indicated the status of the package. + SHOULD provide a reference to the descriptive metadata file located in the “metadata” section of the IP.. - + The locator type is always used with the value “URL” from the vocabulary in the attribute. Attribute used with the value “simple”. Value list is maintained by the xlink standard. The actual location of the resource. This specification recommends recording a URL type filepath in this attribute. Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS. - MUST hold the IANA mime type of the referenced file. + MUST hold the IANA mime type of the referenced file. MUST hold the size of the referenced file in bytes. MUST hold the creation date of the referenced file. MUST hold the checksum of the referenced file. diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_fileSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsHdr_rules.xml similarity index 64% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsHdr_rules.xml index b6eea90..20fda3c 100644 --- a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsHdr_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsHdr_rules.xml @@ -6,15 +6,15 @@ The metsHdr element MUST have a CREATEDATE attribute. 
The metsHdr element SHOULD have a LASTMODDATE attribute. - The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute. + The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute. The metsHdr element MUST contain an agent element that records the software used to create the package. + The agent element MUST have a ROLE attribute with the value "CREATOR". - - The agent element MUST have a ROLE attribute with the value "CREATOR". + The agent element MUST have a TYPE attribute with the value "OTHER". The agent element MUST have a OTHERTYPE attribute with the value "SOFTWARE". - The agent element MUST have a child name element that records the name of the software tool used to create the IP. - The agent element MUST have a child note element that records the version of the tool used to create the IP. + The agent element MUST have a child name element that records the name of the software tool used to create the IP. + The agent element MUST have a child note element that records the version of the tool used to create the IP. The mandatory agent element’s note child has a @csip:NOTETYPE attribute with a fixed value of “SOFTWARE VERSION”. diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsRootElement_rules.xml similarity index 57% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsRootElement_rules.xml index 42502f8..6dda8ff 100644 --- a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsRootElement_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsRootElement_rules.xml @@ -7,10 +7,10 @@ The mets root element is mandatory. - The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. 
the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder. - The mets/@TYPE attibute MUST be used to declare the category of the content held in the package, e.g. book, journal, stereograph, video, etc.. Legal values are defined in a fixed vocabulary. - When the content category used falls outside of the defined vocabulary the mets/@TYPE value must be set to “OTHER” and the specific value declared in mets/@csip:OTHERTYPE. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced. - Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder. + The mets/@TYPE attibute MUST be used to declare the category of the content held in the package, e.g. book, journal, stereograph, video, etc.. Legal values are defined in a fixed vocabulary. + When the `mets/@TYPE` attribute has the value "Other" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "Other" or any other string that are not present in the vocabulary used in the `mets/@TYPE` attribute. + Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. 
When the mets/@csip:CONTENTINFORMATIONTYPE has the value “OTHER” the mets/@csip:OTHERCONTENTINFORMATIONTYPE must state the content information type. The PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. Must be used if descriptive metadata about the package content is available. NOTE: According to official METS documentation each metadata section must describe one and only one set of metadata. As such, if implementers want to include multiple occurrences of descriptive metadata into the package this must be done by repeating the whole dmdSec element for each individual metadata. diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structMap_rules.xml similarity index 73% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structMap_rules.xml index efc6796..255cfad 100644 --- a/eark_validator/ipxml/resources/schematron/CSIP/mets_structMap_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structMap_rules.xml @@ -15,43 +15,48 @@ An xml:id identifier must be unique within the package. - The package’s top-level structural division div element’s @LABEL attribute value must be identical to the package identifier, i.e. the same value as the mets/@OBJID attribute. + The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division. 
+ The metadata division div element's @LABEL attribute value must be "Metadata". The documentation referenced in the file section file groups is described in the structural map with one sub division. + The documentation division div element in the package uses the value "Documentation" from the vocabulary as the value for the @LABEL attribute. The schemas referenced in the file section file groups are described in the structural map within a single sub-division. + The schema division div element's @LABEL attribute has the value Schemas from the vocabulary. When no representations are present the content referenced in the file section file group with @USE attribute value “Representations” is described in the structural map as a single sub division. + The package's content division div element must have the @LABEL attribute value "Representations", taken from the vocabulary. + When a package consists of multiple representations, each described by a representation level METS.xml document, there should be a discrete representation div element for each representation. - + An xml:id identifier must be unique within the package. When there is administrative metadata and the amdSec is present, all administrative metadata MUST be referenced via the administrative sections different identifiers. When there are descriptive metadata and one or more dmdSec is present, all descriptive metadata MUST be referenced via the descriptive section identifiers. - + An xml:id identifier must be unique within the package. All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. - + A reference, by ID, to the “Documentation” file group. - + An xml:id identifier must be unique within the package. - All file groups containing documentation described in the package are referenced via the relevant file group identifiers. 
There MUST be one file group reference per fptr element. + All file groups containing schemas described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. - + The pointer to the identifier for the “Schema” file group. - + An xml:id identifier must be unique within the package. All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. - + The pointer to the identifier for the Representations file group. - + Mandatory, xml:id identifier must be unique within the package. The package’s representation division div element @LABEL attribute value must be the path to the representation level METS document. - The division div of the specific representation includes one occurrence of the METS pointer mptr element, pointing to the appropriate representation METS file. + The division div of the specific representation includes one occurrence of the METS pointer mptr element, pointing to the appropriate representation METS file. - + The file group containing the files described in the package are referenced via the relevant file group identifier. MUST point to the actual location of the resource. We recommend recording a URL type filepath within this attribute. Attribute used with the value “simple”. Value list is maintained by the xlink standard. 
diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_amdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_amdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..2d41696 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_dmdSec_rules.xml @@ -0,0 +1,11 @@ + + + + + + Use of the METS descriptive metadata section. + + Indicates the status of the package using a fixed vocabulary. The status SHOULD in a DIP be set to "CURRENT". 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_fileSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..b85006b --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsHdr_rules.xml @@ -0,0 +1,10 @@ + + + + + Use of the METS header + + The in CSIP added attribute `@csip:OAISPACKAGETYPE` is used with the value "DIP". + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..5fb2315 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsRootElement_rules.xml @@ -0,0 +1,12 @@ + + + + + + Validate METS root element. + + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. Note that the value of the `mets/@OBJID attribute` for the DIP is expected to be different from the SIP and AIP to reflect the creation of a new package. + The mets/@PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structMap_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structMap_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_amdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_amdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_dmdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_dmdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_fileSec_rules.xml similarity index 100% rename from 
eark_validator/ipxml/resources/schematron/SIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsHdr_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsHdr_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsRootElement_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsRootElement_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structMap_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structMap_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..f4d69e8 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml @@ -0,0 +1,46 @@ + + + + + + Use of the METS administrative metadata section. 
+ + Should be used to record information about preservation; the standard PREMIS is used. + A simple rights statement may be used to describe general permissions for the package. Individual representations should state their specific rights in their representation METS file. + + + Mandatory, unique id for the digital provenance. + SHOULD be used to indicate the status of the package. + SHOULD be used to indicate the status of the package. + Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. + MUST record the type of metadata at the referenced location. + MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date and time the referenced file was created. + MUST record the checksum of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. + + + Mandatory, unique id for the rights metadata. + SHOULD be used to indicate the status of the package. + SHOULD be used to indicate the status of the package. + Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. + MUST record the type of metadata at the referenced location.
+ MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date and time the referenced file was created. + MUST record the checksum of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..340827f --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml @@ -0,0 +1,26 @@ + + + + + + Use of the METS descriptive metadata section. + + Mandatory, identifier must be unique within the package. + Mandatory, creation date and time of the descriptive metadata in this section. + SHOULD be used to indicated the status of the package. + SHOULD be used to indicated the status of the package. + SHOULD provide a reference to the descriptive metadata file located in the “metadata” section of the IP.. + + + The locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute used with the value “simple”. Value list is maintained by the xlink standard. + The actual location of the resource. This specification recommends recording a URL type filepath in this attribute. + Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS. + MUST hold the IANA mime type of the referenced file. + MUST hold the size of the referenced file in bytes. 
+ MUST hold the creation date and time of the referenced file. + MUST hold the checksum of the referenced file. + MUST hold a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml new file mode 100644 index 0000000..7d3aaa7 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml @@ -0,0 +1,39 @@ + + + + + + Use of the METS file section. + + An xml:id identifier for the file section used for internal package references. + All documentation pertaining to the transferred content is placed in one or more file group elements with mets/fileSec/fileGrp/@USE attribute value “Documentation”. + All XML schemas used in the information package must be referenced from one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value "Schemas". + A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value starting with "Representations" followed by the path to the folder where the representation level METS document is placed. For example "Representation/submission" and "Representation/ingest". + + + ADMID attribute used. + When the element "Content Information Type Specification" (CSIP4) has the value "MIXED" or the file group describes a representation, then this element states the content information type specification used for the file group. When the element "Representations file group" (CSIP114), the file group describes a representation with the `mets/fileSec/fileGrp/@USE` attribute value is starting with "Representations", then this element must state the content information type specification used for the representation. 
+ When the `mets/fileSec/fileGrp/@csip:CONTENTINFORMATIONTYPE` attribute has the value "OTHER" the `mets/fileSec/fileGrp/@csip:OTHERCONTENTINFORMATIONTYPE` must state a value for the Content Information Type Specification used. + This attribute is mandatory. + This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. + The file group contains the file elements which describe the file objects. + + + This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. + MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date and time the referenced file was created. + MUST record the checksum of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. + A file element has an OWNERID attribute. + A file element has an ADMID attribute. + A file element has a DMDID attribute. + The location of each external file must be defined by the file location `FLocat` element using the same rules as references for metadata files. All references to files should be made using the XLink href attribute and the file protocol using the relative location of the file. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute.
+ + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..20fda3c --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml @@ -0,0 +1,21 @@ + + + + + Use of the METS header + + The metsHdr element MUST have a CREATEDATE attribute. + The metsHdr element SHOULD have a LASTMODDATE attribute. + The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute. + The metsHdr element MUST contain an agent element that records the software used to create the package. + The agent element MUST have a ROLE attribute with the value "CREATOR". + + + The agent element MUST have a TYPE attribute with the value "OTHER". + The agent element MUST have a OTHERTYPE attribute with the value "SOFTWARE". + The agent element MUST have a child name element that records the name of the software tool used to create the IP. + The agent element MUST have a child note element that records the version of the tool used to create the IP. + The mandatory agent element’s note child has a @csip:NOTETYPE attribute with a fixed value of “SOFTWARE VERSION”. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..c8f5d23 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml @@ -0,0 +1,23 @@ + + + + + Validate METS root element. + + The mets root element is mandatory. + + + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. 
the name of the top-level representation folder. + The `mets/@TYPE` attribute MUST be used to declare the category of the content held in the package, e.g. "Datasets", "Websites", "Mixed", "Other", etc. Legal values are defined in a fixed vocabulary. When the content category used falls outside of the defined vocabulary the `mets/@TYPE` value must be set to "Other" and the specific value declared in `mets/@csip:OTHERTYPE`. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced. + When the `mets/@TYPE` attribute has the value "Other" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "Other" or any other string that is not present in the vocabulary used in the `mets/@TYPE` attribute. + Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. + When the mets/@csip:CONTENTINFORMATIONTYPE has the value “OTHER” the mets/@csip:OTHERCONTENTINFORMATIONTYPE must state the content information type. + The PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. + Must be used if descriptive metadata about the package content is available. NOTE: According to official METS documentation each metadata section must describe one and only one set of metadata. As such, if implementers want to include multiple occurrences of descriptive metadata into the package this must be done by repeating the whole dmdSec element for each individual metadata. + If administrative / preservation metadata is available, it must be described using the administrative metadata section (amdSec) element. All administrative metadata is present in a single amdSec element.
+ References to all transferred content SHOULD be placed in the file section in the different file group elements, described in other requirements. + Every CSIP compliant METS file must include ONE structural map `structMap` element used exactly as described in this section of requirements. + There MUST be a general element that describes the package. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml new file mode 100644 index 0000000..d6e007f --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml @@ -0,0 +1,65 @@ + + + + + + Use of the METS structural map. + + The mets/structMap/@TYPE attribute must take the value “PHYSICAL” from the vocabulary. + The mets/structMap/@LABEL attribute value is set to “CSIP” from the vocabulary. + + + An xml:id identifier for the structural description (structMap) used for internal package references. It must be unique within the package. + The structural map MUST comprise a single division. + + + An xml:id identifier must be unique within the package. + The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division. + The metadata division div element's @LABEL attribute value must be "Metadata". + The documentation referenced in the file section file groups is described in the structural map with one sub division.
+ The documentation division div element in the package uses the value "Documentation" from the vocabulary as the value for the @LABEL attribute. + The schemas referenced in the file section file groups are described in the structural map within a single sub-division. + The schema division div element's @LABEL attribute has the value Schemas from the vocabulary. + When no representations are present the content referenced in the file section file group with @USE attribute value “Representations” is described in the structural map as a single sub division. + The package's content division div element must have the @LABEL attribute value "Representations", taken from the vocabulary. + When a package consists of multiple representations, each described by a representation level METS.xml document, there should be a discrete representation div element for each representation. + + + An xml:id identifier must be unique within the package. + The administrative metadata division should reference all current administrative metadata sections. + The descriptive metadata division should reference all current descriptive metadata sections. + + + An xml:id identifier must be unique within the package. + All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. + + + A reference, by ID, to the “Documentation” file group. + + + An xml:id identifier must be unique within the package. + All file groups containing schemas described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. + + + The pointer to the identifier for the “Schema” file group. + + + An xml:id identifier must be unique within the package. + All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element.
+ + + The pointer to the identifier for the Representations file group. + + + Mandatory, xml:id identifier must be unique within the package. + The package's representation division div element @LABEL attribute value must be the path to the representation level METS document starting with the value "Representations" followed by the main folder name for example "Representations/submission" and "Representations/ingest". + The division div of the specific representation includes one occurrence of the METS pointer mptr element, pointing to the appropriate representation METS file. + + + The file group containing the files described in the package are referenced via the relevant file group identifier. + MUST point to the actual location of the resource. We recommend recording a URL type filepath within this attribute. + Attribute used with the value “simple”. Value list is maintained by the xlink standard. + The locator type is always used with the value “URL” from the vocabulary in the attribute. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..4aaf161 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS administrative metadata section. 
+ + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..2d41696 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml @@ -0,0 +1,11 @@ + + + + + + Use of the METS descriptive metadata section. + + Indicates the status of the package using a fixed vocabulary. The status SHOULD in a DIP be set to "CURRENT". + + + diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml similarity index 52% rename from eark_validator/ipxml/resources/schematron/DIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml index c4c7fc4..9d92416 100644 --- a/eark_validator/ipxml/resources/schematron/DIP/mets_metsRootElement_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml @@ -1,8 +1,9 @@ - + - - Validate METS root element. + + + Use of the METS file section. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..b85006b --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml @@ -0,0 +1,10 @@ + + + + + Use of the METS header + + The in CSIP added attribute `@csip:OAISPACKAGETYPE` is used with the value "DIP". 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..5fb2315 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml @@ -0,0 +1,12 @@ + + + + + + Validate METS root element. + + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. Note that the value of the `mets/@OBJID attribute` for the DIP is expected to be different from the SIP and AIP to reflect the creation of a new package. + The mets/@PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml similarity index 67% rename from eark_validator/ipxml/resources/schematron/DIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml index 43c3715..6c44f94 100644 --- a/eark_validator/ipxml/resources/schematron/DIP/mets_metsHdr_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml @@ -1,7 +1,7 @@ - - Use of the METS header + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..4aaf161 --- /dev/null +++ 
b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS administrative metadata section. + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml new file mode 100644 index 0000000..bbef1c4 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml @@ -0,0 +1,15 @@ + + + + + + + Use of the METS file section. + + A file element has a FILEFORMATNAME attribute. + A file element has a FILEFORMATVERSION attribute. + A file element has a FILEFORMATREGISTRY attribute. + A file element has a FILEFORMATKEY attribute. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..531404d --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml @@ -0,0 +1,31 @@ + + + + + Use of the METS header + + Optional @metsHdr:RECORDSTATUS attribute used to indicate package status. + The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute.
+ + + Optional altRecordID element identifying submission agreement. + Optional altRecordID element identifying previous submission agreement. + Optional altRecordID element identifying reference code. + Optional altRecordID element identifying previous reference code. + + + Optional METS agent element found. + + + The agent element MUST have a role attribute. + + + The agent element MUST have a role attribute. + Optional METS name element used. + Optional METS note element used. + + + The creator agent element MUST have a NOTETYPE attribute of value IDENTIFICATIONCODE. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..90c6744 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml @@ -0,0 +1,12 @@ + + + + + + Validate METS root element. + + Optional LABEL attribute used as short text package name. + The PROFILE attribute MUST contain the URL of the METS profile, for a SIP: https://earksip.dilcis.eu/profile/E-ARK-SIP.xml. 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml new file mode 100644 index 0000000..6c44f94 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/__init__.py b/eark_validator/ipxml/resources/schematron/__init__.py index e69de29..d1ae11d 100644 --- a/eark_validator/ipxml/resources/schematron/__init__.py +++ b/eark_validator/ipxml/resources/schematron/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +""" +E-ARK : Information Package Validation + Information Package XML schematron +""" diff --git a/eark_validator/ipxml/resources/vocabs/IANA.txt b/eark_validator/ipxml/resources/vocabs/IANA.txt new file mode 100644 index 0000000..433005d --- /dev/null +++ b/eark_validator/ipxml/resources/vocabs/IANA.txt @@ -0,0 +1,2127 @@ +application/1d-interleaved-parityfec +application/3gpdash-qoe-report+xml +application/3gppHal+json +application/3gppHalForms+json +application/3gpp-ims+xml +application/A2L +application/ace-groupcomm+cbor +application/ace+cbor +application/ace+json +application/activemessage +application/activity+json +application/aif+cbor +application/aif+json +application/alto-cdni+json +application/alto-cdnifilter+json +application/alto-costmap+json +application/alto-costmapfilter+json +application/alto-directory+json +application/alto-endpointprop+json +application/alto-endpointpropparams+json +application/alto-endpointcost+json +application/alto-endpointcostparams+json +application/alto-error+json +application/alto-networkmapfilter+json +application/alto-networkmap+json +application/alto-propmap+json +application/alto-propmapparams+json +application/alto-tips+json +application/alto-tipsparams+json +application/alto-updatestreamcontrol+json +application/alto-updatestreamparams+json +application/AML +application/andrew-inset +application/applefile +application/at+jwt +application/ATF +application/ATFX +application/atom+xml +application/atomcat+xml +application/atomdeleted+xml +application/atomicmail +application/atomsvc+xml +application/atsc-dwd+xml +application/atsc-dynamic-event-message +application/atsc-held+xml +application/atsc-rdt+json +application/atsc-rsat+xml +application/ATXML +application/auth-policy+xml +application/automationml-aml+xml +application/automationml-amlx+zip +application/bacnet-xdd+zip +application/batch-SMTP +application/beep+xml +application/bufr +application/c2pa +application/calendar+json +application/calendar+xml 
+application/call-completion +application/CALS-1840 +application/captive+json +application/cbor +application/cbor-seq +application/cccex +application/ccmp+xml +application/ccxml+xml +application/cda+xml +application/CDFX+XML +application/cdmi-capability +application/cdmi-container +application/cdmi-domain +application/cdmi-object +application/cdmi-queue +application/cdni +application/CEA +application/cea-2018+xml +application/cellml+xml +application/cfw +application/cid-edhoc+cbor-seq +application/city+json +application/clr +application/clue_info+xml +application/clue+xml +application/cms +application/cnrp+xml +application/coap-group+json +application/coap-payload +application/commonground +application/concise-problem-details+cbor +application/conference-info+xml +application/cpl+xml +application/cose +application/cose-key +application/cose-key-set +application/cose-x509 +application/csrattrs +application/csta+xml +application/CSTAdata+xml +application/csvm+json +application/cwl +application/cwl+json +application/cwl+yaml +application/cwt +application/cybercash +application/dash+xml +application/dash-patch+xml +application/dashdelta +application/davmount+xml +application/dca-rft +application/DCD +application/dec-dx +application/dialog-info+xml +application/dicom +application/dicom+json +application/dicom+xml +application/DII +application/DIT +application/dns +application/dns+json +application/dns-message +application/dots+cbor +application/dpop+jwt +application/dskpp+xml +application/dssc+der +application/dssc+xml +application/dvcs +application/ecmascript +application/edhoc+cbor-seq +application/EDI-consent +application/EDIFACT +application/EDI-X12 +application/efi +application/elm+json +application/elm+xml +application/EmergencyCallData.cap+xml +application/EmergencyCallData.Comment+xml +application/EmergencyCallData.Control+xml +application/EmergencyCallData.DeviceInfo+xml +application/EmergencyCallData.eCall.MSD +application/EmergencyCallData.LegacyESN+json 
+application/EmergencyCallData.ProviderInfo+xml +application/EmergencyCallData.ServiceInfo+xml +application/EmergencyCallData.SubscriberInfo+xml +application/EmergencyCallData.VEDS+xml +application/emma+xml +application/emotionml+xml +application/encaprtp +application/epp+xml +application/epub+zip +application/eshop +application/example +application/exi +application/expect-ct-report+json +application/express +application/fastinfoset +application/fastsoap +application/fdf +application/fdt+xml +application/fhir+json +application/fhir+xml +application/fits +application/flexfec +application/font-sfnt +application/font-tdpfr +application/font-woff +application/framework-attributes+xml +application/geo+json +application/geo+json-seq +application/geopackage+sqlite3 +application/geoxacml+json +application/geoxacml+xml +application/gltf-buffer +application/gml+xml +application/gnap-binding-jws +application/gnap-binding-jwsd +application/gnap-binding-rotation-jws +application/gnap-binding-rotation-jwsd +application/grib +application/gzip +application/H224 +application/held+xml +application/hl7v2+xml +application/http +application/hyperstudio +application/ibe-key-request+xml +application/ibe-pkg-reply+xml +application/ibe-pp-data +application/iges +application/im-iscomposing+xml +application/index +application/index.cmd +application/index.obj +application/index.response +application/index.vnd +application/inkml+xml +application/IOTP +application/ipfix +application/ipp +application/ISUP +application/its+xml +application/java-archive +application/javascript +application/jf2feed+json +application/jose +application/jose+json +application/jrd+json +application/jscalendar+json +application/jscontact+json +application/json +application/json-patch+json +application/json-seq +application/jsonpath +application/jwk+json +application/jwk-set+json +application/jwt +application/kpml-request+xml +application/kpml-response+xml +application/ld+json +application/lgr+xml 
+application/link-format +application/linkset +application/linkset+json +application/load-control+xml +application/logout+jwt +application/lost+xml +application/lostsync+xml +application/lpf+zip +application/LXF +application/mac-binhex40 +application/macwriteii +application/mads+xml +application/manifest+json +application/marc +application/marcxml+xml +application/mathematica +application/mathml+xml +application/mathml-content+xml +application/mathml-presentation+xml +application/mbms-associated-procedure-description+xml +application/mbms-deregister+xml +application/mbms-envelope+xml +application/mbms-msk-response+xml +application/mbms-msk+xml +application/mbms-protection-description+xml +application/mbms-reception-report+xml +application/mbms-register-response+xml +application/mbms-register+xml +application/mbms-schedule+xml +application/mbms-user-service-description+xml +application/mbox +application/media_control+xml +application/media-policy-dataset+xml +application/mediaservercontrol+xml +application/merge-patch+json +application/metalink4+xml +application/mets+xml +application/MF4 +application/mikey +application/mipc +application/missing-blocks+cbor-seq +application/mmt-aei+xml +application/mmt-usd+xml +application/mods+xml +application/moss-keys +application/moss-signature +application/mosskey-data +application/mosskey-request +application/mp21 +application/mp4 +application/mpeg4-generic +application/mpeg4-iod +application/mpeg4-iod-xmt +application/mrb-consumer+xml +application/mrb-publish+xml +application/msc-ivr+xml +application/msc-mixer+xml +application/msword +application/mud+json +application/multipart-core +application/mxf +application/n-quads +application/n-triples +application/nasdata +application/news-checkgroups +application/news-groupinfo +application/news-transmission +application/nlsml+xml +application/node +application/nss +application/oauth-authz-req+jwt +application/oblivious-dns-message +application/ocsp-request +application/ocsp-response 
+application/octet-stream +application/ODA +application/odm+xml +application/ODX +application/oebps-package+xml +application/ogg +application/ohttp-keys +application/opc-nodeset+xml +application/oscore +application/oxps +application/p21 +application/p21+zip +application/p2p-overlay+xml +application/parityfec +application/passport +application/patch-ops-error+xml +application/pdf +application/PDX +application/pem-certificate-chain +application/pgp-encrypted +application/pgp-keys +application/pgp-signature +application/pidf-diff+xml +application/pidf+xml +application/pkcs10 +application/pkcs7-mime +application/pkcs7-signature +application/pkcs8 +application/pkcs8-encrypted +application/pkcs12 +application/pkix-attr-cert +application/pkix-cert +application/pkix-crl +application/pkix-pkipath +application/pkixcmp +application/pls+xml +application/poc-settings+xml +application/postscript +application/ppsp-tracker+json +application/private-token-issuer-directory +application/private-token-request +application/private-token-response +application/problem+json +application/problem+xml +application/provenance+xml +application/prs.alvestrand.titrax-sheet +application/prs.cww +application/prs.cyn +application/prs.hpub+zip +application/prs.implied-document+xml +application/prs.implied-executable +application/prs.implied-object+json +application/prs.implied-object+json-seq +application/prs.implied-object+yaml +application/prs.implied-structure +application/prs.nprend +application/prs.plucker +application/prs.rdf-xml-crypt +application/prs.vcfbzip2 +application/prs.xsf+xml +application/pskc+xml +application/pvd+json +application/rdf+xml +application/route-apd+xml +application/route-s-tsid+xml +application/route-usd+xml +application/QSIG +application/raptorfec +application/rdap+json +application/reginfo+xml +application/relax-ng-compact-syntax +application/remote-printing +application/reputon+json +application/resource-lists-diff+xml +application/resource-lists+xml 
+application/rfc+xml +application/riscos +application/rlmi+xml +application/rls-services+xml +application/rpki-checklist +application/rpki-ghostbusters +application/rpki-manifest +application/rpki-publication +application/rpki-roa +application/rpki-signed-tal +application/rpki-updown +application/rtf +application/rtploopback +application/rtx +application/samlassertion+xml +application/samlmetadata+xml +application/sarif-external-properties+json +application/sarif+json +application/sbe +application/sbml+xml +application/scaip+xml +application/scim+json +application/scvp-cv-request +application/scvp-cv-response +application/scvp-vp-request +application/scvp-vp-response +application/sdp +application/secevent+jwt +application/senml-etch+cbor +application/senml-etch+json +application/senml-exi +application/senml+cbor +application/senml+json +application/senml+xml +application/sensml-exi +application/sensml+cbor +application/sensml+json +application/sensml+xml +application/sep-exi +application/sep+xml +application/session-info +application/set-payment +application/set-payment-initiation +application/set-registration +application/set-registration-initiation +application/SGML +application/sgml-open-catalog +application/shf+xml +application/sieve +application/simple-filter+xml +application/simple-message-summary +application/simpleSymbolContainer +application/sipc +application/slate +application/smil +application/smil+xml +application/smpte336m +application/soap+fastinfoset +application/soap+xml +application/sparql-query +application/spdx+json +application/sparql-results+xml +application/spirits-event+xml +application/sql +application/srgs +application/srgs+xml +application/sru+xml +application/sslkeylogfile +application/ssml+xml +application/ST2110-41 +application/stix+json +application/stratum +application/swid+cbor +application/swid+xml +application/tamp-apex-update +application/tamp-apex-update-confirm +application/tamp-community-update 
+application/tamp-community-update-confirm +application/tamp-error +application/tamp-sequence-adjust +application/tamp-sequence-adjust-confirm +application/tamp-status-query +application/tamp-status-response +application/tamp-update +application/tamp-update-confirm +application/taxii+json +application/td+json +application/tei+xml +application/TETRA_ISI +application/thraud+xml +application/timestamp-query +application/timestamp-reply +application/timestamped-data +application/tlsrpt+gzip +application/tlsrpt+json +application/tm+json +application/tnauthlist +application/token-introspection+jwt +application/trickle-ice-sdpfrag +application/trig +application/ttml+xml +application/tve-trigger +application/tzif +application/tzif-leap +application/ulpfec +application/urc-grpsheet+xml +application/urc-ressheet+xml +application/urc-targetdesc+xml +application/urc-uisocketdesc+xml +application/vc +application/vcard+json +application/vcard+xml +application/vemmi +application/vnd.1000minds.decision-model+xml +application/vnd.1ob +application/vnd.3gpp.5gnas +application/vnd.3gpp.5gsa2x +application/vnd.3gpp.5gsa2x-local-service-information +application/vnd.3gpp.access-transfer-events+xml +application/vnd.3gpp.bsf+xml +application/vnd.3gpp.crs+xml +application/vnd.3gpp.current-location-discovery+xml +application/vnd.3gpp.GMOP+xml +application/vnd.3gpp.gtpc +application/vnd.3gpp.interworking-data +application/vnd.3gpp.lpp +application/vnd.3gpp.mc-signalling-ear +application/vnd.3gpp.mcdata-affiliation-command+xml +application/vnd.3gpp.mcdata-info+xml +application/vnd.3gpp.mcdata-msgstore-ctrl-request+xml +application/vnd.3gpp.mcdata-payload +application/vnd.3gpp.mcdata-regroup+xml +application/vnd.3gpp.mcdata-service-config+xml +application/vnd.3gpp.mcdata-signalling +application/vnd.3gpp.mcdata-ue-config+xml +application/vnd.3gpp.mcdata-user-profile+xml +application/vnd.3gpp.mcptt-affiliation-command+xml +application/vnd.3gpp.mcptt-floor-request+xml 
+application/vnd.3gpp.mcptt-info+xml +application/vnd.3gpp.mcptt-location-info+xml +application/vnd.3gpp.mcptt-mbms-usage-info+xml +application/vnd.3gpp.mcptt-regroup+xml +application/vnd.3gpp.mcptt-service-config+xml +application/vnd.3gpp.mcptt-signed+xml +application/vnd.3gpp.mcptt-ue-config+xml +application/vnd.3gpp.mcptt-ue-init-config+xml +application/vnd.3gpp.mcptt-user-profile+xml +application/vnd.3gpp.mcvideo-affiliation-command+xml +application/vnd.3gpp.mcvideo-affiliation-info+xml +application/vnd.3gpp.mcvideo-info+xml +application/vnd.3gpp.mcvideo-location-info+xml +application/vnd.3gpp.mcvideo-mbms-usage-info+xml +application/vnd.3gpp.mcvideo-regroup+xml +application/vnd.3gpp.mcvideo-service-config+xml +application/vnd.3gpp.mcvideo-transmission-request+xml +application/vnd.3gpp.mcvideo-ue-config+xml +application/vnd.3gpp.mcvideo-user-profile+xml +application/vnd.3gpp.mid-call+xml +application/vnd.3gpp.ngap +application/vnd.3gpp.pfcp +application/vnd.3gpp.pic-bw-large +application/vnd.3gpp.pic-bw-small +application/vnd.3gpp.pic-bw-var +application/vnd.3gpp.pinapp-info+xml +application/vnd.3gpp-prose-pc3a+xml +application/vnd.3gpp-prose-pc3ach+xml +application/vnd.3gpp-prose-pc3ch+xml +application/vnd.3gpp-prose-pc8+xml +application/vnd.3gpp-prose+xml +application/vnd.3gpp.s1ap +application/vnd.3gpp.seal-group-doc+xml +application/vnd.3gpp.seal-info+xml +application/vnd.3gpp.seal-location-info+xml +application/vnd.3gpp.seal-mbms-usage-info+xml +application/vnd.3gpp.seal-network-QoS-management-info+xml +application/vnd.3gpp.seal-ue-config-info+xml +application/vnd.3gpp.seal-unicast-info+xml +application/vnd.3gpp.seal-user-profile-info+xml +application/vnd.3gpp.sms +application/vnd.3gpp.sms+xml +application/vnd.3gpp.srvcc-ext+xml +application/vnd.3gpp.SRVCC-info+xml +application/vnd.3gpp.state-and-event-info+xml +application/vnd.3gpp.ussd+xml +application/vnd.3gpp.vae-info+xml +application/vnd.3gpp-v2x-local-service-information 
+application/vnd.3gpp2.bcmcsinfo+xml +application/vnd.3gpp2.sms +application/vnd.3gpp2.tcap +application/vnd.3gpp.v2x +application/vnd.3lightssoftware.imagescal +application/vnd.3M.Post-it-Notes +application/vnd.accpac.simply.aso +application/vnd.accpac.simply.imp +application/vnd.acm.addressxfer+json +application/vnd.acm.chatbot+json +application/vnd.acucobol +application/vnd.acucorp +application/vnd.adobe.flash.movie +application/vnd.adobe.formscentral.fcdt +application/vnd.adobe.fxp +application/vnd.adobe.partial-upload +application/vnd.adobe.xdp+xml +application/vnd.aether.imp +application/vnd.afpc.afplinedata +application/vnd.afpc.afplinedata-pagedef +application/vnd.afpc.cmoca-cmresource +application/vnd.afpc.foca-charset +application/vnd.afpc.foca-codedfont +application/vnd.afpc.foca-codepage +application/vnd.afpc.modca +application/vnd.afpc.modca-cmtable +application/vnd.afpc.modca-formdef +application/vnd.afpc.modca-mediummap +application/vnd.afpc.modca-objectcontainer +application/vnd.afpc.modca-overlay +application/vnd.afpc.modca-pagesegment +application/vnd.age +application/vnd.ah-barcode +application/vnd.ahead.space +application/vnd.airzip.filesecure.azf +application/vnd.airzip.filesecure.azs +application/vnd.amadeus+json +application/vnd.amazon.mobi8-ebook +application/vnd.americandynamics.acc +application/vnd.amiga.ami +application/vnd.amundsen.maze+xml +application/vnd.android.ota +application/vnd.anki +application/vnd.anser-web-certificate-issue-initiation +application/vnd.antix.game-component +application/vnd.apache.arrow.file +application/vnd.apache.arrow.stream +application/vnd.apache.parquet +application/vnd.apache.thrift.binary +application/vnd.apache.thrift.compact +application/vnd.apache.thrift.json +application/vnd.apexlang +application/vnd.api+json +application/vnd.aplextor.warrp+json +application/vnd.apothekende.reservation+json +application/vnd.apple.installer+xml +application/vnd.apple.keynote +application/vnd.apple.mpegurl 
+application/vnd.apple.numbers +application/vnd.apple.pages +application/vnd.arastra.swi +application/vnd.aristanetworks.swi +application/vnd.artisan+json +application/vnd.artsquare +application/vnd.astraea-software.iota +application/vnd.audiograph +application/vnd.autopackage +application/vnd.avalon+json +application/vnd.avistar+xml +application/vnd.balsamiq.bmml+xml +application/vnd.banana-accounting +application/vnd.bbf.usp.error +application/vnd.bbf.usp.msg +application/vnd.bbf.usp.msg+json +application/vnd.balsamiq.bmpr +application/vnd.bekitzur-stech+json +application/vnd.belightsoft.lhzd+zip +application/vnd.belightsoft.lhzl+zip +application/vnd.bint.med-content +application/vnd.biopax.rdf+xml +application/vnd.blink-idb-value-wrapper +application/vnd.blueice.multipass +application/vnd.bluetooth.ep.oob +application/vnd.bluetooth.le.oob +application/vnd.bmi +application/vnd.bpf +application/vnd.bpf3 +application/vnd.businessobjects +application/vnd.byu.uapi+json +application/vnd.bzip3 +application/vnd.c3voc.schedule+xml +application/vnd.cab-jscript +application/vnd.canon-cpdl +application/vnd.canon-lips +application/vnd.capasystems-pg+json +application/vnd.cendio.thinlinc.clientconf +application/vnd.century-systems.tcp_stream +application/vnd.chemdraw+xml +application/vnd.chess-pgn +application/vnd.chipnuts.karaoke-mmd +application/vnd.ciedi +application/vnd.cinderella +application/vnd.cirpack.isdn-ext +application/vnd.citationstyles.style+xml +application/vnd.claymore +application/vnd.cloanto.rp9 +application/vnd.clonk.c4group +application/vnd.cluetrust.cartomobile-config +application/vnd.cluetrust.cartomobile-config-pkg +application/vnd.cncf.helm.chart.content.v1.tar+gzip +application/vnd.cncf.helm.chart.provenance.v1.prov +application/vnd.cncf.helm.config.v1+json +application/vnd.coffeescript +application/vnd.collabio.xodocuments.document +application/vnd.collabio.xodocuments.document-template +application/vnd.collabio.xodocuments.presentation 
+application/vnd.collabio.xodocuments.presentation-template +application/vnd.collabio.xodocuments.spreadsheet +application/vnd.collabio.xodocuments.spreadsheet-template +application/vnd.collection.doc+json +application/vnd.collection+json +application/vnd.collection.next+json +application/vnd.comicbook-rar +application/vnd.comicbook+zip +application/vnd.commerce-battelle +application/vnd.commonspace +application/vnd.coreos.ignition+json +application/vnd.cosmocaller +application/vnd.contact.cmsg +application/vnd.crick.clicker +application/vnd.crick.clicker.keyboard +application/vnd.crick.clicker.palette +application/vnd.crick.clicker.template +application/vnd.crick.clicker.wordbank +application/vnd.criticaltools.wbs+xml +application/vnd.cryptii.pipe+json +application/vnd.crypto-shade-file +application/vnd.cryptomator.encrypted +application/vnd.cryptomator.vault +application/vnd.ctc-posml +application/vnd.ctct.ws+xml +application/vnd.cups-pdf +application/vnd.cups-postscript +application/vnd.cups-ppd +application/vnd.cups-raster +application/vnd.cups-raw +application/vnd.curl +application/vnd.cyan.dean.root+xml +application/vnd.cybank +application/vnd.cyclonedx+json +application/vnd.cyclonedx+xml +application/vnd.d2l.coursepackage1p0+zip +application/vnd.d3m-dataset +application/vnd.d3m-problem +application/vnd.dart +application/vnd.data-vision.rdz +application/vnd.datalog +application/vnd.datapackage+json +application/vnd.dataresource+json +application/vnd.dbf +application/vnd.debian.binary-package +application/vnd.dece.data +application/vnd.dece.ttml+xml +application/vnd.dece.unspecified +application/vnd.dece.zip +application/vnd.denovo.fcselayout-link +application/vnd.desmume.movie +application/vnd.dir-bi.plate-dl-nosuffix +application/vnd.dm.delegation+xml +application/vnd.dna +application/vnd.document+json +application/vnd.dolby.mobile.1 +application/vnd.dolby.mobile.2 +application/vnd.doremir.scorecloud-binary-document +application/vnd.dpgraph 
+application/vnd.dreamfactory +application/vnd.drive+json +application/vnd.dtg.local +application/vnd.dtg.local.flash +application/vnd.dtg.local.html +application/vnd.dvb.ait +application/vnd.dvb.dvbisl+xml +application/vnd.dvb.dvbj +application/vnd.dvb.esgcontainer +application/vnd.dvb.ipdcdftnotifaccess +application/vnd.dvb.ipdcesgaccess +application/vnd.dvb.ipdcesgaccess2 +application/vnd.dvb.ipdcesgpdd +application/vnd.dvb.ipdcroaming +application/vnd.dvb.iptv.alfec-base +application/vnd.dvb.iptv.alfec-enhancement +application/vnd.dvb.notif-aggregate-root+xml +application/vnd.dvb.notif-container+xml +application/vnd.dvb.notif-generic+xml +application/vnd.dvb.notif-ia-msglist+xml +application/vnd.dvb.notif-ia-registration-request+xml +application/vnd.dvb.notif-ia-registration-response+xml +application/vnd.dvb.notif-init+xml +application/vnd.dvb.pfr +application/vnd.dvb.service +application/vnd.dxr +application/vnd.dynageo +application/vnd.dzr +application/vnd.easykaraoke.cdgdownload +application/vnd.ecip.rlp +application/vnd.ecdis-update +application/vnd.eclipse.ditto+json +application/vnd.ecowin.chart +application/vnd.ecowin.filerequest +application/vnd.ecowin.fileupdate +application/vnd.ecowin.series +application/vnd.ecowin.seriesrequest +application/vnd.ecowin.seriesupdate +application/vnd.efi.img +application/vnd.efi.iso +application/vnd.eln+zip +application/vnd.emclient.accessrequest+xml +application/vnd.enliven +application/vnd.enphase.envoy +application/vnd.eprints.data+xml +application/vnd.epson.esf +application/vnd.epson.msf +application/vnd.epson.quickanime +application/vnd.epson.salt +application/vnd.epson.ssf +application/vnd.ericsson.quickcall +application/vnd.erofs +application/vnd.espass-espass+zip +application/vnd.eszigno3+xml +application/vnd.etsi.aoc+xml +application/vnd.etsi.asic-s+zip +application/vnd.etsi.asic-e+zip +application/vnd.etsi.cug+xml +application/vnd.etsi.iptvcommand+xml +application/vnd.etsi.iptvdiscovery+xml 
+application/vnd.etsi.iptvprofile+xml +application/vnd.etsi.iptvsad-bc+xml +application/vnd.etsi.iptvsad-cod+xml +application/vnd.etsi.iptvsad-npvr+xml +application/vnd.etsi.iptvservice+xml +application/vnd.etsi.iptvsync+xml +application/vnd.etsi.iptvueprofile+xml +application/vnd.etsi.mcid+xml +application/vnd.etsi.mheg5 +application/vnd.etsi.overload-control-policy-dataset+xml +application/vnd.etsi.pstn+xml +application/vnd.etsi.sci+xml +application/vnd.etsi.simservs+xml +application/vnd.etsi.timestamp-token +application/vnd.etsi.tsl+xml +application/vnd.etsi.tsl.der +application/vnd.eu.kasparian.car+json +application/vnd.eudora.data +application/vnd.evolv.ecig.profile +application/vnd.evolv.ecig.settings +application/vnd.evolv.ecig.theme +application/vnd.exstream-empower+zip +application/vnd.exstream-package +application/vnd.ezpix-album +application/vnd.ezpix-package +application/vnd.f-secure.mobile +application/vnd.fastcopy-disk-image +application/vnd.familysearch.gedcom+zip +application/vnd.fdsn.mseed +application/vnd.fdsn.seed +application/vnd.ffsns +application/vnd.ficlab.flb+zip +application/vnd.filmit.zfc +application/vnd.fints +application/vnd.firemonkeys.cloudcell +application/vnd.FloGraphIt +application/vnd.fluxtime.clip +application/vnd.font-fontforge-sfd +application/vnd.framemaker +application/vnd.freelog.comic +application/vnd.frogans.fnc +application/vnd.frogans.ltf +application/vnd.fsc.weblaunch +application/vnd.fujifilm.fb.docuworks +application/vnd.fujifilm.fb.docuworks.binder +application/vnd.fujifilm.fb.docuworks.container +application/vnd.fujifilm.fb.jfi+xml +application/vnd.fujitsu.oasys +application/vnd.fujitsu.oasys2 +application/vnd.fujitsu.oasys3 +application/vnd.fujitsu.oasysgp +application/vnd.fujitsu.oasysprs +application/vnd.fujixerox.ART4 +application/vnd.fujixerox.ART-EX +application/vnd.fujixerox.ddd +application/vnd.fujixerox.docuworks +application/vnd.fujixerox.docuworks.binder +application/vnd.fujixerox.docuworks.container 
+application/vnd.fujixerox.HBPL +application/vnd.fut-misnet +application/vnd.futoin+cbor +application/vnd.futoin+json +application/vnd.fuzzysheet +application/vnd.ga4gh.passport+jwt +application/vnd.genomatix.tuxedo +application/vnd.genozip +application/vnd.gentics.grd+json +application/vnd.gentoo.catmetadata+xml +application/vnd.gentoo.ebuild +application/vnd.gentoo.eclass +application/vnd.gentoo.gpkg +application/vnd.gentoo.manifest +application/vnd.gentoo.xpak +application/vnd.gentoo.pkgmetadata+xml +application/vnd.geo+json +application/vnd.geocube+xml +application/vnd.geogebra.file +application/vnd.geogebra.slides +application/vnd.geogebra.tool +application/vnd.geometry-explorer +application/vnd.geonext +application/vnd.geoplan +application/vnd.geospace +application/vnd.gerber +application/vnd.globalplatform.card-content-mgt +application/vnd.globalplatform.card-content-mgt-response +application/vnd.gmx +application/vnd.gnu.taler.exchange+json +application/vnd.gnu.taler.merchant+json +application/vnd.google-earth.kml+xml +application/vnd.google-earth.kmz +application/vnd.gov.sk.e-form+xml +application/vnd.gov.sk.e-form+zip +application/vnd.gov.sk.xmldatacontainer+xml +application/vnd.gpxsee.map+xml +application/vnd.grafeq +application/vnd.gridmp +application/vnd.groove-account +application/vnd.groove-help +application/vnd.groove-identity-message +application/vnd.groove-injector +application/vnd.groove-tool-message +application/vnd.groove-tool-template +application/vnd.groove-vcard +application/vnd.hal+json +application/vnd.hal+xml +application/vnd.HandHeld-Entertainment+xml +application/vnd.hbci +application/vnd.hc+json +application/vnd.hcl-bireports +application/vnd.hdt +application/vnd.heroku+json +application/vnd.hhe.lesson-player +application/vnd.hp-HPGL +application/vnd.hp-hpid +application/vnd.hp-hps +application/vnd.hp-jlyt +application/vnd.hp-PCL +application/vnd.hp-PCLXL +application/vnd.hsl +application/vnd.httphone 
+application/vnd.hydrostatix.sof-data +application/vnd.hyper-item+json +application/vnd.hyper+json +application/vnd.hyperdrive+json +application/vnd.hzn-3d-crossword +application/vnd.ibm.afplinedata +application/vnd.ibm.electronic-media +application/vnd.ibm.MiniPay +application/vnd.ibm.modcap +application/vnd.ibm.rights-management +application/vnd.ibm.secure-container +application/vnd.iccprofile +application/vnd.ieee.1905 +application/vnd.igloader +application/vnd.imagemeter.folder+zip +application/vnd.imagemeter.image+zip +application/vnd.immervision-ivp +application/vnd.immervision-ivu +application/vnd.ims.imsccv1p1 +application/vnd.ims.imsccv1p2 +application/vnd.ims.imsccv1p3 +application/vnd.ims.lis.v2.result+json +application/vnd.ims.lti.v2.toolconsumerprofile+json +application/vnd.ims.lti.v2.toolproxy.id+json +application/vnd.ims.lti.v2.toolproxy+json +application/vnd.ims.lti.v2.toolsettings+json +application/vnd.ims.lti.v2.toolsettings.simple+json +application/vnd.informedcontrol.rms+xml +application/vnd.infotech.project +application/vnd.infotech.project+xml +application/vnd.informix-visionary +application/vnd.innopath.wamp.notification +application/vnd.insors.igm +application/vnd.intercon.formnet +application/vnd.intergeo +application/vnd.intertrust.digibox +application/vnd.intertrust.nncp +application/vnd.intu.qbo +application/vnd.intu.qfx +application/vnd.ipfs.ipns-record +application/vnd.ipld.car +application/vnd.ipld.dag-cbor +application/vnd.ipld.dag-json +application/vnd.ipld.raw +application/vnd.iptc.g2.catalogitem+xml +application/vnd.iptc.g2.conceptitem+xml +application/vnd.iptc.g2.knowledgeitem+xml +application/vnd.iptc.g2.newsitem+xml +application/vnd.iptc.g2.newsmessage+xml +application/vnd.iptc.g2.packageitem+xml +application/vnd.iptc.g2.planningitem+xml +application/vnd.ipunplugged.rcprofile +application/vnd.irepository.package+xml +application/vnd.is-xpr +application/vnd.isac.fcs +application/vnd.jam +application/vnd.iso11783-10+zip 
+application/vnd.japannet-directory-service +application/vnd.japannet-jpnstore-wakeup +application/vnd.japannet-payment-wakeup +application/vnd.japannet-registration +application/vnd.japannet-registration-wakeup +application/vnd.japannet-setstore-wakeup +application/vnd.japannet-verification +application/vnd.japannet-verification-wakeup +application/vnd.jcp.javame.midlet-rms +application/vnd.jisp +application/vnd.joost.joda-archive +application/vnd.jsk.isdn-ngn +application/vnd.kahootz +application/vnd.kde.karbon +application/vnd.kde.kchart +application/vnd.kde.kformula +application/vnd.kde.kivio +application/vnd.kde.kontour +application/vnd.kde.kpresenter +application/vnd.kde.kspread +application/vnd.kde.kword +application/vnd.kenameaapp +application/vnd.kidspiration +application/vnd.Kinar +application/vnd.koan +application/vnd.kodak-descriptor +application/vnd.las +application/vnd.las.las+json +application/vnd.las.las+xml +application/vnd.laszip +application/vnd.ldev.productlicensing +application/vnd.leap+json +application/vnd.liberty-request+xml +application/vnd.llamagraphics.life-balance.desktop +application/vnd.llamagraphics.life-balance.exchange+xml +application/vnd.logipipe.circuit+zip +application/vnd.loom +application/vnd.lotus-1-2-3 +application/vnd.lotus-approach +application/vnd.lotus-freelance +application/vnd.lotus-notes +application/vnd.lotus-organizer +application/vnd.lotus-screencam +application/vnd.lotus-wordpro +application/vnd.macports.portpkg +application/vnd.mapbox-vector-tile +application/vnd.marlin.drm.actiontoken+xml +application/vnd.marlin.drm.conftoken+xml +application/vnd.marlin.drm.license+xml +application/vnd.marlin.drm.mdcf +application/vnd.mason+json +application/vnd.maxar.archive.3tz+zip +application/vnd.maxmind.maxmind-db +application/vnd.mcd +application/vnd.mdl +application/vnd.mdl-mbsdf +application/vnd.medcalcdata +application/vnd.mediastation.cdkey +application/vnd.medicalholodeck.recordxr +application/vnd.meridian-slingshot 
+application/vnd.mermaid +application/vnd.MFER +application/vnd.mfmp +application/vnd.micro+json +application/vnd.micrografx.flo +application/vnd.micrografx.igx +application/vnd.microsoft.portable-executable +application/vnd.microsoft.windows.thumbnail-cache +application/vnd.miele+json +application/vnd.mif +application/vnd.minisoft-hp3000-save +application/vnd.mitsubishi.misty-guard.trustweb +application/vnd.Mobius.DAF +application/vnd.Mobius.DIS +application/vnd.Mobius.MBK +application/vnd.Mobius.MQY +application/vnd.Mobius.MSL +application/vnd.Mobius.PLC +application/vnd.Mobius.TXF +application/vnd.modl +application/vnd.mophun.application +application/vnd.mophun.certificate +application/vnd.motorola.flexsuite +application/vnd.motorola.flexsuite.adsi +application/vnd.motorola.flexsuite.fis +application/vnd.motorola.flexsuite.gotap +application/vnd.motorola.flexsuite.kmr +application/vnd.motorola.flexsuite.ttc +application/vnd.motorola.flexsuite.wem +application/vnd.motorola.iprm +application/vnd.mozilla.xul+xml +application/vnd.ms-artgalry +application/vnd.ms-asf +application/vnd.ms-cab-compressed +application/vnd.ms-3mfdocument +application/vnd.ms-excel +application/vnd.ms-excel.addin.macroEnabled.12 +application/vnd.ms-excel.sheet.binary.macroEnabled.12 +application/vnd.ms-excel.sheet.macroEnabled.12 +application/vnd.ms-excel.template.macroEnabled.12 +application/vnd.ms-fontobject +application/vnd.ms-htmlhelp +application/vnd.ms-ims +application/vnd.ms-lrm +application/vnd.ms-office.activeX+xml +application/vnd.ms-officetheme +application/vnd.ms-playready.initiator+xml +application/vnd.ms-powerpoint +application/vnd.ms-powerpoint.addin.macroEnabled.12 +application/vnd.ms-powerpoint.presentation.macroEnabled.12 +application/vnd.ms-powerpoint.slide.macroEnabled.12 +application/vnd.ms-powerpoint.slideshow.macroEnabled.12 +application/vnd.ms-powerpoint.template.macroEnabled.12 +application/vnd.ms-PrintDeviceCapabilities+xml +application/vnd.ms-PrintSchemaTicket+xml 
+application/vnd.ms-project +application/vnd.ms-tnef +application/vnd.ms-windows.devicepairing +application/vnd.ms-windows.nwprinting.oob +application/vnd.ms-windows.printerpairing +application/vnd.ms-windows.wsd.oob +application/vnd.ms-wmdrm.lic-chlg-req +application/vnd.ms-wmdrm.lic-resp +application/vnd.ms-wmdrm.meter-chlg-req +application/vnd.ms-wmdrm.meter-resp +application/vnd.ms-word.document.macroEnabled.12 +application/vnd.ms-word.template.macroEnabled.12 +application/vnd.ms-works +application/vnd.ms-wpl +application/vnd.ms-xpsdocument +application/vnd.msa-disk-image +application/vnd.mseq +application/vnd.msgpack +application/vnd.msign +application/vnd.multiad.creator +application/vnd.multiad.creator.cif +application/vnd.musician +application/vnd.music-niff +application/vnd.muvee.style +application/vnd.mynfc +application/vnd.nacamar.ybrid+json +application/vnd.nato.bindingdataobject+cbor +application/vnd.nato.bindingdataobject+json +application/vnd.nato.bindingdataobject+xml +application/vnd.nato.openxmlformats-package.iepd+zip +application/vnd.ncd.control +application/vnd.ncd.reference +application/vnd.nearst.inv+json +application/vnd.nebumind.line +application/vnd.nervana +application/vnd.netfpx +application/vnd.neurolanguage.nlu +application/vnd.nimn +application/vnd.nintendo.snes.rom +application/vnd.nintendo.nitro.rom +application/vnd.nitf +application/vnd.noblenet-directory +application/vnd.noblenet-sealer +application/vnd.noblenet-web +application/vnd.nokia.catalogs +application/vnd.nokia.conml+wbxml +application/vnd.nokia.conml+xml +application/vnd.nokia.iptv.config+xml +application/vnd.nokia.iSDS-radio-presets +application/vnd.nokia.landmark+wbxml +application/vnd.nokia.landmark+xml +application/vnd.nokia.landmarkcollection+xml +application/vnd.nokia.ncd +application/vnd.nokia.n-gage.ac+xml +application/vnd.nokia.n-gage.data +application/vnd.nokia.n-gage.symbian.install +application/vnd.nokia.pcd+wbxml +application/vnd.nokia.pcd+xml 
+application/vnd.nokia.radio-preset +application/vnd.nokia.radio-presets +application/vnd.novadigm.EDM +application/vnd.novadigm.EDX +application/vnd.novadigm.EXT +application/vnd.ntt-local.content-share +application/vnd.ntt-local.file-transfer +application/vnd.ntt-local.ogw_remote-access +application/vnd.ntt-local.sip-ta_remote +application/vnd.ntt-local.sip-ta_tcp_stream +application/vnd.oai.workflows +application/vnd.oai.workflows+json +application/vnd.oai.workflows+yaml +application/vnd.oasis.opendocument.base +application/vnd.oasis.opendocument.chart +application/vnd.oasis.opendocument.chart-template +application/vnd.oasis.opendocument.database +application/vnd.oasis.opendocument.formula +application/vnd.oasis.opendocument.formula-template +application/vnd.oasis.opendocument.graphics +application/vnd.oasis.opendocument.graphics-template +application/vnd.oasis.opendocument.image +application/vnd.oasis.opendocument.image-template +application/vnd.oasis.opendocument.presentation +application/vnd.oasis.opendocument.presentation-template +application/vnd.oasis.opendocument.spreadsheet +application/vnd.oasis.opendocument.spreadsheet-template +application/vnd.oasis.opendocument.text +application/vnd.oasis.opendocument.text-master +application/vnd.oasis.opendocument.text-master-template +application/vnd.oasis.opendocument.text-template +application/vnd.oasis.opendocument.text-web +application/vnd.obn +application/vnd.ocf+cbor +application/vnd.oci.image.manifest.v1+json +application/vnd.oftn.l10n+json +application/vnd.oipf.contentaccessdownload+xml +application/vnd.oipf.contentaccessstreaming+xml +application/vnd.oipf.cspg-hexbinary +application/vnd.oipf.dae.svg+xml +application/vnd.oipf.dae.xhtml+xml +application/vnd.oipf.mippvcontrolmessage+xml +application/vnd.oipf.pae.gem +application/vnd.oipf.spdiscovery+xml +application/vnd.oipf.spdlist+xml +application/vnd.oipf.ueprofile+xml +application/vnd.oipf.userprofile+xml +application/vnd.olpc-sugar 
+application/vnd.oma.bcast.associated-procedure-parameter+xml +application/vnd.oma.bcast.drm-trigger+xml +application/vnd.oma.bcast.imd+xml +application/vnd.oma.bcast.ltkm +application/vnd.oma.bcast.notification+xml +application/vnd.oma.bcast.provisioningtrigger +application/vnd.oma.bcast.sgboot +application/vnd.oma.bcast.sgdd+xml +application/vnd.oma.bcast.sgdu +application/vnd.oma.bcast.simple-symbol-container +application/vnd.oma.bcast.smartcard-trigger+xml +application/vnd.oma.bcast.sprov+xml +application/vnd.oma.bcast.stkm +application/vnd.oma.cab-address-book+xml +application/vnd.oma.cab-feature-handler+xml +application/vnd.oma.cab-pcc+xml +application/vnd.oma.cab-subs-invite+xml +application/vnd.oma.cab-user-prefs+xml +application/vnd.oma.dcd +application/vnd.oma.dcdc +application/vnd.oma.dd2+xml +application/vnd.oma.drm.risd+xml +application/vnd.oma.group-usage-list+xml +application/vnd.oma.lwm2m+cbor +application/vnd.oma.lwm2m+json +application/vnd.oma.lwm2m+tlv +application/vnd.oma.pal+xml +application/vnd.oma.poc.detailed-progress-report+xml +application/vnd.oma.poc.final-report+xml +application/vnd.oma.poc.groups+xml +application/vnd.oma.poc.invocation-descriptor+xml +application/vnd.oma.poc.optimized-progress-report+xml +application/vnd.oma.push +application/vnd.oma.scidm.messages+xml +application/vnd.oma.xcap-directory+xml +application/vnd.omads-email+xml +application/vnd.omads-file+xml +application/vnd.omads-folder+xml +application/vnd.omaloc-supl-init +application/vnd.oma-scws-config +application/vnd.oma-scws-http-request +application/vnd.oma-scws-http-response +application/vnd.onepager +application/vnd.onepagertamp +application/vnd.onepagertamx +application/vnd.onepagertat +application/vnd.onepagertatp +application/vnd.onepagertatx +application/vnd.onvif.metadata +application/vnd.openblox.game-binary +application/vnd.openblox.game+xml +application/vnd.openeye.oeb +application/vnd.openstreetmap.data+xml +application/vnd.opentimestamps.ots 
+application/vnd.openxmlformats-officedocument.custom-properties+xml +application/vnd.openxmlformats-officedocument.customXmlProperties+xml +application/vnd.openxmlformats-officedocument.drawing+xml +application/vnd.openxmlformats-officedocument.drawingml.chart+xml +application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml +application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml +application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml +application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml +application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml +application/vnd.openxmlformats-officedocument.extended-properties+xml +application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml +application/vnd.openxmlformats-officedocument.presentationml.comments+xml +application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml +application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml +application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml +application/vnd.openxmlformats-officedocument.presentationml.presentation +application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml +application/vnd.openxmlformats-officedocument.presentationml.presProps+xml +application/vnd.openxmlformats-officedocument.presentationml.slide +application/vnd.openxmlformats-officedocument.presentationml.slide+xml +application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml +application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml +application/vnd.openxmlformats-officedocument.presentationml.slideshow +application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml +application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml +application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml 
+application/vnd.openxmlformats-officedocument.presentationml.tags+xml +application/vnd.openxmlformats-officedocument.presentationml.template +application/vnd.openxmlformats-officedocument.presentationml.template.main+xml +application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.template +application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml 
+application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml +application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml +application/vnd.openxmlformats-officedocument.theme+xml +application/vnd.openxmlformats-officedocument.themeOverride+xml +application/vnd.openxmlformats-officedocument.vmlDrawing +application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.document +application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.template +application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml +application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml +application/vnd.openxmlformats-package.core-properties+xml +application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml +application/vnd.openxmlformats-package.relationships+xml +application/vnd.oracle.resource+json +application/vnd.orange.indata +application/vnd.osa.netdeploy +application/vnd.osgeo.mapguide.package +application/vnd.osgi.bundle +application/vnd.osgi.dp +application/vnd.osgi.subsystem +application/vnd.otps.ct-kip+xml +application/vnd.oxli.countgraph +application/vnd.pagerduty+json +application/vnd.palm +application/vnd.panoply 
+application/vnd.paos.xml +application/vnd.patentdive +application/vnd.patientecommsdoc +application/vnd.pawaafile +application/vnd.pcos +application/vnd.pg.format +application/vnd.pg.osasli +application/vnd.piaccess.application-licence +application/vnd.picsel +application/vnd.pmi.widget +application/vnd.poc.group-advertisement+xml +application/vnd.pocketlearn +application/vnd.powerbuilder6 +application/vnd.powerbuilder6-s +application/vnd.powerbuilder7 +application/vnd.powerbuilder75 +application/vnd.powerbuilder75-s +application/vnd.powerbuilder7-s +application/vnd.preminet +application/vnd.previewsystems.box +application/vnd.proteus.magazine +application/vnd.psfs +application/vnd.pt.mundusmundi +application/vnd.publishare-delta-tree +application/vnd.pvi.ptid1 +application/vnd.pwg-multiplexed +application/vnd.pwg-xhtml-print+xml +application/vnd.qualcomm.brew-app-res +application/vnd.quarantainenet +application/vnd.Quark.QuarkXPress +application/vnd.quobject-quoxdocument +application/vnd.radisys.moml+xml +application/vnd.radisys.msml-audit-conf+xml +application/vnd.radisys.msml-audit-conn+xml +application/vnd.radisys.msml-audit-dialog+xml +application/vnd.radisys.msml-audit-stream+xml +application/vnd.radisys.msml-audit+xml +application/vnd.radisys.msml-conf+xml +application/vnd.radisys.msml-dialog-base+xml +application/vnd.radisys.msml-dialog-fax-detect+xml +application/vnd.radisys.msml-dialog-fax-sendrecv+xml +application/vnd.radisys.msml-dialog-group+xml +application/vnd.radisys.msml-dialog-speech+xml +application/vnd.radisys.msml-dialog-transform+xml +application/vnd.radisys.msml-dialog+xml +application/vnd.radisys.msml+xml +application/vnd.rainstor.data +application/vnd.rapid +application/vnd.rar +application/vnd.realvnc.bed +application/vnd.recordare.musicxml +application/vnd.recordare.musicxml+xml +application/vnd.relpipe +application/vnd.RenLearn.rlprint +application/vnd.resilient.logic +application/vnd.restful+json +application/vnd.rig.cryptonote 
+application/vnd.route66.link66+xml +application/vnd.rs-274x +application/vnd.ruckus.download +application/vnd.s3sms +application/vnd.sailingtracker.track +application/vnd.sar +application/vnd.sbm.cid +application/vnd.sbm.mid2 +application/vnd.scribus +application/vnd.sealed.3df +application/vnd.sealed.csf +application/vnd.sealed.doc +application/vnd.sealed.eml +application/vnd.sealed.mht +application/vnd.sealed.net +application/vnd.sealed.ppt +application/vnd.sealed.tiff +application/vnd.sealed.xls +application/vnd.sealedmedia.softseal.html +application/vnd.sealedmedia.softseal.pdf +application/vnd.seemail +application/vnd.seis+json +application/vnd.sema +application/vnd.semd +application/vnd.semf +application/vnd.shade-save-file +application/vnd.shana.informed.formdata +application/vnd.shana.informed.formtemplate +application/vnd.shana.informed.interchange +application/vnd.shana.informed.package +application/vnd.shootproof+json +application/vnd.shopkick+json +application/vnd.shp +application/vnd.shx +application/vnd.sigrok.session +application/vnd.SimTech-MindMapper +application/vnd.siren+json +application/vnd.smaf +application/vnd.smart.notebook +application/vnd.smart.teacher +application/vnd.smintio.portals.archive +application/vnd.snesdev-page-table +application/vnd.software602.filler.form+xml +application/vnd.software602.filler.form-xml-zip +application/vnd.solent.sdkm+xml +application/vnd.spotfire.dxp +application/vnd.spotfire.sfs +application/vnd.sqlite3 +application/vnd.sss-cod +application/vnd.sss-dtf +application/vnd.sss-ntf +application/vnd.stepmania.package +application/vnd.stepmania.stepchart +application/vnd.street-stream +application/vnd.sun.wadl+xml +application/vnd.sus-calendar +application/vnd.svd +application/vnd.swiftview-ics +application/vnd.sybyl.mol2 +application/vnd.sycle+xml +application/vnd.syft+json +application/vnd.syncml.dm.notification +application/vnd.syncml.dmddf+xml +application/vnd.syncml.dmtnds+wbxml 
+application/vnd.syncml.dmtnds+xml +application/vnd.syncml.dmddf+wbxml +application/vnd.syncml.dm+wbxml +application/vnd.syncml.dm+xml +application/vnd.syncml.ds.notification +application/vnd.syncml+xml +application/vnd.tableschema+json +application/vnd.tao.intent-module-archive +application/vnd.tcpdump.pcap +application/vnd.think-cell.ppttc+json +application/vnd.tml +application/vnd.tmd.mediaflex.api+xml +application/vnd.tmobile-livetv +application/vnd.tri.onesource +application/vnd.trid.tpt +application/vnd.triscape.mxs +application/vnd.trueapp +application/vnd.truedoc +application/vnd.ubisoft.webplayer +application/vnd.ufdl +application/vnd.uiq.theme +application/vnd.umajin +application/vnd.unity +application/vnd.uoml+xml +application/vnd.uplanet.alert +application/vnd.uplanet.alert-wbxml +application/vnd.uplanet.bearer-choice +application/vnd.uplanet.bearer-choice-wbxml +application/vnd.uplanet.cacheop +application/vnd.uplanet.cacheop-wbxml +application/vnd.uplanet.channel +application/vnd.uplanet.channel-wbxml +application/vnd.uplanet.list +application/vnd.uplanet.listcmd +application/vnd.uplanet.listcmd-wbxml +application/vnd.uplanet.list-wbxml +application/vnd.uri-map +application/vnd.uplanet.signal +application/vnd.valve.source.material +application/vnd.vcx +application/vnd.vd-study +application/vnd.vectorworks +application/vnd.vel+json +application/vnd.verimatrix.vcas +application/vnd.veritone.aion+json +application/vnd.veryant.thin +application/vnd.ves.encrypted +application/vnd.vidsoft.vidconference +application/vnd.visio +application/vnd.visionary +application/vnd.vividence.scriptfile +application/vnd.vsf +application/vnd.wap.sic +application/vnd.wap.slc +application/vnd.wap.wbxml +application/vnd.wap.wmlc +application/vnd.wap.wmlscriptc +application/vnd.wasmflow.wafl +application/vnd.webturbo +application/vnd.wfa.dpp +application/vnd.wfa.p2p +application/vnd.wfa.wsc +application/vnd.windows.devicepairing +application/vnd.wmc 
+application/vnd.wmf.bootstrap +application/vnd.wolfram.mathematica +application/vnd.wolfram.mathematica.package +application/vnd.wolfram.player +application/vnd.wordlift +application/vnd.wordperfect +application/vnd.wqd +application/vnd.wrq-hp3000-labelled +application/vnd.wt.stf +application/vnd.wv.csp+xml +application/vnd.wv.csp+wbxml +application/vnd.wv.ssp+xml +application/vnd.xacml+json +application/vnd.xara +application/vnd.xecrets-encrypted +application/vnd.xfdl +application/vnd.xfdl.webform +application/vnd.xmi+xml +application/vnd.xmpie.cpkg +application/vnd.xmpie.dpkg +application/vnd.xmpie.plan +application/vnd.xmpie.ppkg +application/vnd.xmpie.xlim +application/vnd.yamaha.hv-dic +application/vnd.yamaha.hv-script +application/vnd.yamaha.hv-voice +application/vnd.yamaha.openscoreformat.osfpvg+xml +application/vnd.yamaha.openscoreformat +application/vnd.yamaha.remote-setup +application/vnd.yamaha.smaf-audio +application/vnd.yamaha.smaf-phrase +application/vnd.yamaha.through-ngn +application/vnd.yamaha.tunnel-udpencap +application/vnd.yaoweme +application/vnd.yellowriver-custom-menu +application/vnd.youtube.yt +application/vnd.zul +application/vnd.zzazz.deck+xml +application/voicexml+xml +application/voucher-cms+json +application/vp +application/vq-rtcpxr +application/wasm +application/watcherinfo+xml +application/webpush-options+json +application/whoispp-query +application/whoispp-response +application/widget +application/wita +application/wordperfect5.1 +application/wsdl+xml +application/wspolicy+xml +application/x-pki-message +application/x-www-form-urlencoded +application/x-x509-ca-cert +application/x-x509-ca-ra-cert +application/x-x509-next-ca-cert +application/x400-bp +application/xacml+xml +application/xcap-att+xml +application/xcap-caps+xml +application/xcap-diff+xml +application/xcap-el+xml +application/xcap-error+xml +application/xcap-ns+xml +application/xcon-conference-info-diff+xml +application/xcon-conference-info+xml +application/xenc+xml 
+application/xfdf +application/xhtml+xml +application/xliff+xml +application/xml +application/xml-dtd +application/xml-external-parsed-entity +application/xml-patch+xml +application/xmpp+xml +application/xop+xml +application/xslt+xml +application/xv+xml +application/yaml +application/yang +application/yang-data+cbor +application/yang-data+json +application/yang-data+xml +application/yang-patch+json +application/yang-patch+xml +application/yang-sid+json +application/yin+xml +application/zip +application/zlib +application/zstd +audio/1d-interleaved-parityfec +audio/32kadpcm +audio/3gpp +audio/3gpp2 +audio/aac +audio/ac3 +audio/AMR +audio/AMR-WB +audio/amr-wb+ +audio/aptx +audio/asc +audio/ATRAC-ADVANCED-LOSSLESS +audio/ATRAC-X +audio/ATRAC3 +audio/basic +audio/BV16 +audio/BV32 +audio/clearmode +audio/CN +audio/DAT12 +audio/dls +audio/dsr-es201108 +audio/dsr-es202050 +audio/dsr-es202211 +audio/dsr-es202212 +audio/DV +audio/DVI4 +audio/eac3 +audio/encaprtp +audio/EVRC +audio/EVRC-QCP +audio/EVRC0 +audio/EVRC1 +audio/EVRCB +audio/EVRCB0 +audio/EVRCB1 +audio/EVRCNW +audio/EVRCNW0 +audio/EVRCNW1 +audio/EVRCWB +audio/EVRCWB0 +audio/EVRCWB1 +audio/EVS +audio/example +audio/flac +audio/flexfec +audio/fwdred +audio/G711-0 +audio/G719 +audio/G7221 +audio/G722 +audio/G723 +audio/G726-16 +audio/G726-24 +audio/G726-32 +audio/G726-40 +audio/G728 +audio/G729 +audio/G7291 +audio/G729D +audio/G729E +audio/GSM +audio/GSM-EFR +audio/GSM-HR-08 +audio/iLBC +audio/ip-mr_v2.5 +audio/L8 +audio/L16 +audio/L20 +audio/L24 +audio/LPC +audio/matroska +audio/MELP +audio/MELP600 +audio/MELP1200 +audio/MELP2400 +audio/mhas +audio/midi-clip +audio/mobile-xmf +audio/MPA +audio/mp4 +audio/MP4A-LATM +audio/mpa-robust +audio/mpeg +audio/mpeg4-generic +audio/ogg +audio/opus +audio/parityfec +audio/PCMA +audio/PCMA-WB +audio/PCMU +audio/PCMU-WB +audio/prs.sid +audio/QCELP +audio/raptorfec +audio/RED +audio/rtp-enc-aescm128 +audio/rtploopback +audio/rtp-midi +audio/rtx +audio/scip +audio/SMV +audio/SMV0 
+audio/SMV-QCP +audio/sofa +audio/sp-midi +audio/speex +audio/t140c +audio/t38 +audio/telephone-event +audio/TETRA_ACELP +audio/TETRA_ACELP_BB +audio/tone +audio/TSVCIS +audio/UEMCLIP +audio/ulpfec +audio/usac +audio/VDVI +audio/VMR-WB +audio/vnd.3gpp.iufp +audio/vnd.4SB +audio/vnd.audiokoz +audio/vnd.CELP +audio/vnd.cisco.nse +audio/vnd.cmles.radio-events +audio/vnd.cns.anp1 +audio/vnd.cns.inf1 +audio/vnd.dece.audio +audio/vnd.digital-winds +audio/vnd.dlna.adts +audio/vnd.dolby.heaac.1 +audio/vnd.dolby.heaac.2 +audio/vnd.dolby.mlp +audio/vnd.dolby.mps +audio/vnd.dolby.pl2 +audio/vnd.dolby.pl2x +audio/vnd.dolby.pl2z +audio/vnd.dolby.pulse.1 +audio/vnd.dra +audio/vnd.dts +audio/vnd.dts.hd +audio/vnd.dts.uhd +audio/vnd.dvb.file +audio/vnd.everad.plj +audio/vnd.hns.audio +audio/vnd.lucent.voice +audio/vnd.ms-playready.media.pya +audio/vnd.nokia.mobile-xmf +audio/vnd.nortel.vbk +audio/vnd.nuera.ecelp4800 +audio/vnd.nuera.ecelp7470 +audio/vnd.nuera.ecelp9600 +audio/vnd.octel.sbc +audio/vnd.presonus.multitrack +audio/vnd.qcelp +audio/vnd.rhetorex.32kadpcm +audio/vnd.rip +audio/vnd.sealedmedia.softseal.mpeg +audio/vnd.vmx.cvsd +audio/vorbis +audio/vorbis-config +font/collection +font/otf +font/sfnt +font/ttf +font/woff +font/woff2 +haptics/ivs +haptics/hjif +haptics/hmpg +image/aces +image/apng +image/avci +image/avcs +image/avif +image/bmp +image/cgm +image/dicom-rle +image/dpx +image/emf +image/example +image/fits +image/g3fax +image/gif +image/heic +image/heic-sequence +image/heif +image/heif-sequence +image/hej2k +image/hsj2 +image/ief +image/j2c +image/jls +image/jp2 +image/jpeg +image/jph +image/jphc +image/jpm +image/jpx +image/jxl +image/jxr +image/jxrA +image/jxrS +image/jxs +image/jxsc +image/jxsi +image/jxss +image/ktx +image/ktx2 +image/naplps +image/png +image/prs.btif +image/prs.pti +image/pwg-raster +image/svg+xml +image/t38 +image/tiff +image/tiff-fx +image/vnd.adobe.photoshop +image/vnd.airzip.accelerator.azv +image/vnd.cns.inf2 +image/vnd.dece.graphic 
+image/vnd.djvu +image/vnd.dwg +image/vnd.dxf +image/vnd.dvb.subtitle +image/vnd.fastbidsheet +image/vnd.fpx +image/vnd.fst +image/vnd.fujixerox.edmics-mmr +image/vnd.fujixerox.edmics-rlc +image/vnd.globalgraphics.pgb +image/vnd.microsoft.icon +image/vnd.mix +image/vnd.ms-modi +image/vnd.mozilla.apng +image/vnd.net-fpx +image/vnd.pco.b16 +image/vnd.radiance +image/vnd.sealed.png +image/vnd.sealedmedia.softseal.gif +image/vnd.sealedmedia.softseal.jpg +image/vnd.svf +image/vnd.tencent.tap +image/vnd.valve.source.texture +image/vnd.wap.wbmp +image/vnd.xiff +image/vnd.zbrush.pcx +image/webp +image/wmf +image/x-emf +image/x-wmf +message/bhttp +message/CPIM +message/delivery-status +message/disposition-notification +message/example +message/external-body +message/feedback-report +message/global +message/global-delivery-status +message/global-disposition-notification +message/global-headers +message/http +message/imdn+xml +message/mls +message/news +message/ohttp-req +message/ohttp-res +message/partial +message/rfc822 +message/s-http +message/sip +message/sipfrag +message/tracking-status +message/vnd.si.simp +message/vnd.wfa.wsc +model/3mf +model/e57 +model/example +model/gltf-binary +model/gltf+json +model/JT +model/iges +model/mesh +model/mtl +model/obj +model/prc +model/step +model/step+xml +model/step+zip +model/step-xml+zip +model/stl +model/u3d +model/vnd.bary +model/vnd.cld +model/vnd.collada+xml +model/vnd.dwf +model/vnd.flatland.3dml +model/vnd.gdl +model/vnd.gs-gdl +model/vnd.gtw +model/vnd.moml+xml +model/vnd.mts +model/vnd.opengex +model/vnd.parasolid.transmit.binary +model/vnd.parasolid.transmit.text +model/vnd.pytha.pyox +model/vnd.rosette.annotated-data-model +model/vnd.sap.vds +model/vnd.usda +model/vnd.usdz+zip +model/vnd.valve.source.compiled-map +model/vnd.vtu +model/vrml +model/x3d-vrml +model/x3d+fastinfoset +model/x3d+xml +multipart/alternative +multipart/appledouble +multipart/byteranges +multipart/digest +multipart/encrypted +multipart/example 
+multipart/form-data +multipart/header-set +multipart/mixed +multipart/multilingual +multipart/parallel +multipart/related +multipart/report +multipart/signed +multipart/vnd.bint.med-plus +multipart/voice-message +multipart/x-mixed-replace +text/1d-interleaved-parityfec +text/cache-manifest +text/calendar +text/cql +text/cql-expression +text/cql-identifier +text/css +text/csv +text/csv-schema +text/directory +text/dns +text/ecmascript +text/encaprtp +text/enriched +text/example +text/fhirpath +text/flexfec +text/fwdred +text/gff3 +text/grammar-ref-list +text/hl7v2 +text/html +text/javascript +text/jcr-cnd +text/markdown +text/mizar +text/n3 +text/parameters +text/parityfec +text/plain +text/provenance-notation +text/prs.fallenstein.rst +text/prs.lines.tag +text/prs.prop.logic +text/prs.texi +text/raptorfec +text/RED +text/rfc822-headers +text/richtext +text/rtf +text/rtp-enc-aescm128 +text/rtploopback +text/rtx +text/SGML +text/shaclc +text/shex +text/spdx +text/strings +text/t140 +text/tab-separated-values +text/troff +text/turtle +text/ulpfec +text/uri-list +text/vcard +text/vnd.a +text/vnd.abc +text/vnd.ascii-art +text/vnd.curl +text/vnd.debian.copyright +text/vnd.DMClientScript +text/vnd.dvb.subtitle +text/vnd.esmertec.theme-descriptor +text/vnd.exchangeable +text/vnd.familysearch.gedcom +text/vnd.ficlab.flt +text/vnd.fly +text/vnd.fmi.flexstor +text/vnd.gml +text/vnd.graphviz +text/vnd.hans +text/vnd.hgl +text/vnd.in3d.3dml +text/vnd.in3d.spot +text/vnd.IPTC.NewsML +text/vnd.IPTC.NITF +text/vnd.latex-z +text/vnd.motorola.reflex +text/vnd.ms-mediapackage +text/vnd.net2phone.commcenter.command +text/vnd.radisys.msml-basic-layout +text/vnd.senx.warpscript +text/vnd.si.uricatalogue +text/vnd.sun.j2me.app-descriptor +text/vnd.sosi +text/vnd.trolltech.linguist +text/vnd.vcf +text/vnd.wap.si +text/vnd.wap.sl +text/vnd.wap.wml +text/vnd.wap.wmlscript +text/vnd.zoo.kcl +text/vtt +text/wgsl +text/xml +text/xml-external-parsed-entity +video/1d-interleaved-parityfec 
+video/3gpp +video/3gpp2 +video/3gpp-tt +video/AV1 +video/BMPEG +video/BT656 +video/CelB +video/DV +video/encaprtp +video/evc +video/example +video/FFV1 +video/flexfec +video/H261 +video/H263 +video/H263-1998 +video/H263-2000 +video/H264 +video/H264-RCDO +video/H264-SVC +video/H265 +video/H266 +video/iso.segment +video/JPEG +video/jpeg2000 +video/jxsv +video/matroska +video/matroska-3d +video/mj2 +video/MP1S +video/MP2P +video/MP2T +video/mp4 +video/MP4V-ES +video/MPV +video/mpeg +video/mpeg4-generic +video/nv +video/ogg +video/parityfec +video/pointer +video/quicktime +video/raptorfec +video/raw +video/rtp-enc-aescm128 +video/rtploopback +video/rtx +video/scip +video/smpte291 +video/SMPTE292M +video/ulpfec +video/vc1 +video/vc2 +video/vnd.CCTV +video/vnd.dece.hd +video/vnd.dece.mobile +video/vnd.dece.mp4 +video/vnd.dece.pd +video/vnd.dece.sd +video/vnd.dece.video +video/vnd.directv.mpeg +video/vnd.directv.mpeg-tts +video/vnd.dlna.mpeg-tts +video/vnd.dvb.file +video/vnd.fvt +video/vnd.hns.video +video/vnd.iptvforum.1dparityfec-1010 +video/vnd.iptvforum.1dparityfec-2005 +video/vnd.iptvforum.2dparityfec-1010 +video/vnd.iptvforum.2dparityfec-2005 +video/vnd.iptvforum.ttsavc +video/vnd.iptvforum.ttsmpeg2 +video/vnd.motorola.video +video/vnd.motorola.videop +video/vnd.mpegurl +video/vnd.ms-playready.media.pyv +video/vnd.nokia.interleaved-multimedia +video/vnd.nokia.mp4vr +video/vnd.nokia.videovoip +video/vnd.objectvideo +video/vnd.radgamettools.bink +video/vnd.radgamettools.smacker +video/vnd.sealed.mpeg1 +video/vnd.sealed.mpeg4 +video/vnd.sealed.swf +video/vnd.sealedmedia.softseal.mov +video/vnd.uvvu.mp4 +video/vnd.youtube.yt +video/vnd.vivo +video/VP8 +video/VP9 diff --git a/eark_validator/ipxml/resources/vocabs/__init__.py b/eark_validator/ipxml/resources/vocabs/__init__.py new file mode 100644 index 0000000..88fd6de --- /dev/null +++ b/eark_validator/ipxml/resources/vocabs/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK 
Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package vocabularies +""" diff --git a/eark_validator/ipxml/schematron.py b/eark_validator/ipxml/schematron.py index 5889441..f586e24 100644 --- a/eark_validator/ipxml/schematron.py +++ b/eark_validator/ipxml/schematron.py @@ -1,78 +1,142 @@ -#!/usr/bin/env python +#!/usr/bin/env python # -*- coding: utf-8 -*- -# -# E-ARK Validation -# Copyright (C) 2019 -# All rights reserved. -# -# Licensed to the E-ARK project under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The E-ARK project licenses -# this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# -"""Module to capture everything schematron validation related.""" -import os -from importlib_resources import files -from typing import Generator - -from lxml import etree as ET -from lxml.isoschematron import Schematron - -from .resources import schematron as SCHEMATRON -from eark_validator.const import NO_PATH, NOT_FILE - -SCHEMATRON_NS = '{http://purl.oclc.org/dsdl/schematron}' -SVRL_NS = '{http://purl.oclc.org/dsdl/svrl}' - -class SchematronRuleset(): - """Encapsulates a set of Schematron rules loaded from a file.""" - def __init__(self, sch_path: str=None): - if not os.path.exists(sch_path): - raise FileNotFoundError(NO_PATH.format(sch_path)) - if not os.path.isfile(sch_path): - raise ValueError(NOT_FILE.format(sch_path)) - self._path = sch_path - try: - self._schematron = Schematron(file=self._path, store_schematron=True, store_report=True) - except ET.SchematronParseError as ex: - raise ValueError('Rules file is not valid XML: {}. {}'.format(sch_path, ex.error_log.last_error.message )) - except KeyError as ex: - raise ValueError('Rules file is not valid Schematron: {}. 
{}'.format(sch_path, ex.__doc__)) - - @property - def path(self) -> str: - """Return the path to the Schematron rules file.""" - return self._path - - @property - def schematron(self) -> Schematron: - """Return the Schematron object.""" - return self._schematron - - def get_assertions(self) -> Generator[ ET.Element, None, None]: - """Generator that returns the rules one at a time.""" - xml_rules = ET.XML(bytes(self.schematron.schematron)) - for ele in xml_rules.iter(): - if ele.tag == SCHEMATRON_NS + 'assert': - yield ele - - def validate(self, to_validate: str) -> ET.Element: - """Validate a file against the loaded Schematron ruleset.""" - xml_file = ET.parse(to_validate) - self.schematron.validate(xml_file) - return self.schematron.validation_report - -def get_schematron_path(id: str, section: str) -> str: - return str(files(SCHEMATRON).joinpath(id).joinpath('mets_{}_rules.xml'.format(section))) +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +"""Module to capture everything schematron validation related.""" +import os +from urllib.request import urlopen +from typing import Generator + +from importlib_resources import files + +from lxml import etree as ET +from lxml.isoschematron import Schematron + +from eark_validator.const import NO_PATH, NOT_FILE +from .resources import schematron as SCHEMATRON +from .resources import vocabs as vocabularies + +SCHEMATRON_NS = '{http://purl.oclc.org/dsdl/schematron}' +SVRL_NS = '{http://purl.oclc.org/dsdl/svrl}' + +class SchematronTests(): + __vocabulary_definitions = { + '@TYPE': 'https://earkcsip.dilcis.eu/schema/CSIPVocabularyContentCategory.xml', + '@csip:CONTENTINFORMATIONTYPE': 'https://earkcsip.dilcis.eu/schema/CSIPVocabularyContentInformationType.xml', + '@csip:OAISPACKAGETYPE': 'https://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml', + '@STATUS': 'https://earkcsip.dilcis.eu/schema/CSIPVocabularyStatus.xml' + } + + tests = {} + + def __init__(self): + for attribute, vocabulary_uri in self.__vocabulary_definitions.items(): + self.tests[attribute + '_vocabulary_test'] = self.__create_vocabulary_test(attribute, vocabulary_uri) + + self.tests['@MIMETYPE_IANA_test'] = self.___create_IANA_test() + + def __create_vocabulary_test(self, attribute: str, vocabulary_uri: str) -> str: + vocabulary_tests = [] + for line_bytes in urlopen(vocabulary_uri): + line = line_bytes.decode('utf-8') + if 'Term' not in line: + continue + + start = line.find('>') + 1 + end = line.find('<', start) + + vocabulary_item = line[start:end] + vocabulary_tests.append(f"({attribute} = '{vocabulary_item}')") + + return ' or '.join(vocabulary_tests) + + def ___create_IANA_test(self) -> str: + mime_tests = [] + with open(str(files(vocabularies).joinpath('IANA.txt')), 'r') as iana: + for mime_type in iana: + mime_type = mime_type.rstrip('\n') + mime_tests.append(f"(@MIMETYPE = '{mime_type}')") + + return ' or '.join(mime_tests) + +schematron_tests = SchematronTests() + +class 
SchematronRuleset(): + """Encapsulates a set of Schematron rules loaded from a file.""" + def __init__(self, sch_path: str=None): + if not os.path.exists(sch_path): + raise FileNotFoundError(NO_PATH.format(sch_path)) + if not os.path.isfile(sch_path): + raise ValueError(NOT_FILE.format(sch_path)) + self._path = sch_path + try: + with open(sch_path) as schematron_file: + schematron_data = schematron_file.read() + for test_name, test_value in schematron_tests.tests.items(): + schematron_data = schematron_data.replace(test_name, test_value) + + tree = ET.XML(schematron_data) + self._schematron = Schematron(etree=tree, store_schematron=True, store_report=True) + except (ET.SchematronParseError, ET.XMLSyntaxError) as ex: + ex_mess = ex.error_log.last_error.message # pylint: disable=E1101 + subject = 'Schematron' + raise ValueError(f'Rules file is not valid {subject}: {sch_path}. {ex_mess}') from ex + except KeyError as ex: + ex_mess = ex.__doc__ + subject = 'XML' + raise ValueError(f'Rules file is not valid {subject}: {sch_path}. 
{ex_mess}') from ex + + @property + def path(self) -> str: + """Return the path to the Schematron rules file.""" + return self._path + + @property + def schematron(self) -> Schematron: + """Return the Schematron object.""" + return self._schematron + + @property + def assertions(self) -> Generator[ ET.Element, None, None]: + """Generator that returns the assertion rules one at a time.""" + xml_rules = ET.XML(bytes(self.schematron.schematron)) + for ele in xml_rules.iter(): + if ele.tag == SCHEMATRON_NS + 'assert': + yield ele + + @property + def reports(self) -> Generator[ ET.Element, None, None]: + """Generator that returns the report rules one at a time.""" + xml_rules = ET.XML(bytes(self.schematron.schematron)) + for ele in xml_rules.iter(): + if ele.tag == SCHEMATRON_NS + 'report': + yield ele + + def validate(self, to_validate: str) -> ET.Element: + """Validate a file against the loaded Schematron ruleset.""" + xml_file = ET.parse(to_validate) + self.schematron.validate(xml_file) + return self.schematron.validation_report + +def get_schematron_path(version: str, spec_id: str, section: str) -> str: + return str(files(SCHEMATRON).joinpath(version).joinpath(spec_id).joinpath(f'mets_{section}_rules.xml')) diff --git a/eark_validator/mets.py b/eark_validator/mets.py index eca9393..6aaa870 100644 --- a/eark_validator/mets.py +++ b/eark_validator/mets.py @@ -24,47 +24,117 @@ # """METS Schema validation.""" import os +from pathlib import Path +from typing import Dict, List from lxml import etree -from eark_validator.infopacks.manifest import FileItem, Manifest from eark_validator.ipxml.schema import IP_SCHEMA from eark_validator.ipxml.namespaces import Namespaces +from eark_validator.model.checksum import Checksum, ChecksumAlg +from eark_validator.model.metadata import FileEntry, MetsFile, MetsRoot +from eark_validator.model.validation_report import Result +from eark_validator.utils import get_path +from eark_validator.const import NOT_FILE, NOT_VALID_FILE + 
+NAMESPACES : str = 'namespaces' +OBJID: str = 'objid' +LABEL: str = 'label' +TYPE: str = 'type' +PROFILE: str = 'profile' +OTHERTYPE: str = 'OTHERTYPE' + +START_ELE: str = 'start' +START_NS: str = 'start-ns' + +class MetsFiles(): + @staticmethod + def details_from_mets_root(namespaces: dict[str,str], root_element: etree.Element) -> MetsRoot: + return MetsRoot.model_validate({ + NAMESPACES: namespaces, + OBJID: root_element.get(OBJID.upper(), ''), + LABEL: root_element.get(LABEL.upper(), ''), + TYPE: root_element.get(TYPE.upper(), ''), + PROFILE: root_element.get(PROFILE.upper(), '') + }) + + @staticmethod + def from_file(mets_file: Path | str) -> MetsFile: + path: Path = get_path(mets_file, True) + if not path.is_file(): + raise ValueError(NOT_FILE.format(mets_file)) + ns: dict[str, str] = {} + entries: list[FileEntry] = [] + othertype = contentinformationtype = oaispackagetype = mets_root = '' + try: + parsed_mets = etree.iterparse(mets_file, events=[START_ELE, START_NS]) + for event, element in parsed_mets: + if event == START_NS: + prefix = element[0] + ns_uri = element[1] + ns[prefix] = ns_uri + if event == 'start': + if element.tag == Namespaces.METS.qualify('mets'): + mets_root: MetsRoot = MetsFiles.details_from_mets_root(ns, element) + othertype = element.get(Namespaces.CSIP.qualify(OTHERTYPE), '') + contentinformationtype = element.get( + Namespaces.CSIP.qualify('CONTENTINFORMATIONTYPE'), + '' + ) + elif element.tag == Namespaces.METS.qualify('metsHdr'): + oaispackagetype = element.get( + Namespaces.CSIP.qualify('OAISPACKAGETYPE'), '' + ) + elif element.tag in [ + Namespaces.METS.qualify('file'), + Namespaces.METS.qualify('mdRef') + ]: + entries.append(_parse_file_entry(element)) + except etree.XMLSyntaxError as ex: + raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML')) from ex + return MetsFile.model_validate({ + 'root': mets_root, + 'oaispackagetype': oaispackagetype, + 'othertype': othertype, + 'contentinformationtype': contentinformationtype, + 
'file_entries': entries + }) class MetsValidator(): """Encapsulates METS schema validation.""" def __init__(self, root: str): - self._validation_errors = [] - self._package_root = root - self._reps_mets = {} - self._file_refs = [] + self._validation_errors: List[Result] = [] + self._package_root: str = root + self._reps_mets: Dict[str , str] = {} + self._file_refs: List[FileEntry] = [] @property def root(self) -> str: return self._package_root @property - def validation_errors(self) -> list[str]: + def validation_errors(self) -> List[Result]: return self._validation_errors @property - def representations(self) -> list[str]: + def representations(self) -> List[str]: return self._reps_mets.keys() @property - def representation_mets(self) -> list[str]: + def representation_mets(self) -> List[str]: return self._reps_mets.values() @property - def file_references(self) -> list[FileItem]: + def file_references(self) -> List[FileEntry]: return self._file_refs + @property + def is_valid(self) -> bool: + return len(self._validation_errors) == 0 + def get_mets_path(self, rep_name: str) -> str: return self._reps_mets[rep_name] - def get_manifest(self) -> Manifest: - return Manifest.from_file_items(self._package_root, self._file_refs) - def validate_mets(self, mets: str) -> bool: ''' Validates a Mets file. The Mets file is parsed with etree.iterparse(), @@ -79,29 +149,76 @@ def validate_mets(self, mets: str) -> bool: self._package_root, mets = _handle_rel_paths(self._package_root, mets) try: parsed_mets = etree.iterparse(mets, schema=IP_SCHEMA.get('csip')) - for event, element in parsed_mets: + for _, element in parsed_mets: self._process_element(element) except etree.XMLSyntaxError as synt_err: - self._validation_errors.append(synt_err) + self._validation_errors.append( + Result.model_validate({ + 'rule_id': 'XML-1', + 'location': synt_err.filename + str(synt_err.lineno) + str(synt_err.offset), + 'message': f'File {mets} is not valid XML. 
{synt_err.msg}', + 'severity': 'Error' + }) + ) return len(self._validation_errors) == 0 def _process_element(self, element: etree.Element) -> None: # Define what to do with specific tags. if element.tag == Namespaces.METS.qualify('div') and \ - element.attrib['LABEL'].startswith('Representations/'): + element.attrib['LABEL'].lower().startswith('representations/'): self._process_rep_div(element) return - if element.tag == Namespaces.METS.qualify('file') or element.tag == Namespaces.METS.qualify('mdRef'): - self._file_refs.append(FileItem.from_element(element)) + if element.tag in [ Namespaces.METS.qualify('file'), Namespaces.METS.qualify('mdRef') ]: + self._file_refs.append(_parse_file_entry(element)) def _process_rep_div(self, element: etree.Element) -> None: rep = element.attrib['LABEL'].rsplit('/', 1)[1] for child in element.getchildren(): if child.tag == Namespaces.METS.qualify('mptr'): - self._reps_mets.update({rep: child.attrib[Namespaces.XLINK.qualify('href')]}) + self._reps_mets.update({ + rep: child.attrib[Namespaces.XLINK.qualify('href')] + }) + +def _parse_file_entry(element: etree.Element) -> FileEntry: + """Create a FileItem from an etree element.""" + return FileEntry.model_validate({ + 'path': _path_from_xml_element(element), + 'size': int(element.attrib['SIZE']), + 'checksum': _checksum_from_mets_element(element), + 'mimetype': element.attrib.get('MIMETYPE') or '' + }) + +def _path_from_xml_element(element: etree.Element) -> str: + loc_ele: etree.Element = element + if element.tag in [ Namespaces.METS.qualify('file'), 'file' ]: + tag: str = Namespaces.METS.qualify('FLocat') if hasattr(element, 'nsmap') else 'FLocat' + loc_ele = element.find(tag) + if element.tag in [ + Namespaces.METS.qualify('file'), + 'file', Namespaces.METS.qualify('mdRef'), + 'mdRef' + ]: + return _get_path_attrib(loc_ele) + raise ValueError(f'Element {element.tag} is not a METS:file or METS:mdRef element.') + +def _get_path_attrib(element: etree.Element) -> str: + """Get the 
path attribute from an etree element.""" + attrib_name = Namespaces.XLINK.qualify('href') if hasattr(element, 'nsmap') else 'href' + return element.attrib.get(attrib_name) or '' + +def _checksum_from_mets_element(element: etree.Element) -> Checksum: + """Create a Checksum from an etree element.""" + # Get the child flocat element and grab the href attribute. + return Checksum.model_validate({ + 'algorithm': ChecksumAlg.from_string(element.attrib['CHECKSUMTYPE']), + 'value': element.attrib['CHECKSUM']}, + strict=True) def _handle_rel_paths(rootpath: str, metspath: str) -> tuple[str, str]: if metspath.startswith('file:///') or os.path.isabs(metspath): return metspath.rsplit('/', 1)[0], metspath - relpath = os.path.join(rootpath, metspath[9:]) if metspath.startswith('file://./') else os.path.join(rootpath, metspath) + if metspath.startswith('file://./'): + relpath = os.path.join(rootpath, metspath[9:]) + else: + relpath = os.path.join(rootpath, metspath) return relpath.rsplit('/', 1)[0], relpath diff --git a/eark_validator/model/__init__.py b/eark_validator/model/__init__.py new file mode 100644 index 0000000..9441015 --- /dev/null +++ b/eark_validator/model/__init__.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package model types and constants. +""" +# import models into model package +from .checksum import Checksum, ChecksumAlg +from .manifest import Manifest, ManifestEntry, SourceType +from .validation_report import ValidationReport +from .package_details import PackageDetails +from .package_details import Representation +from .validation_report import ( + Level, + Severity, + StructureStatus, + StructResults, + Result +) diff --git a/eark_validator/model/checksum.py b/eark_validator/model/checksum.py new file mode 100644 index 0000000..bae5699 --- /dev/null +++ b/eark_validator/model/checksum.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" + E-ARK : Information Package Validation Model types + Types for supported checksum algorithms and checksum values +""" +from enum import Enum, unique +import hashlib +from typing import Annotated, Optional + +from pydantic import BaseModel, StringConstraints + +@unique +class ChecksumAlg(str, Enum): + """ + Enumerated type for supported checksum algorithms. + """ + MD5 = 'MD5' + SHA1 = 'SHA-1' + SHA256 = 'SHA-256' + SHA384 = 'SHA-384' + SHA512 = 'SHA-512' + + @classmethod + def from_string(cls, value: str) -> 'ChecksumAlg': + """ + Obtain a ChecksumAlg from a string identifier. + + Args: + value (str): The string identifier for the algorithm. + + Returns: + ChecksumAlg: The appropriate ChecksumAlg for the given string identifier, + or None if not found. + """ + if isinstance(value, ChecksumAlg): + return value + search_value = value.upper() if hasattr(value, 'upper') else value + for algorithm in ChecksumAlg: + if search_value in [ algorithm.name, algorithm.value ]: + return algorithm + raise ValueError(f'No ChecksumAlg with id: {value}') + + @classmethod + def get_implementation(cls, algorithm: 'ChecksumAlg' = SHA1): + """ + Get the appropriate hashlib implementation for the given algorithm. + + Args: + algorithm (ChecksumAlg, optional): the enumn type for the required ChecksumAlg, + or a string identifier. Defaults to SHA1. + + Raises: + ValueError: if the requested algorithm is not supported. + + Returns: + hashlib._Hash: The hashlib implementation for the requested algorithm. 
+ """ + if isinstance(algorithm, str): + algorithm = cls.from_string(algorithm) + if algorithm == ChecksumAlg.SHA1: + return hashlib.sha1() + if algorithm == ChecksumAlg.SHA256: + return hashlib.sha256() + if algorithm == ChecksumAlg.SHA384: + return hashlib.sha384() + if algorithm == ChecksumAlg.SHA512: + return hashlib.sha512() + if algorithm == ChecksumAlg.MD5: + return hashlib.md5() + raise ValueError(f'Algorithm {algorithm} not supported.') + +class Checksum(BaseModel): + """ + Model type for a checksum value + """ + algorithm: ChecksumAlg = ChecksumAlg.SHA1 + """The algorithm used to generate the checksum, defaults to SHA1.""" + value: Annotated[ str, StringConstraints(to_upper=True) ] = '' + """The checksum value as an uppercase hexadecimal string, defaults to an empty string.""" diff --git a/eark_validator/model/constants.py b/eark_validator/model/constants.py new file mode 100644 index 0000000..05b0211 --- /dev/null +++ b/eark_validator/model/constants.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Identifier for METS as a manifest source, upper-cased to form SourceType.METS
METS = 'mets'
# File name of a METS document
METS_FILE = 'METS.xml'
# Fallback MIME type, the default for FileEntry.mimetype
MIME_DEFAULT = 'application/octet-stream'
# Requirement levels, the values of the specifications Level enum
MAY = 'MAY'
SHOULD = 'SHOULD'
MUST = 'MUST'
# Shared "unknown" label reused by the Severity, StructureStatus,
# MetadataStatus and (upper-cased) SourceType enums
UNKNOWN = 'Unknown'
# Message severities, the values of the validation report Severity enum
INFORMATION = 'Info'
WARNING = 'Warn'
ERROR = 'Error'
# Structure check outcomes, the values of the StructureStatus enum
NOTWELLFORMED = 'NotWellFormed'
WELLFORMED = 'WellFormed'
# Identifier for a package as a manifest source, upper-cased to form SourceType.PACKAGE
PACKAGE = 'Package'
# Metadata check outcomes, the values of the MetadataStatus enum
VALID = 'VALID'
INVALID = 'INVALID'
class ManifestEntry(BaseModel):
    """A single manifest entry: a path together with its size and checksums."""
    # Only the path is required, size and checksums have defaults
    path : Path | str
    size : int = 0
    checksums : List[Checksum] = []

class ManifestSummary(BaseModel):
    """Summary figures for a manifest: a file count and a total size."""
    file_count: int = 0
    total_size: int = 0

@unique
class SourceType(str, Enum):
    """Enum covering the possible sources of a manifest."""
    # Source of the manifest is not known
    UNKNOWN = UNKNOWN.upper()
    # Value 'METS', presumably a manifest read from a METS document - TODO confirm
    METS = METS.upper()
    # Value 'PACKAGE', presumably a manifest built from the package itself - TODO confirm
    PACKAGE = PACKAGE.upper()

class Manifest(BaseModel):
    """A manifest: its source type, root path, optional summary and entries."""
    source: SourceType = SourceType.UNKNOWN
    # Required: there is no default root
    root: Path
    # Stored independently of the computed properties below, the two are not reconciled here
    summary: Optional[ManifestSummary] = None
    entries: List[ManifestEntry] = []

    @property
    def file_count(self) -> int:
        """Return the number of entries in the manifest."""
        return len(self.entries)

    @property
    def total_size(self) -> int:
        """Return the sum of the sizes of all entries."""
        return sum(entry.size for entry in self.entries)
class EntryType(str, Enum):
    """Enumerates the kinds of entry that a FileEntry can describe."""
    FILE = 'file'
    METADATA = 'metadata'

class FileEntry(BaseModel):
    """A single file entry: its path, entry type, size, checksum and MIME type."""
    path : Path | str
    type: EntryType = EntryType.FILE
    size : int = 0
    # Required: no default checksum is supplied
    checksum : Checksum
    # Lower-cased by the string constraint, falls back to MIME_DEFAULT
    mimetype : Annotated[ str, StringConstraints(to_lower=True) ] = MIME_DEFAULT

class MetsRoot(BaseModel):
    """Values recorded for a METS root: namespaces, objid, label, type and profile."""
    # NOTE(review): presumably prefix -> namespace URI, confirm against the parser
    namespaces: dict[str, str] = {}
    objid: str = ''
    label: str= ''
    type: str = ''
    profile: str = ''

class MetsFile(BaseModel):
    """Pairs a MetsRoot with the list of FileEntry items for one METS file."""
    root: MetsRoot = MetsRoot()
    file_entries: List[FileEntry] = []
def _hyphenate_sha_id(alg_name: Any) -> Any:
    """Return ``alg_name`` with a hyphen inserted after 'SHA' when it has none (SHA256 -> SHA-256)."""
    if isinstance(alg_name, str) and alg_name.startswith('SHA') and '-' not in alg_name:
        return f'{alg_name[:3]}-{alg_name[3:]}'
    return alg_name


class PackageDetails(BaseModel):
    """Descriptive details of an information package, including its checksums."""
    name: str = ''
    label: str = ''
    oaispackagetype: str = ''
    othertype: str = ''
    contentinformationtype: str = ''
    checksums: List[Checksum] = []

    # Validator to add a hyphen to the SHA checksum algorithm IDs generated by commons-ip
    @model_validator(mode='before')
    @classmethod
    def convert_checksum_ids(cls, data: Any) -> Any:
        """
        Normalise commons-ip style checksum algorithm ids before field validation.

        Dict entries in ``checksums`` are converted to Checksum instances with
        hyphen-less SHA ids (``SHA256``) rewritten to the hyphenated form
        (``SHA-256``). Entries that are not dicts, e.g. existing Checksum
        instances, are passed through for pydantic to validate.

        Args:
            data (Any): the raw input, only dict input is inspected.

        Returns:
            Any: a shallow copy of the input dict with converted checksums, or
                 the input itself when there is nothing to convert.
        """
        # A before-validator can be handed any object, only dicts are handled here
        if not isinstance(data, dict):
            return data
        incoming_checksums = data.get('checksums', [])
        if not isinstance(incoming_checksums, list):
            return data
        checksums = [
            Checksum(
                algorithm=_hyphenate_sha_id(checksum.get('algorithm')),
                value=checksum.get('value')
            ) if isinstance(checksum, dict) else checksum
            for checksum in incoming_checksums
        ]
        # Return a copy so the caller's dict is not modified as a side effect
        return {**data, 'checksums': checksums}

class Representation(BaseModel):
    """A package representation: an optional METS file and a name."""
    mets: Optional[MetsFile] = None
    name: Optional[str] = ''

class InformationPackage(BaseModel):
    """An information package: its METS file, details and representations."""
    mets: Optional[MetsFile] = None
    details: Optional[PackageDetails] = None
    representations: List[Representation] = []

    # Validator to convert the commons-ip representations dict to a list of representations
    @model_validator(mode='before')
    @classmethod
    def convert_representations_dict(cls, data: Any) -> Any:
        """
        Coerce a commons-ip style ``representations`` dict to a list.

        Args:
            data (Any): the raw input, only dict input is inspected.

        Returns:
            Any: a shallow copy of the input dict with ``representations``
                 replaced by a list of Representation, or the input itself
                 when ``representations`` is not a dict.
        """
        # A before-validator can be handed any object, only dicts are handled here
        if not isinstance(data, dict):
            return data
        representations = data.get('representations')
        if not isinstance(representations, dict):
            return data
        # NOTE(review): the dict *values* are used as the representation names,
        # the keys are ignored - confirm this matches the commons-ip output.
        reps = [Representation(name=name) for name in representations.values()]
        # Return a copy so the caller's dict is not modified as a side effect
        return {**data, 'representations': reps}
@unique
class Level(str, Enum):
    """Enum covering the requirement levels used by a specification."""
    # Reported at information severity when not met (see Severity.from_level)
    MAY = MAY
    # Reported at warning severity when not met (see Severity.from_level)
    SHOULD = SHOULD
    # Reported at error severity when not met (see Severity.from_level)
    MUST = MUST

    @staticmethod
    def from_string(level: str) -> 'Level':
        """
        Convert a string to a Level.

        The comparison is case-sensitive against the member name and value.

        Raises:
            ValueError: if no Level matches the string.
        """
        for item in Level:
            if level in (item.value, item.name):
                return item
        raise ValueError(f'No Level with value {level}')

class Requirement(BaseModel):
    """Encapsulates a requirement."""
    id: str
    level: Level = Level.MUST
    message: Optional[str] = None

class Specification(BaseModel):
    """Stores the vital facts and figures an IP specification."""
    title: str
    url: Optional[str] = None
    version: str
    date: str
    structural_requirements: List[Requirement] = []
    # Requirements keyed by section name
    requirements: Dict[str, List[Requirement]] = {}

    @computed_field
    @property
    def id(self) -> str:
        """
        Return the specification id.

        The id is derived from the URL: the last path segment, cut at the
        first '.', then the part after the last '-'. An empty string is
        returned when no URL is set, so that the computed field can always
        be serialised.
        """
        if not self.url:
            return ''
        return self.url.split('/')[-1].split('.')[0].split('-')[-1]

    @property
    def sections(self) -> List[str]:
        """Return the sections in the specification."""
        return list(self.requirements.keys())

    @computed_field
    @property
    def requirement_count(self) -> int:
        """Return the number of requirements across all sections."""
        return sum(len(reqs) for reqs in self.requirements.values())

    def section_requirements(self, section: Optional[str]=None) -> List[Requirement]:
        """
        Get the specification requirements, by section if offered.

        Args:
            section (Optional[str]): the section name, all sections when omitted or empty.

        Raises:
            KeyError: if the named section does not exist.

        Returns:
            List[Requirement]: a new list, safe for the caller to modify.
        """
        if section:
            # Copy so callers cannot alter the stored section list by accident
            return list(self.requirements[section])
        return [req for reqs in self.requirements.values() for req in reqs]

    def get_requirement_by_id(self, req_id: str) -> Optional[Requirement]:
        """Retrieve a requirement by id, searching every section; None if absent."""
        for sect in self.sections:
            req = self.get_requirement_by_sect(req_id, sect)
            if req is not None:
                return req
        return None

    def get_requirement_by_sect(self, req_id: str, section: str) -> Optional[Requirement]:
        """
        Retrieve a requirement by id from a single section; None if absent.

        Raises:
            KeyError: if the named section does not exist.
        """
        return next((req for req in self.requirements[section] if req.id == req_id), None)
@unique
class Severity(str, Enum):
    """Enum covering validation message severities."""
    UNKNOWN = UNKNOWN
    # Information level, possibly not best practise
    INFORMATION = INFORMATION
    # Non-fatal issue that should be corrected
    WARNING = WARNING
    # Error level message means invalid package
    ERROR = ERROR

    @classmethod
    def from_id(cls, severity_id: str) -> Optional['Severity']:
        """Get the enum from its name or value (case-sensitive), None if there is no match."""
        for severity in cls:
            if severity_id in (severity.name, severity.value):
                return severity
        return None

    @classmethod
    def from_role(cls, role: str) -> 'Severity':
        """
        Get the enum from a role string.

        A severity matches when its lower-cased value starts with the
        lower-cased role, so 'WARN' and 'warn' resolve to WARNING while the
        longer 'warning' does not match the value 'Warn'.

        Raises:
            ValueError: if no severity matches the role.
        """
        search = role.lower()
        for severity in cls:
            if severity.value.lower().startswith(search):
                return severity
        raise ValueError(f'No severity found for role: {role}')

    @classmethod
    def from_level(cls, level: Level) -> 'Severity':
        """Return the correct test result severity from a Level instance."""
        if level is Level.MUST:
            return Severity.ERROR
        if level is Level.SHOULD:
            return Severity.WARNING
        return Severity.INFORMATION

class Result(BaseModel):
    """A single validation message: rule id, severity, location and message text."""
    # populate_by_name lets callers use rule_id as well as the ruleId alias
    model_config = ConfigDict(populate_by_name=True)
    rule_id: Optional[str] = Field(validation_alias='ruleId')
    severity: Severity = Severity.UNKNOWN
    location: str | None
    message: str | None


@unique
class StructureStatus(str, Enum):
    """Enum for information package structure status values."""
    UNKNOWN = UNKNOWN
    # Package has basic parse / structure problems and can't be validated
    NOTWELLFORMED = NOTWELLFORMED
    # Package structure is OK
    WELLFORMED = WELLFORMED

class StructResults(BaseModel):
    """Outcome of the structure checks: a status and the messages raised."""
    status: StructureStatus = StructureStatus.UNKNOWN
    messages: List[Result] = []

    @property
    def errors(self) -> List[Result]:
        """Return the messages with ERROR severity."""
        return [m for m in self.messages if m.severity == Severity.ERROR]

    @property
    def warnings(self) -> List[Result]:
        """Return the messages with WARNING severity."""
        return [m for m in self.messages if m.severity == Severity.WARNING]

    @property
    def infos(self) -> List[Result]:
        """Return the messages with INFORMATION severity."""
        return [m for m in self.messages if m.severity == Severity.INFORMATION]

@unique
class MetadataStatus(str, Enum):
    """Enum for information package metadata status values."""
    UNKNOWN = UNKNOWN
    # Package metadata is valid according to the schema/schematron rules
    VALID = VALID
    # Package metadata is invalid according to the schema/schematron rules
    INVALID = INVALID

class MetadataResults(BaseModel):
    """Outcome of one set of metadata checks: a status and the messages raised."""
    status: MetadataStatus = MetadataStatus.UNKNOWN
    messages: List[Result] = []

    # Validator to convert commons-ip status from NOTVALID to INVALID
    @model_validator(mode='before')
    @classmethod
    def convert_status(cls, data: Any) -> Any:
        """Map the commons-ip status 'NOTVALID' to 'INVALID' before field validation."""
        # A before-validator can be handed any object, only dicts are handled here
        if isinstance(data, dict) and data.get('status') == 'NOTVALID':
            # Return a copy so the caller's dict is not modified as a side effect
            return {**data, 'status': 'INVALID'}
        return data

class MetatdataResultSet(BaseModel):
    """The schema and schematron metadata results for a package."""
    # populate_by_name lets callers use the field names as well as the camelCase aliases
    model_config = ConfigDict(populate_by_name=True)
    schema_results: MetadataResults = Field(validation_alias='schemaResults')
    schematron_results: MetadataResults = Field(validation_alias='schematronResults')

class ValidationReport(BaseModel):
    """A full validation report: structure results, metadata results and package details."""
    # default_factory gives every report its own UUID, a plain uuid.uuid4()
    # default is evaluated once when the class is created and then shared
    uid: uuid.UUID = Field(default_factory=uuid.uuid4)
    structure: Optional[StructResults] = None
    metadata: Optional[MetatdataResultSet] = None
    package: Optional[InformationPackage] = None

    @property
    def is_valid(self) -> bool:
        """
        Return True only if the structure is well formed and both the schema
        and schematron metadata results are valid.

        A report with no structure or no metadata results is not valid.
        """
        if self.structure is None or self.metadata is None:
            return False
        return (
            self.structure.status == StructureStatus.WELLFORMED
            and self.metadata.schema_results.status == MetadataStatus.VALID
            and self.metadata.schematron_results.status == MetadataStatus.VALID
        )
diff --git a/eark_validator/packages.py b/eark_validator/packages.py new file mode 100644 index 0000000..6bb28fd --- /dev/null +++ b/eark_validator/packages.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +Factory methods for the package classes. 
METS: str = 'METS.xml'

class PackageValidator():
    """Class for performing full package validation."""
    _package_handler = PackageHandler()

    def __init__(self, package_path: Path, version: SpecificationVersion = SpecificationVersion.V2_1_0):
        """
        Validate the package at ``package_path`` and store the report.

        Args:
            package_path (Path): a package directory, an archive file, or the
                                 path to a METS.xml file inside a package directory.
            version (SpecificationVersion): the specification version to validate against.
        """
        self._path : Path = package_path
        self._name: str = os.path.basename(package_path)
        self._report: ValidationReport | None = None
        self._version: SpecificationVersion = version
        # Stays None when the path can't be processed
        self._to_proc: Path | None = None

        if os.path.isdir(package_path):
            # A directory is processed in place, Path() also accepts a str path
            self._to_proc = Path(package_path).absolute()
        elif PackageHandler.is_archive(package_path):
            # An archive is prepared by the package handler first
            self._to_proc = self._package_handler.prepare_package(package_path)
        elif self._name == METS:
            # A METS file stands for its parent directory, which also names the package
            self._to_proc = Path(package_path).parent.absolute()
            self._name = os.path.basename(self._to_proc)
        else:
            # Not a directory, archive or METS file so we can't process
            self._report = _report_from_bad_path(package_path)
            return

        self._report = self.validate(self._version, self._to_proc)

    @property
    def original_path(self) -> Path:
        """Returns the original parsed path."""
        return self._path

    @property
    def name(self) -> str:
        """Returns the package name."""
        return self._name

    @property
    def validation_report(self) -> ValidationReport:
        """Returns the validation report for the package."""
        return self._report

    @property
    def version(self) -> SpecificationVersion:
        """Returns the specification version used for validation."""
        return self._version

    @classmethod
    def validate(cls, version: SpecificationVersion, to_validate: Path) -> ValidationReport:
        """Returns the validation report that results from validating the path
        to_validate as a folder. The method does not validate archive files."""
        is_struct_valid, struct_results = structure.validate(to_validate)
        if not is_struct_valid:
            # Structure problems stop validation, the report carries structure results only
            return ValidationReport.model_validate({'structure': struct_results})
        validator = MetsValidator(str(to_validate))
        validator.validate_mets(METS)

        mets_path = to_validate.joinpath(METS)
        csip_profile = SC.ValidationProfile(SpecificationType.CSIP, version)
        csip_profile.validate(mets_path)
        results = csip_profile.get_all_results()

        package: InformationPackage = InformationPackages.from_path(to_validate)
        package_type = package.details.oaispackagetype
        if package_type in ['SIP', 'DIP']:
            # SIP and DIP packages are also checked against their own profile
            profile = SC.ValidationProfile(SpecificationType.from_string(package_type), version)
            profile.validate(mets_path)
            results.extend(profile.get_all_results())

        metadata: MetatdataResultSet = MetatdataResultSet.model_validate({
            'schema_results': MetadataResults.model_validate({
                'status': _validity_from_messages(validator.validation_errors),
                'messages': validator.validation_errors
            }),
            'schematron_results': MetadataResults.model_validate({
                'status': _validity_from_messages(results),
                'messages': results
            })
        })
        return ValidationReport.model_validate({
            'structure': struct_results,
            'package': package,
            'metadata': metadata
        })

def _validity_from_messages(messages: list[Result]) -> MetadataStatus:
    """Return INVALID if any message has ERROR severity, VALID otherwise."""
    if any(res.severity == Severity.ERROR for res in messages):
        return MetadataStatus.INVALID
    return MetadataStatus.VALID

def _report_from_bad_path(package_path: Path) -> ValidationReport:
    """Return a report holding only the structure results for an unprocessable path."""
    struct_results = structure.get_bad_path_results(package_path)
    return ValidationReport.model_validate({ 'structure': struct_results })
to_validate: str) -> None: self.is_valid = True self.results = {} self.messages = [] - for section in self.rulesets.keys(): + for section, validator in self.rulesets.items(): try: - self.results[section] = TestReport.from_validation_report(self.rulesets[section].validate(to_validate)) + self.results[section] = TestResults.from_validation_report( + validator.validate(to_validate) + ) + if self._contains_errors(section): + self.is_valid = False except ET.XMLSyntaxError as parse_err: self.is_wellformed = False self.is_valid = False - self.messages.append('File {} is not valid XML. {}'.format(to_validate, parse_err.msg)) + self.messages.append(f'File {to_validate} is not valid XML. {parse_err.msg}') return - if not self.results[section].is_valid: - self.is_valid = False - def get_results(self) -> dict[str, 'TestReport']: + def _contains_errors(self, section: str) -> bool: + return len(list(filter(lambda a: a.severity == Severity.ERROR, self.results[section]))) > 0 + + def get_results(self) -> dict[str, List[Result]]: """Return the full set of results.""" return self.results - def get_result(self, name: str) -> 'TestReport': + def get_all_results(self) -> List[Result]: + """Return the full set of results.""" + results_list: List[Result] = [] + for _, results in self.results.items(): + results_list.extend(results) + return results_list + + def get_result(self, name: str) -> List[Result]: """Return only the results for element name.""" return self.results.get(name) - @classmethod - def from_specification(cls, specification: Specification) -> 'ValidationProfile': - """Create a validation profile from a specification.""" - if isinstance(specification, str): - specification = EarkSpecifications.from_id(specification) - if isinstance(specification, EarkSpecifications): - specification = specification.specification - if not isinstance(specification, Specification): - raise ValueError('Specification must be a Specification instance or valid specification ID.') - return 
class TestResults():
    """Helpers that convert Schematron SVRL report elements into Result models."""

    @staticmethod
    def from_element(rule: ET.Element, failed_assert: ET.Element) -> Result:
        """
        Create a Test result from an element.

        Args:
            rule (ET.Element): the fired-rule element that precedes the assertion,
                               may be None if no rule has been seen.
            failed_assert (ET.Element): the failed-assert or successful-report element.

        Returns:
            Result: the rule id (cut at the first '_'), severity, message text and a
                    location string built from the rule context, test and location.
        """
        context = rule.get('context') if rule is not None else None
        rule_id = failed_assert.get('id')
        if isinstance(rule_id, str):
            # Keep only the part of the id before the first underscore
            rule_id = rule_id.split('_')[0]

        test = failed_assert.get('test')
        # An element without a role is treated as an error
        severity = Severity.from_role(failed_assert.get('role', Severity.ERROR))
        location = failed_assert.get('location')
        text_ele = failed_assert.find(SVRL_NS + 'text')
        message = text_ele.text if text_ele is not None else None
        # Absent attributes are skipped rather than raising a TypeError on concatenation
        location = ''.join(part for part in (context, test, location) if part)
        return Result.model_validate({
            'rule_id': rule_id, 'location':location, 'message':message, 'severity':severity
        })

    @staticmethod
    def from_validation_report(ruleset: ET.Element) -> List[Result]:
        """
        Get the report from the last validation.

        Walks the SVRL document in order, remembering the latest fired-rule so
        that each failed-assert / successful-report is paired with it.
        """
        xml_report = ET.XML(bytes(ruleset))
        rule = None
        results: List[Result] = []
        for ele in xml_report.iter():
            if ele.tag == SVRL_NS + 'fired-rule':
                rule = ele
            elif ele.tag in [ SVRL_NS + 'failed-assert', SVRL_NS + 'successful-report' ]:
                results.append(TestResults.from_element(rule, ele))
        return results
-# -# Licensed to the E-ARK project under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The E-ARK project licenses -# this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -"""Module covering information package structure validation and navigation.""" -from enum import Enum, unique -from lxml import etree as ET -import os - -from importlib_resources import files - -from eark_validator.ipxml import PROFILES -from eark_validator.ipxml.schema import METS_PROF_SCHEMA -from eark_validator.ipxml.namespaces import Namespaces -from eark_validator.specifications.struct_reqs import STRUCT_REQS -from eark_validator.const import NOT_FILE, NO_PATH - -class Specification: - """Stores the vital facts and figures an IP specification.""" - def __init__(self, title: str, url: str, version: str, date: str, requirements:dict[str, 'Requirement']=None): - self._title = title - self._url = url - self._version = version - self._date = date - self._requirements = requirements if requirements else {} - - @property - def id(self) -> str: - """Get the id of the specification.""" - return EarkSpecifications.from_id(self.url).name - - @property - def title(self) -> str: - """Get the name of the specification.""" - return self._title - - @property - def url(self) -> str: - """Get the name of the specification.""" - return self._url - - @property - def version(self) -> str: - """Get the 
version.""" - return self._version - - @property - def date(self) -> str: - """Return the specification date.""" - return self._date - - @property - def requirements(self): - """Get the specification rules.""" - for section in self.sections: - for requirement in self._requirements[section].values(): - yield requirement - - @property - def requirement_count(self) -> int: - """Return the number of requirments in the specification.""" - req_count = 0 - for sect in self.sections: - req_count += len(self._requirements[sect]) - return req_count - - def get_requirement_by_id(self, id: str) -> 'Requirement': - """Retrieve a requirement by id.""" - for sect in self.sections: - req = self.get_requirement_by_sect(id, sect) - if req: - return req - return None - - def get_requirement_by_sect(self, id: str, section: str) -> 'Requirement': - """Retrieve a requirement by id.""" - sect = self._requirements[section] - if sect: - return sect.get(id) - return None - - def section_requirements(self, section: str=None) -> list['Requirement']: - """Get the specification requirements, by section if offered.""" - requirements = [] - if section: - requirements = self._requirements[section] - else: - for sect in self.sections: - requirements += self._requirements[sect].values() - return requirements - - @property - def section_count(self) -> int: - """Get the specification sections.""" - return len(self._requirements) - - @property - def sections(self) -> list[str]: - """Get the specification sections.""" - return self._requirements.keys() - - def __str__(self) -> str: - return 'name:' + self.title + ', version:' + \ - str(self.version) + ', date:' + str(self.date) - - @classmethod - def _from_xml_file(cls, xml_file: str, add_struct: bool=False) -> 'Specification': - if not os.path.exists(xml_file): - raise FileNotFoundError(NO_PATH.format(xml_file)) - if not os.path.isfile(xml_file): - raise ValueError(NOT_FILE.format(xml_file)) - """Create a Specification from an XML file.""" - tree = 
ET.parse(xml_file, parser=cls._parser()) - return cls._from_xml(tree, add_struct=add_struct) - - @classmethod - def _parser(cls) -> ET.XMLParser: - """Create a parser for the specification.""" - parser = ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) - return parser - - @classmethod - def _from_xml(cls, tree: ET.ElementTree, add_struct: bool=False) -> 'Specification': - spec = cls.from_element(tree.getroot(), add_struct=add_struct) - return spec - - @classmethod - def from_element(cls, spec_ele: ET.Element, add_struct: bool=False) -> 'Specification': - """Create a Specification from an XML element.""" - version = spec_ele.get('ID') - title = date = '' - requirements = {} - profile = '' - # Loop through the child eles - for child in spec_ele: - if child.tag == Namespaces.PROFILE.qualify('title'): - # Process the title element - title = child.text - elif child.tag == Namespaces.PROFILE.qualify('date'): - # Grab the requirement text value - date = child.text - elif child.tag == Namespaces.PROFILE.qualify('structural_requirements'): - requirements = cls._processs_requirements(child) - elif child.tag in [Namespaces.PROFILE.qualify('URI'), 'URI']: - profile = child.text - if add_struct: - # Add the structural requirements - struct_reqs = Specification.StructuralRequirement._get_struct_reqs() - requirements['structure'] = struct_reqs - # Return the Specification - return cls(title, profile, version, date, requirements=requirements) - - @classmethod - def _processs_requirements(cls, req_root: ET.Element) -> dict[str, 'Requirement']: - requirements = {} - for sect_ele in req_root: - section = sect_ele.tag.replace(Namespaces.PROFILE.qualifier, '') - reqs = {} - for req_ele in sect_ele: - requirement = cls.Requirement.from_element(req_ele) - if not requirement.id.startswith('REF_'): - reqs.update({requirement.id: requirement}) - requirements[section] = reqs - return requirements - - class Requirement(): - """Encapsulates a requirement.""" - def 
__init__(self, req_id: str, name: str, level: str='MUST', xpath: str=None, cardinality: str=None): - self._id = req_id - self._name = name - self._level = level - self._xpath = xpath - self._cardinality = cardinality - - @property - def id(self) -> str: # pylint: disable-msg=C0103 - """Return the id.""" - return self._id - - @property - def name(self) -> str: - """Return the name.""" - return self._name - - @property - def level(self) -> str: - """Return the level.""" - return self._level - - @property - def xpath(self) -> str: - """Return the xpath.""" - return self._xpath - - @property - def cardinality(self) -> str: - """Return the cardinality.""" - return self._cardinality - - def __str__(self) -> str: - return 'id:' + self.id + ', name:' + self.name - - @classmethod - def from_element(cls, req_ele: ET.Element) -> 'Specification.Requirement': - """Return a Requirement instance from an XML element.""" - req_id = req_ele.get('ID') - level = req_ele.get('LEVEL') - name = '' - for child in req_ele: - if child.tag == Namespaces.METS.qualify('description'): - for req_child in child: - if req_child.tag == Namespaces.METS.qualify('head'): - name = req_child.text - return cls(req_id, name, level) - - class StructuralRequirement(): - """Encapsulates a structural requirement.""" - def __init__(self, req_id: str, level: str='MUST', message: str=None): - self._id = req_id - self._level = level - self._message = message - - @property - def id(self) -> str: # pylint: disable-msg=C0103 - """Return the id.""" - return self._id - - @property - def level(self) -> str: - """Return the level.""" - return self._level - - @property - def message(self) -> str: - """Return the message.""" - return self._message - - def __str__(self) -> str: - return 'id:' + self.id + ', level:' + str(self.level) - - @classmethod - def from_rule_no(cls, rule_no: int) -> 'Specification.StructuralRequirement': - """Create an StructuralRequirement from a numerical rule id and a sub_message.""" - item = 
STRUCT_REQS.get(rule_no) - return cls.from_dict_item(item) - - @classmethod - def from_dict_item(cls, item: ET.Element) -> 'Specification.StructuralRequirement': - """Create an StructuralRequirement from dictionary item and a sub_message.""" - return cls.from_values(item.get('id'), item.get('level'), - item.get('message')) - - @classmethod - def from_values(cls, req_id: str, level: str='MUST', message:str=None) -> 'Specification.StructuralRequirement': - """Create an StructuralRequirement from values supplied.""" - return cls(req_id, level, message) - - @staticmethod - def _get_struct_reqs() -> list['Specification.StructuralRequirement']: - reqs = [] - for req_num in STRUCT_REQS: - req = STRUCT_REQS.get(req_num) - reqs.append(Specification.StructuralRequirement(req.get('id'), - level=req.get('level'), - message=req.get('message'))) - return reqs - - -@unique -class EarkSpecifications(Enum): - """Enumeration of E-ARK specifications.""" - CSIP = 'E-ARK-CSIP' - SIP = 'E-ARK-SIP' - DIP = 'E-ARK-DIP' - - def __init__(self, value: str): - self._path = str(files(PROFILES).joinpath(value + '.xml')) - self._specfication = Specification._from_xml_file(self._path) - self._title = value - - @property - def id(self) -> str: - """Get the specification id.""" - return self.name - - @property - def path(self) -> str: - """Get the path to the specification file.""" - self._path - - @property - def title(self) -> str: - """Get the specification title.""" - self._title - - @property - def specification(self) -> Specification: - """Get the specification.""" - return self._specfication - - @property - def profile(self) -> str: - """Get the specification profile url.""" - return 'https://eark{}.dilcis.eu/profile/{}.xml'.format(self.name.lower(), self.value) - - @classmethod - def from_id(cls, id: str) -> 'EarkSpecifications': - """Get the enum from the value.""" - for spec in cls: - if spec.id == id or spec.value == id or spec.profile == id: - return spec - return None +# +# E-ARK 
Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +"""Module covering information package structure validation and navigation.""" +import os +from enum import Enum, unique +from typing import Optional + +from importlib_resources import files +from lxml import etree as ET + +from eark_validator.const import NO_PATH, NOT_FILE +from eark_validator.ipxml.namespaces import Namespaces +from eark_validator.ipxml.resources import profiles +from eark_validator.ipxml.schema import METS_PROF_SCHEMA +from eark_validator.model.specifications import Requirement, Specification +from eark_validator.specifications.struct_reqs import REQUIREMENTS +from eark_validator.specifications.struct_reqs import Level + + +class Specifications: + + @classmethod + def _from_xml_file(cls, xml_file: str) -> Specification: + """Create a Specification from an XML file.""" + if not os.path.exists(xml_file): + raise FileNotFoundError(NO_PATH.format(xml_file)) + if not os.path.isfile(xml_file): + raise ValueError(NOT_FILE.format(xml_file)) + tree = ET.parse(xml_file, parser=cls._parser()) + return cls._from_xml(tree) + + @classmethod + def _parser(cls) -> ET.XMLParser: + """Create a parser for the specification.""" + 
return ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) + + @classmethod + def _from_xml(cls, tree: ET.ElementTree) -> Specification: + return cls.from_element(tree.getroot()) + + @classmethod + def from_element(cls, spec_ele: ET.Element) -> Specification: + """Create a Specification from an XML element.""" + version = spec_ele.get('ID') + title = date = '' + requirements: dict[str, Requirement] = {} + profile = '' + # Loop through the child eles + for child in spec_ele: + if child.tag == Namespaces.PROFILE.qualify('title'): + # Process the title element + title = child.text + elif child.tag == Namespaces.PROFILE.qualify('date'): + # Grab the requirement text value + date = child.text + elif child.tag == Namespaces.PROFILE.qualify('structural_requirements'): + requirements = cls._processs_requirements(child) + elif child.tag in [Namespaces.PROFILE.qualify('URI'), 'URI']: + profile = child.text + # Add the structural requirements + struct_reqs = StructuralRequirements.get_requirements() + # Return the Specification + return Specification.model_validate({ + 'title': title, + 'url': profile, + 'version': version, + 'date': date, + 'requirements': requirements, + 'structural_requirements': struct_reqs + }) + + @classmethod + def _processs_requirements(cls, req_root: ET.Element) -> dict[str, 'Requirement']: + requirements = {} + for sect_ele in req_root: + section = sect_ele.tag.replace(Namespaces.PROFILE.qualifier, '') + reqs = [] + for req_ele in sect_ele: + requirement = Requirements.from_element(req_ele) + if not requirement.id.startswith('REF_'): + reqs.append(requirement) + requirements[section] = reqs + return requirements + +class Requirements(): + @staticmethod + def from_element(req_ele: ET.Element) -> Requirement: + """Return a Requirement instance from an XML element.""" + req_id = req_ele.get('ID') + level: Level = Level.from_string(req_ele.get('REQLEVEL')) + name = '' + for child in req_ele: + if child.tag == 
Namespaces.PROFILE.qualify('description'): + for req_child in child: + if req_child.tag == Namespaces.PROFILE.qualify('head'): + name = req_child.text + return Requirement.model_validate({ + 'id': req_id, + 'name': name, + 'level': level + }) + +class StructuralRequirements(): + @staticmethod + def from_rule_no(rule_no: int) -> Requirement: + """Create an StructuralRequirement from a numerical rule id and a sub_message.""" + item = REQUIREMENTS.get(rule_no) + if not item: + raise ValueError(f'No rule with number {rule_no}') + return StructuralRequirements.from_dictionary(item) + + @staticmethod + def from_dictionary(item: dict[str, str]) -> Requirement: + """Create an StructuralRequirement from dictionary item and a sub_message.""" + return Requirement.model_validate({ + 'id': item.get('id'), + 'level': item.get('level'), + 'message': item.get('message') + }) + + @staticmethod + def get_requirements() -> list[Requirement]: + reqs = [] + for req in REQUIREMENTS.values(): + reqs.append(Requirement.model_validate(req)) + return reqs + +@unique +class SpecificationVersion(str, Enum): + V2_0_4 = 'V2.0.4' + V2_1_0 = 'V2.1.0' + + def __str__(self): + return self.value + +@unique +class SpecificationType(str, Enum): + CSIP = 'E-ARK-CSIP' + SIP = 'E-ARK-SIP' + DIP = 'E-ARK-DIP' + + @classmethod + def from_string(cls, type: str) -> Optional['SpecificationType']: + """Get the enum from the value.""" + for spec in cls: + if type in [spec.name, spec.value]: + return spec + raise ValueError('{type} does not exists') + +class EarkSpecification: + def __init__(self, type: SpecificationType, version: SpecificationVersion): + self._type: SpecificationType = type + self._version: SpecificationVersion = version + + self._path = str(files(profiles).joinpath(version).joinpath(type + '.xml')) + self._specfication = Specifications._from_xml_file(self.path) + + @property + def version(self) -> SpecificationVersion: + """Get the specification version.""" + return self._version + + @property 
+ def type(self) -> SpecificationType: + """Get the specification type.""" + return self._type + + @property + def path(self) -> str: + """Get the path to the specification file.""" + return self._path + + @property + def specification(self) -> Specification: + """Get the specification.""" + return self._specfication diff --git a/eark_validator/specifications/struct_reqs.py b/eark_validator/specifications/struct_reqs.py index ad738b6..a11bf00 100644 --- a/eark_validator/specifications/struct_reqs.py +++ b/eark_validator/specifications/struct_reqs.py @@ -23,116 +23,151 @@ # under the License. # """Structural requirements as a dictionary.""" -STRUCT_REQS = { + +from eark_validator.model import Level + + +REQUIREMENTS = { 1: { - 'id': 'CSIPSTR1', - 'level': 'MUST', - 'message': """Any Information Package MUST be included within a single physical root - folder (known as the “Information Package root folder”). For packages contained - in an archive format, see CSIPSTR3, the archive MUST unpack to a single root folder.""" + 'id': 'CSIPSTR1', + 'level': Level.MUST, + 'message': ' '.join([ + 'Any Information Package MUST be included within a single physical root', + 'folder (known as the “Information Package root folder”). For packages contained', + 'in an archive format, see CSIPSTR3, the archive MUST unpack to a single root folder.' + ]) }, 2: { - 'id': 'CSIPSTR2', - 'level': 'SHOULD', - 'message': """The Information Package root folder SHOULD be named with the ID or name of - the Information Package, that is the value of the package METS.xml's root - element's @OBJID attribute.""" + 'id': 'CSIPSTR2', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The Information Package root folder SHOULD be named with the ID or name of', + 'the Information Package, that is the value of the package METS.xml\'s root ', + 'element\'s @OBJID attribute.' 
+ ]) }, 3: { - 'id': 'CSIPSTR3', - 'level': 'MAY', - 'message': """The Information Package MAY be contained in an archive/compressed form, - e.g. TAR or ZIP, for storage or transfer. The specific format details should be decided - by the interested parties and documented, for example in a submission agreement or - statement of access terms.""" + 'id': 'CSIPSTR3', + 'level': Level.MAY, + 'message': ' '.join([ + 'The Information Package MAY be contained in an archive/compressed form,', + 'e.g. TAR or ZIP, for storage or transfer. The specific format details should be', + 'decided by the interested parties and documented, for example in a submission', + 'agreement or statement of access terms.' + ]) }, 4: { - 'id': 'CSIPSTR4', - 'level': 'MUST', - 'message': """The Information Package root folder MUST include a file named METS.xml. - This file MUST contain metadata that identifies the package, provides a high-level - package description, and describes its structure, including pointers to constituent - representations.""" + 'id': 'CSIPSTR4', + 'level': Level.MUST, + 'message': ' '.join([ + 'The Information Package root folder MUST include a file named METS.xml.', + 'This file MUST contain metadata that identifies the package, provides a high-level', + 'package description, and describes its structure, including pointers to constituent', + 'representations.' + ]) }, 5: { - 'id': 'CSIPSTR5', - 'level': 'SHOULD', - 'message': """The Information Package root folder SHOULD include a folder named - metadata, which SHOULD include metadata relevant to the whole package.""" + 'id': 'CSIPSTR5', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The Information Package root folder SHOULD include a folder named', + 'metadata, which SHOULD include metadata relevant to the whole package.' 
+ ]) }, 6: { - 'id': 'CSIPSTR6', - 'level': 'SHOULD', - 'message': """If preservation metadata are available, they SHOULD be included in - sub-folder preservation.""" + 'id': 'CSIPSTR6', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'If preservation metadata are available they SHOULD be included in', + 'sub-folder preservation.' + ]) }, 7: { - 'id': 'CSIPSTR7', - 'level': 'SHOULD', - 'message': """If descriptive metadata are available, they SHOULD be included in - sub-folder descriptive.""" + 'id': 'CSIPSTR7', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'If descriptive metadata are available, they SHOULD be included in', + 'sub-folder descriptive.' + ]) }, 8: { - 'id': 'CSIPSTR8', - 'level': 'MAY', - 'message': """If any other metadata are available, they MAY be included in separate - sub-folders, for example an additional folder named other.""" + 'id': 'CSIPSTR8', + 'level': Level.MAY, + 'message': ' '.join([ + 'If any other metadata are available, they MAY be included in separate', + 'sub-folders, for example an additional folder named other.' + ]) }, 9: { - 'id': 'CSIPSTR9', - 'level': 'SHOULD', - 'message': """The Information Package folder SHOULD include a folder named - representations.""" + 'id': 'CSIPSTR9', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The Information Package folder SHOULD include a folder named', + 'representations.' + ]) }, 10: { - 'id': 'CSIPSTR10', - 'level': 'SHOULD', - 'message': """The representations folder SHOULD include a sub-folder for each - individual representation (i.e. the “representation folder”). Each representation - folder should have a string name that is unique within the package scope. For - example the name of the representation and/or its creation date might be good - candidates as a representation sub-folder name.""" + 'id': 'CSIPSTR10', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The representations folder SHOULD include a sub-folder for each', + 'individual representation (i.e. 
the “representation folder”). Each representation', + 'folder should have a string name that is unique within the package scope. For', + 'example the name of the representation and/or its creation date might be good', + 'candidates as a representation sub-folder name.' + ]) }, 11: { - 'id': 'CSIPSTR11', - 'level': 'SHOULD', - 'message': """The representation folder SHOULD include a sub-folder named data - which MAY include all data constituting the representation.""" + 'id': 'CSIPSTR11', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The representation folder SHOULD include a sub-folder named data', + 'which MAY include all data constituting the representation.' + ]) }, 12: { - 'id': 'CSIPSTR12', - 'level': 'SHOULD', - 'message': """The representation folder SHOULD include a metadata file named METS.xml - which includes information about the identity and structure of the representation - and its components. The recommended best practice is to always have a METS.xml in - the representation folder.""" + 'id': 'CSIPSTR12', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The representation folder SHOULD include a metadata file named METS.xml', + 'which includes information about the identity and structure of the representation', + 'and its components. The recommended best practice is to always have a METS.xml in', + 'the representation folder.' + ]) }, 13: { - 'id': 'CSIPSTR13', - 'level': 'SHOULD', - 'message': """The representation folder SHOULD include a sub-folder named metadata - which MAY include all metadata about the specific representation.""" + 'id': 'CSIPSTR13', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'The representation folder SHOULD include a sub-folder named metadata', + 'which MAY include all metadata about the specific representation.' 
+ ]) }, 14: { - 'id': 'CSIPSTR14', - 'level': 'MAY', - 'message': """The Information Package MAY be extended with additional sub-folders.""" + 'id': 'CSIPSTR14', + 'level': Level.MAY, + 'message': 'The Information Package MAY be extended with additional sub-folders.' }, 15: { - 'id': 'CSIPSTR15', - 'level': 'SHOULD', - 'message': """We recommend including all XML schema documents for any structured - metadata within package. These schema documents SHOULD be placed in a sub-folder - called schemas within the Information Package root folder and/or the representation - folder.""" + 'id': 'CSIPSTR15', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'We recommend including all XML schema documents for any structured', + 'metadata within package. These schema documents SHOULD be placed in a sub-folder', + 'called schemas within the Information Package root folder and/or the representation', + 'folder.' + ]) }, 16: { - 'id': 'CSIPSTR16', - 'level': 'SHOULD', - 'message': """We recommend including any supplementary documentation for the package - or a specific representation within the package. Supplementary documentation SHOULD - be placed in a sub-folder called documentation within the Information Package root - folder and/or the representation folder. Examples of documentation include representation - information and manuals for the system the data objects have been exported from.""" + 'id': 'CSIPSTR16', + 'level': Level.SHOULD, + 'message': ' '.join([ + 'We recommend including any supplementary documentation for the package', + 'or a specific representation within the package. Supplementary documentation SHOULD', + 'be placed in a sub-folder called documentation within the Information Package root', + 'folder and/or the representation folder. Examples of documentation include', + 'representation information and manuals for the system the data objects have been', + 'exported from.' 
+ ]) } } diff --git a/eark_validator/structure.py b/eark_validator/structure.py index 5043d38..0d78d59 100644 --- a/eark_validator/structure.py +++ b/eark_validator/structure.py @@ -23,320 +23,231 @@ # under the License. # """Encapsulates all things related to information package structure.""" -from enum import Enum, unique import os -import tarfile -import tempfile -import zipfile +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple + +from eark_validator.specifications.struct_reqs import REQUIREMENTS +from eark_validator.infopacks.package_handler import PackageHandler, PackageError +from eark_validator.model import ( + StructResults, + StructureStatus, + Result, + Severity, + Representation +) -from eark_validator.rules import Severity -import eark_validator.specifications.specification as SPECS - -from eark_validator.infopacks.manifest import Checksum - -MD_DIR = 'metadata' -REPS_DIR = 'representations' -SCHEMA_DIR = 'schemas' METS_NAME = 'METS.xml' STR_REQ_PREFIX = 'CSIPSTR' -SUB_MESS_NOT_EXIST = 'Path {} does not exist' -SUB_MESS_NOT_ARCH = 'Path {} is not a directory or archive format file.' 
-# Map requirement levels to severity -LEVEL_SEVERITY = { - 'MUST': Severity.ERROR, - 'SHOULD': Severity.WARN, - 'MAY': Severity.INFO +ROOT = 'root' +DIR_NAMES = { + 'DATA': 'data', + 'DESC': 'descriptive', + 'DOCS': 'documentation', + 'META': 'metadata', + 'OTHR': 'other', + 'PRES': 'preservation', + 'REPS': 'representations', + 'SCHM': 'schemas' } -@unique -class StructureStatus(Enum): - """Enum covering information package validation statuses.""" - Unknown = 'Unknown' - # Package has basic parse / structure problems and can't be validated - NotWellFormed = 'Not Well Formed' - # Package structure is OK - WellFormed = 'Well Formed' -class StructureReport: - """Stores the vital facts and figures about a package.""" - structure_values = list(StructureStatus) - def __init__(self, status: StructureStatus=StructureStatus.Unknown, errors: list[str]=None, warnings: list[str]=None, infos: list[str]=None): - self.status = status - self._errors = errors if errors else [] - self._warnings = warnings if warnings else [] - self._infos = infos if infos else [] - - @property - def status(self) -> StructureStatus: - """Get the structure status.""" - return self._status - - @status.setter - def status(self, value: StructureStatus) -> None: - if value not in self.structure_values: - raise ValueError('Illegal package status value') - self._status = value - - @property - def errors(self) -> list[str]: - """Return the full list of error messages.""" - return self._errors - - @property - def warnings(self) -> list[str]: - """Return the full list of warnings messages.""" - return self._warnings - - @property - def infos(self) -> list[str]: - """Return the full list of info messages.""" - return self._infos - - @property - def messages(self): - """Generator that yields all of the messages in the report.""" - for entry in self.errors: - yield entry - for entry in self.warnings: - yield entry - for entry in self.infos: - yield entry - - def add_error(self, error: str) -> None: - """Add a 
validation error to package lists.""" - if error.severity == Severity.INFO: - self._infos.append(error) - elif error.severity == Severity.WARN: - self._warnings.append(error) - elif error.severity == Severity.ERROR: - self._errors.append(error) - self.status = StructureStatus.NotWellFormed - - def add_errors(self, errors: list[str]) -> None: - """Add a validation error to package lists.""" - for error in errors: - self.add_error(error) - - @classmethod - def from_path(cls, path: str) -> 'StructureReport': - """Create a structure report from a path, this can be a folder or an archive file.""" - rep = StructureReport(status=StructureStatus.WellFormed) - root = path - if not os.path.exists(path): - # If it doesn't exist then add an error message - rep.add_error(StructError.from_rule_no(1, sub_message=SUB_MESS_NOT_EXIST.format(path))) - elif os.path.isfile(path): - if ArchivePackageHandler.is_archive(path): - root = cls._handle_archive(path) - else: - rep.add_error(StructError.from_rule_no(1, - sub_message=SUB_MESS_NOT_ARCH.format(path))) - if rep.errors: - return rep - - struct_checker = StructureChecker.from_directory(root) - rep.add_errors(struct_checker.validate_manifest()) - reps_dir = os.path.join(root, REPS_DIR) - if os.path.isdir(reps_dir): - for entry in os.listdir(reps_dir): - struct_checker = StructureChecker.from_directory(os.path.join(reps_dir, entry)) - rep.add_errors(struct_checker.validate_manifest(is_root=False)) - return rep - - @classmethod - def _handle_archive(cls, archive_path: str) -> str: - arch_handler = ArchivePackageHandler() - root = arch_handler.unpack_package(archive_path) - if len(os.listdir(root)) == 1: - for entry in os.listdir(root): - ent_path = os.path.join(root, entry) - if os.path.isdir(ent_path): - root = ent_path - return root - - - def __str__(self): - return 'status:' + str(self.status) - -class StructError(): - """Encapsulates an individual validation test result.""" - def __init__(self, requirement: str, sub_message: str): - 
self._requirement = requirement - self.severity = LEVEL_SEVERITY.get(requirement.level, Severity.UNKNOWN) - self._sub_message = sub_message - - @property - def id(self) -> str: # pylint: disable-msg=C0103 - """Get the rule_id.""" - return self._requirement.id +class StructureParser(): + _package_handler = PackageHandler() + """Encapsulates the set of tests carried out on folder structure.""" + def __init__(self, package_path: Path): + self._is_archive = PackageHandler.is_archive(package_path) + self.md_folders: set[str]= set() + self.folders: set[str] = set() + self.files : set[str] = set() + self.is_parsable = False + if self._is_archive or package_path.is_dir(): + self.is_parsable = True + self.resolved_path = self._package_handler.prepare_package(package_path) + self.folders, self.files = _folders_and_files(self.resolved_path) + if DIR_NAMES['META'] in self.folders: + self.md_folders, _ = _folders_and_files( + os.path.join(self.resolved_path, + DIR_NAMES['META'])) + + def has_data(self) -> bool: + """Returns True if the package/representation has a structure folder.""" + return DIR_NAMES['DATA'] in self.folders + + def has_descriptive_md(self) -> bool: + """Returns True if the package/representation has a descriptive metadata folder.""" + return DIR_NAMES['DESC'] in self.md_folders + + def has_documentation(self) -> bool: + """Returns True if the package/representation has a documentation folder.""" + return DIR_NAMES['DOCS'] in self.folders + + def has_mets(self) -> bool: + """Returns True if the package/representation has a root METS.xml file.""" + return METS_NAME in self.files + + def has_metadata(self) -> bool: + """Returns True if the package/representation has a metadata folder.""" + return DIR_NAMES['META'] in self.folders + + def has_other_md(self) -> bool: + """Returns True if the package/representation has extra metadata folders + after preservation and descriptive.""" + md_folder_count = len(self.md_folders) + if self.has_preservation_md(): + 
md_folder_count-=1 + if self.has_descriptive_md(): + md_folder_count-=1 + return md_folder_count > 0 + + def has_preservation_md(self) -> bool: + """Returns True if the package/representation has a preservation metadata folder.""" + return DIR_NAMES['PRES'] in self.md_folders + + def has_representations_folder(self) -> bool: + """Returns True if the package/representation has a representations folder.""" + return DIR_NAMES['REPS'] in self.folders + + def has_schemas(self) -> bool: + """Returns True if the package/representation has a schemas folder.""" + return DIR_NAMES['SCHM'] in self.folders @property - def severity(self) -> Severity: - """Get the severity.""" - return self._severity - - @severity.setter - def severity(self, value: Severity) -> None: - if value not in list(Severity): - raise ValueError('Illegal severity value') - self._severity = value - - @property - def is_error(self) -> bool: - """Returns True if this is an error message, false otherwise.""" - return self.severity == Severity.ERROR - - @property - def is_info(self) -> bool: - """Returns True if this is an info message, false otherwise.""" - return self.severity == Severity.INFO - - @property - def is_warning(self) -> bool: - """Returns True if this is an warning message, false otherwise.""" - return self.severity == Severity.WARN - - @property - def message(self) -> str: - """Get the message.""" - return self._requirement.message - - @property - def sub_message(self) -> str: - """Get the sub-message.""" - return self._sub_message - - def to_json(self) -> dict: - """Output the message in JSON format.""" - return {'id' : self.id, 'severity' : str(self.severity.name), - 'message' : self.message, 'sub_message' : self.sub_message} - - def __str__(self) -> str: - return 'id:{}, severity:{}, message:{}, sub_message:{}'.format(self.id, - str(self.severity.name), - self.message, - self.sub_message) - @classmethod - def from_rule_no(cls, rule_no: int, sub_message: str=None) -> 'StructError': - 
"""Create an StructError from values supplied.""" - requirement = SPECS.Specification.StructuralRequirement.from_rule_no(rule_no) - return StructError(requirement, sub_message) - - @classmethod - def from_values(cls, requirement: str, sub_message: str=None) -> 'StructError': - """Create an StructError from values supplied.""" - return StructError(requirement, sub_message) - -class ArchivePackageHandler(): - """Class to handle archive / compressed information packages.""" - def __init__(self, unpack_root: str=tempfile.gettempdir()): - self._unpack_root = unpack_root - - @property - def unpack_root(self) -> str: - """Returns the root directory for archive unpacking.""" - return self._unpack_root - - def unpack_package(self, to_unpack: str, dest: str=None) -> str: - """Unpack an archived package to a destination (defaults to tempdir). - returns the destination folder.""" - if not os.path.isfile(to_unpack) or not self.is_archive(to_unpack): - raise PackageStructError('File is not an archive file.') - sha1 = Checksum.from_file(to_unpack, 'sha1') - dest_root = dest if dest else self.unpack_root - destination = os.path.join(dest_root, sha1.value) - if zipfile.is_zipfile(to_unpack): - zip_ip = zipfile.ZipFile(to_unpack) - zip_ip.extractall(path=destination) - elif tarfile.is_tarfile(to_unpack): - tar_ip = tarfile.open(to_unpack) - tar_ip.extractall(path=destination) - return destination - - @staticmethod - def is_archive(to_test: str) -> bool: - """Return True if the file is a recognised archive type, False otherwise.""" - if zipfile.is_zipfile(to_test): - return True - return tarfile.is_tarfile(to_test) - -def validate_package_structure(package_path: str) -> StructureReport: - """Carry out all structural package tests.""" - # It's a file so we need to unpack it - return StructureReport.from_path(package_path) + def is_archive(self) -> bool: + """Returns True if the package/representation is an archive.""" + return self._is_archive class StructureChecker(): - 
"""Encapsulate the mess that is the manifest details.""" - def __init__(self, name, has_mets=True, has_md=True, has_schema=True, - has_data=False, has_reps=True): - self.name = name - self.has_mets = has_mets - self.has_md = has_md - self.has_schema = has_schema - self.has_data = has_data - self.has_reps = has_reps - - def validate_manifest(self, is_root: bool=True) -> list[StructError]: - """Validate a manifest report and return the list of validation errors.""" - validation_errors = [] - # [CSIPSTR4] Is there a file called METS.xml (perform case checks) - # [CSIPSTR12] Does each representation folder have a METS.xml file? (W) - if not self.has_mets: - if is_root: - validation_errors.append(StructError.from_rule_no(4)) - else: - validation_errors.append(StructError.from_rule_no(12)) - # [CSIPSTR5] Is there a first level folder called metadata? - # [CSIPSTR13] Does each representation folder have a metadata folder (W) - if not self.has_md: - if is_root: - validation_errors.append(StructError.from_rule_no(5)) - else: - validation_errors.append(StructError.from_rule_no(13)) - # [CSIPSTR15] Is there a schemas folder at the root level/representations? (W) - if not self.has_schema: - validation_errors.append(StructError.from_rule_no(15)) - # [CSIPSTR11] Does each representation folder have a sub folder called data? 
(W) - if not self.has_data and not is_root: - validation_errors.append(StructError.from_rule_no(11)) - # [CSIPSTR9] Is there a first level folder called representations (W) - if not self.has_reps and is_root: - validation_errors.append(StructError.from_rule_no(9)) - return validation_errors + def __init__(self, dir_to_scan: Path): + self.name: str = os.path.basename(dir_to_scan) + self.parser: StructureParser = StructureParser(dir_to_scan) + self.representations: Dict[Representation, StructureParser] = {} + if self.parser.is_parsable: + _reps = os.path.join(self.parser.resolved_path, DIR_NAMES['REPS']) + if os.path.isdir(_reps): + for entry in os.listdir(_reps): + self.representations[entry] = StructureParser(Path(os.path.join(_reps, entry))) + + def get_test_results(self) -> StructResults: + if not self.parser.is_parsable: + return get_bad_path_results(self.name) + + results: List[Result] = self.get_root_results() + results = results + self.get_package_results() + for name, tests in self.representations.items(): + location = str(name) + ' representation' + if not tests.has_data(): + results.append(test_result_from_id(11, location)) + if not tests.has_mets(): + results.append(test_result_from_id(12, location)) + if not tests.has_metadata(): + results.append(test_result_from_id(13, location)) + return StructResults.model_validate({ + 'status': self.get_status(results), + 'messages': results + }) + + def get_representations(self) -> List[Representation]: + reps: List[Representation] = [] + for rep in self.representations: # pylint: disable=C0201 + reps.append(Representation.model_validate({ 'name': rep })) + return reps + + def get_root_results(self) -> List[Result]: + results: List[Result] = [] + location: str = _root_loc(self.name) + if not self.parser.is_archive: + results.append(test_result_from_id(3, location)) + if not self.parser.has_mets(): + results.append(test_result_from_id(4, location)) + results.extend(self._get_metadata_results(location=location)) + if 
not self.parser.has_representations_folder(): + results.append(test_result_from_id(9, location)) + elif len(self.representations) < 1: + results.append(test_result_from_id(10, location)) + return results + + def get_package_results(self) -> List[Result]: + results: List[Result] = [] + if not self.parser.has_schemas(): + result = self._get_schema_results() + if result: + results.append(result) + if not self.parser.has_documentation(): + result = self._get_dox_results() + if result: + results.append(result) + return results + + def _get_metadata_results(self, location: str) -> List[Result]: + results: List[Result] = [] + if not self.parser.has_metadata(): + results.append(test_result_from_id(5, location)) + else: + if not self.parser.has_preservation_md(): + results.append(test_result_from_id(6, location)) + if not self.parser.has_descriptive_md(): + results.append(test_result_from_id(7, location)) + if not self.parser.has_other_md(): + results.append(test_result_from_id(8, location)) + return results + + def _get_schema_results(self) -> Optional[Result]: + for tests in self.representations.values(): + if tests.has_schemas(): + return None + return test_result_from_id(15, _root_loc(self.name)) + + def _get_dox_results(self) -> Optional[Result]: + for tests in self.representations.values(): + if tests.has_documentation(): + return None + return test_result_from_id(16, _root_loc(self.name)) @classmethod - def from_directory(cls, dir_to_scan: str) -> 'StructureChecker': - """Create a manifest instance from a directory.""" - has_mets = False - has_md = False - has_schema = False - has_data = False - has_reps = False - name = os.path.basename(dir_to_scan) + def get_status(cls, results: List[Result]) -> StructureStatus: + for result in results: + if result.severity == Severity.ERROR: + return StructureStatus.NOTWELLFORMED + return StructureStatus.WELLFORMED + +def _folders_and_files(dir_to_scan: Path) -> Tuple[Set[str], Set[str]]: + folders: Set[str] = set() + files: 
Set[str] = set() + if os.path.isdir(dir_to_scan): for entry in os.listdir(dir_to_scan): - entry_path = os.path.join(dir_to_scan, entry) - # [CSIPSTR4] Is there a file called METS.xml (perform case checks) - # [CSIPSTR12] Does each representation folder have a METS.xml file? (W) - if entry == METS_NAME: - if os.path.isfile(entry_path): - has_mets = True - # [CSIPSTR5] Is there a first level folder called metadata? - # [CSIPSTR13] Does each representation folder have a metadata folder (W) - if os.path.isdir(entry_path): - if entry == 'metadata': - has_md = True - # [CSIPSTR15] Is there a schemas folder at the root level/representations? (W) - elif entry == 'schemas': - has_schema = True - # [CSIPSTR11] Does each representation folder have a sub folder called data? (W) - elif entry == 'data': - has_data = True - # [CSIPSTR9] Is there a first level folder called representations (W) - elif entry == REPS_DIR: - has_reps = True - return StructureChecker(name, has_mets, has_md, has_schema, has_data, has_reps) - -class PackageStructError(RuntimeError): - """Exception to signal fatal pacakge structure errors.""" - def __init__(self, arg): - super().__init__() - self.args = arg + path = os.path.join(dir_to_scan, entry) + if os.path.isfile(path): + files.add(entry) + elif os.path.isdir(path): + folders.add(entry) + return folders, files + +def test_result_from_id(requirement_id, location, message=None) -> Result: + """Return a TestResult instance created from the requirment ID and location.""" + req = REQUIREMENTS[requirement_id] + test_msg = message if message else req['message'] + return Result.model_validate({ + 'rule_id': req['id'], + 'location': location, + 'message': test_msg, + 'severity': Severity.from_level(req['level']) + }) + +def get_bad_path_results(path) -> StructResults: + return StructResults.model_validate({ + 'status': StructureStatus.NOTWELLFORMED, + 'messages': _get_str1_result_list(path) + }) + +def _get_str1_result_list(name: str) -> List[Result]: + 
return [ test_result_from_id(1, _root_loc(name)) ] + +def _root_loc(name: str) -> str: + return f'{ROOT} {name}' + +def validate(to_validate) -> Tuple[bool, StructResults]: + try: + struct_tests = StructureChecker(to_validate).get_test_results() + return struct_tests.status == StructureStatus.WELLFORMED, struct_tests + except PackageError: + return False, get_bad_path_results(to_validate) diff --git a/eark_validator/utils.py b/eark_validator/utils.py new file mode 100644 index 0000000..5e10a5e --- /dev/null +++ b/eark_validator/utils.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +"""Utilities used across validation modules""" +from pathlib import Path + +from .const import NO_PATH + +def get_path(path: Path | str, check_exists: bool=False) -> Path: + result: Path = Path(path) if isinstance(path, str) else path + if check_exists and not result.exists(): + raise FileNotFoundError(NO_PATH.format(path)) + return result diff --git a/pyproject.toml b/pyproject.toml index b56eb0b..6c2d1c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0", "setuptools-git-versioning<2"] +requires = ["setuptools>=61.0", "setuptools-git-versioning>=2.0,<3"] build-backend = "setuptools.build_meta" [project] @@ -15,16 +15,18 @@ maintainers = [ license = {file = "LICENSE"} description = "E-ARK Python Information Package Validation" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] dependencies = [ - "lxml==4.9.3", + "lxml==5.1.0", "importlib_resources==5.12.0", + "pydantic>=2.5.3,<3.0.0", ] + [project.optional-dependencies] testing = [ "pre-commit", @@ -33,12 +35,15 @@ testing = [ "pytest-cov", 'pytest-runner', ] + [project.urls] "Homepage" = "https://www.e-ark-foundation.eu/e-ark-software-py-e-ark-ip-validator/" "Issues" = "https://github.com/E-ARK-Software/eark-validator/issues" "Repository" = "https://github.com/E-ARK-Software/eark-validator.git" + [project.scripts] -ip-check = "eark_validator.cli.app:main" +eark-validator = "eark_validator.cli.app:main" + [tool.pytest.ini_options] minversion = "6.0" addopts = "-ra -q" @@ -46,9 +51,13 @@ testpaths = [ "tests", ] +[tool.setuptools] +packages = ["eark_validator", "eark_validator.cli", "eark_validator.infopacks", "eark_validator.ipxml", "eark_validator.model", "eark_validator.specifications" ] + +[tool.setuptools.package-data] +"eark_validator" = ["ipxml/resources/profiles/*/*.xml", 
"ipxml/resources/schema/*.xsd", "ipxml/resources/schematron/*/*/*.xml", "ipxml/resources/vocabs/*.*"] + [tool.setuptools-git-versioning] enabled = true -version_file = "VERSION" -count_commits_from_version_file = true dev_template = "{tag}.dev{env:GITHUB_RUN_NUMBER:{ccount}}" dirty_template = "{tag}.dev{env:GITHUB_RUN_NUMBER:{ccount}}+git.{sha}.dirty" diff --git a/tests/archive_handler_test.py b/tests/archive_handler_test.py index fa4be59..82f3e28 100644 --- a/tests/archive_handler_test.py +++ b/tests/archive_handler_test.py @@ -24,13 +24,16 @@ # from enum import Enum +from pathlib import Path import os import unittest -from eark_validator import structure as STRUCT -from eark_validator.infopacks.manifest import Checksum +from eark_validator.infopacks.manifest import Checksummer +from eark_validator.infopacks.package_handler import PackageError, PackageHandler -MIN_TAR_SHA1 = '47ca3a9d7f5f23bf35b852a99785878c5e543076' +from eark_validator.model import StructureStatus, StructResults + +MIN_TAR_SHA1 = '47CA3A9D7F5F23BF35B852A99785878C5E543076' class TestStatus(Enum): __test__ = False @@ -39,46 +42,67 @@ class TestStatus(Enum): class StatusValuesTest(unittest.TestCase): """Tests for package and manifest status values.""" def test_lgl_pckg_status(self): - for status in list(STRUCT.StructureStatus): - details = STRUCT.StructureReport(status=status) - self.assertTrue(details.status == status) + for status in list(StructureStatus): + results = StructResults(status=status, messages=[]) + self.assertEqual(results.status, status) def test_illgl_pckg_status(self): - self.assertRaises(ValueError, STRUCT.StructureReport, status=TestStatus.Illegal) + self.assertRaises(ValueError, StructResults, status=TestStatus.Illegal) -class ArchiveHandlerTest(unittest.TestCase): - empty_path = os.path.join(os.path.dirname(__file__), 'resources', 'empty.file') - min_tar_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', - 'minimal_IP_with_schemas.tar') - 
min_zip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', - 'minimal_IP_with_schemas.zip') - min_targz_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', - 'minimal_IP_with_schemas.tar.gz') +class PackageHandlerTest(unittest.TestCase): + dir_path = Path(os.path.join(os.path.dirname(__file__), 'resources')) + empty_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'empty.file')) + not_exists_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'not_there.zip')) + min_tar_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', + 'minimal_IP_with_schemas.tar')) + min_zip_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', + 'minimal_IP_with_schemas.zip')) + min_targz_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', + 'minimal_IP_with_schemas.tar.gz')) + multi_dir_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'bad', + 'multi_dir.zip')) + single_file_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'bad', + 'single_file.zip')) def test_sha1(self): - sha1 = Checksum.from_file(self.empty_path, 'SHA1').value - self.assertTrue(sha1 == 'da39a3ee5e6b4b0d3255bfef95601890afd80709') - sha1 = Checksum.from_file(self.min_tar_path, 'SHA1').value - self.assertTrue(sha1 == MIN_TAR_SHA1) + sha1 = Checksummer.from_file(self.empty_path, 'SHA-1').value + self.assertEqual(sha1, 'DA39A3EE5E6B4B0D3255BFEF95601890AFD80709') + sha1 = Checksummer.from_file(self.min_tar_path, 'SHA-1').value + self.assertEqual(sha1, MIN_TAR_SHA1) def test_is_archive(self): - self.assertTrue(STRUCT.ArchivePackageHandler.is_archive(self.min_tar_path)) - self.assertTrue(STRUCT.ArchivePackageHandler.is_archive(self.min_zip_path)) - self.assertTrue(STRUCT.ArchivePackageHandler.is_archive(self.min_targz_path)) - self.assertFalse(STRUCT.ArchivePackageHandler.is_archive(self.empty_path)) + 
self.assertTrue(PackageHandler.is_archive(self.min_tar_path)) + self.assertTrue(PackageHandler.is_archive(self.min_zip_path)) + self.assertTrue(PackageHandler.is_archive(self.min_targz_path)) + self.assertFalse(PackageHandler.is_archive(self.empty_path)) def test_unpack_illgl_archive(self): - handler = STRUCT.ArchivePackageHandler() - self.assertRaises(STRUCT.PackageStructError, handler.unpack_package, self.empty_path) + handler = PackageHandler() + self.assertRaises(ValueError, handler.unpack_package, self.empty_path) + + def test_multi_dir(self): + handler = PackageHandler() + self.assertRaises(PackageError, handler.unpack_package, self.multi_dir_path) + + def test_single_file(self): + handler = PackageHandler() + self.assertRaises(PackageError, handler.unpack_package, self.single_file_path) + + def test_prepare_not_exists(self): + handler = PackageHandler() + self.assertRaises(ValueError, handler.prepare_package, self.not_exists_path) def test_unpack_archives(self): - handler = STRUCT.ArchivePackageHandler() - dest = handler.unpack_package(self.min_tar_path) - self.assertTrue(os.path.basename(dest) == MIN_TAR_SHA1) - dest = handler.unpack_package(self.min_zip_path) - self.assertTrue(os.path.basename(dest) == '54bbe654fe332b51569baf21338bc811cad2af66') - dest = handler.unpack_package(self.min_targz_path) - self.assertTrue(os.path.basename(dest) == 'db2703ff464e613e9d1dc5c495e23a2e2d49b89d') + handler = PackageHandler() + dest = Path(handler.unpack_package(self.min_tar_path)) + self.assertEqual(os.path.basename(dest.parent), MIN_TAR_SHA1) + dest = Path(handler.unpack_package(self.min_zip_path)) + self.assertEqual(os.path.basename(dest.parent), '54BBE654FE332B51569BAF21338BC811CAD2AF66') + dest = Path(handler.unpack_package(self.min_targz_path)) + self.assertEqual(os.path.basename(dest.parent), 'DB2703FF464E613E9D1DC5C495E23A2E2D49B89D') + + def test_is_dir_archive(self): + self.assertFalse(PackageHandler.is_archive(self.dir_path)) if __name__ == '__main__': 
unittest.main() diff --git a/tests/ips_test.py b/tests/ips_test.py index b65f023..e7d7592 100644 --- a/tests/ips_test.py +++ b/tests/ips_test.py @@ -23,14 +23,17 @@ # under the License. # +import os +from pathlib import Path import unittest from importlib_resources import files +from eark_validator.model.package_details import InformationPackage import tests.resources.xml as XML import tests.resources.ips.unpacked as UNPACKED -from eark_validator.infopacks.information_package import PackageDetails +from eark_validator.infopacks.information_package import InformationPackages from eark_validator.ipxml.schema import LOCAL_SCHEMA, get_local_schema METS_XML = 'METS.xml' @@ -38,44 +41,52 @@ class PackageDetailsTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls._mets_file = str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031').joinpath(METS_XML)) + cls._mets_file = Path(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031').joinpath(METS_XML)) def test_not_exists(self): with self.assertRaises(FileNotFoundError): - PackageDetails.from_mets_file('not-exists.xml') + InformationPackages.details_from_mets_file(Path('not-exists.xml')) def test_isdir(self): with self.assertRaises(ValueError): - PackageDetails.from_mets_file(str(files(XML))) + InformationPackages.details_from_mets_file(Path(files(XML))) - """Tests for Schematron validation rules.""" - def test_objid(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.objid, '733dc055-34be-4260-85c7-5549a7083031') + def test_bad_xml(self): + with self.assertRaises(ValueError): + InformationPackages.details_from_mets_file(Path(files(XML).joinpath('METS-no-hdr.xml'))) def test_label(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.label, '') - - def test_type(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.type, 'Other') + package_details = 
InformationPackages.details_from_mets_file(self._mets_file) + self.assertEqual(package_details.label, '') def test_othertype(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.othertype, 'type') + package_details = InformationPackages.details_from_mets_file(self._mets_file) + self.assertEqual(package_details.othertype, 'type') def test_contentinformationtype(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.contentinformationtype, 'MIXED') - - def test_profile(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.profile, 'NOT_DEFINED') + package_details = InformationPackages.details_from_mets_file(self._mets_file) + self.assertEqual(package_details.contentinformationtype, 'MIXED') def test_oaispackagetype(self): - parser = PackageDetails.from_mets_file(self._mets_file) - self.assertEqual(parser.oaispackagetype, 'AIP') + package_details = InformationPackages.details_from_mets_file(self._mets_file) + self.assertEqual(package_details.oaispackagetype, 'AIP') + +class InformationPackageTest(unittest.TestCase): + def test_from_path_not_exists(self): + with self.assertRaises(FileNotFoundError): + InformationPackages.from_path(Path('not-exists')) + + def test_from_path_not_archive(self): + with self.assertRaises(ValueError): + InformationPackages.from_path(Path(os.path.join(os.path.dirname(__file__), 'resources', 'empty.file'))) + + def test_from_path_dir_no_mets(self): + with self.assertRaises(ValueError): + InformationPackages.from_path(Path(files(UNPACKED))) + + def test_from_path_dir(self): + ip: InformationPackage = InformationPackages.from_path(Path(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031'))) + self.assertEqual(ip.details.name, '733dc055-34be-4260-85c7-5549a7083031') class SchemaTest(unittest.TestCase): def test_schema(self): diff --git a/tests/manifests_test.py b/tests/manifests_test.py index 755f315..db8afdf 100644 --- 
a/tests/manifests_test.py +++ b/tests/manifests_test.py @@ -25,25 +25,34 @@ """Module containing tests covering the manifest class.""" from enum import Enum import os +from pathlib import Path +import tempfile import unittest from importlib_resources import files import xml.etree.ElementTree as ET +from eark_validator.model.manifest import Manifest, SourceType +from eark_validator.model.manifest import ManifestEntry + +import tests.resources as RES import tests.resources.xml as XML import tests.resources.ips.unpacked as UNPACKED from eark_validator.infopacks.manifest import ( - HashAlgorithms, - FileItem, - Checksum, - Manifest + Checksummer, + ManifestEntries, + Manifests, + _resolve_manifest_root ) +from eark_validator.mets import _parse_file_entry +from eark_validator.model import ChecksumAlg, Checksum + METS = 'METS.xml' PERSON = 'person.xml' -DIR_PATH = str(files(XML)) -PERSON_PATH = os.path.join(DIR_PATH, PERSON) -MISSING_PATH = os.path.join(DIR_PATH, 'missing.xml') +DIR_PATH = Path(str(files(XML))) +PERSON_PATH = Path(os.path.join(DIR_PATH, PERSON)) +MISSING_PATH = Path(os.path.join(DIR_PATH, 'missing.xml')) FILE_XML = """ @@ -58,169 +67,196 @@ def test_alg_values(self): """Test that checksums are present in HashAlgorithms.""" alg_values = ['MD5', 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512'] for value in alg_values: - alg = HashAlgorithms.from_string(value) - self.assertIsNotNone(alg) + alg = ChecksumAlg.from_string(value) + self.assertIsNotNone(alg, 'Expected {} to be a valid checksum algorithm'.format(alg)) def test_alg_names(self): """Test that checksums are present in HashAlgorithms.""" alg_names = ['MD5', 'SHA1', 'SHA256', 'SHA384', 'SHA512'] for name in alg_names: - alg = HashAlgorithms.from_string(name) + alg = ChecksumAlg.from_string(name) self.assertIsNotNone(alg) def test_alg_items(self): """Test that checksums are present in HashAlgorithms.""" - for alg_item in HashAlgorithms: - alg = HashAlgorithms.from_string(alg_item) + for alg_item in 
ChecksumAlg: + alg = ChecksumAlg.from_string(alg_item) self.assertIsNotNone(alg) def test_missing_alg(self): - """Test that a missing algorithm returns None.""" - alg = HashAlgorithms.from_string('NOT_AN_ALGORITHM') - self.assertIsNone(alg) + """Test that a missing algorithm raises a ValueError.""" + with self.assertRaises(ValueError): + ChecksumAlg.from_string('NOT_AN_ALGORITHM') def test_missing_implementation(self): """Test that a missing algorithm returns None.""" with self.assertRaises(ValueError): - HashAlgorithms.get_implementation(FakeEnum.FAKE) + ChecksumAlg.get_implementation(FakeEnum.FAKE) + +class ChecksummerTest(unittest.TestCase): + def test_from_alg(self): + """Test that a checksum algorithm is returned.""" + alg = ChecksumAlg.from_string('MD5') + summer = Checksummer(alg) + self.assertEqual(summer.algorithm, alg) + + def test_from_string_name(self): + """Test that a checksum algorithm is returned.""" + summer = Checksummer('SHA1') + self.assertEqual(summer.algorithm, ChecksumAlg.SHA1) + + def test_from_string_value(self): + """Test that a checksum algorithm is returned.""" + summer = Checksummer('SHA-1') + self.assertEqual(summer.algorithm, ChecksumAlg.SHA1) def test_md5(self): """Test MD5 calculation by HashAlgorithms.""" - alg = HashAlgorithms.from_string('MD5') - digest = alg.hash_file(PERSON_PATH) + summer = Checksummer(ChecksumAlg.from_string('MD5')) + digest = summer.hash_file(PERSON_PATH) self.assertEqual(digest.algorithm.name, 'MD5', 'Expected MD5 digest id, not {}'.format(digest.algorithm.name)) - self.assertEqual(digest.value, '9958111af1284696d07ec9d2e70d2517', 'MD5 digest {} does not match'.format(digest.value)) + self.assertEqual(digest.value, '9958111AF1284696D07EC9D2E70D2517', 'MD5 digest {} does not match'.format(digest.value)) def test_sha1(self): """Test SHA1 calculation by HashAlgorithms.""" - alg = HashAlgorithms.from_string('SHA1') + alg = Checksummer(ChecksumAlg.from_string('SHA1')) digest = alg.hash_file(PERSON_PATH) 
self.assertEqual(digest.algorithm.name, 'SHA1', 'Expected SHA1 digest id, not {}'.format(digest.algorithm.name)) - self.assertEqual(digest.value, 'ed294aaff253f66e4f1c839b732a43f36ba91677', 'SHA1 digest {} does not match'.format(digest.value)) + self.assertNotEqual(digest.value, 'ed294aaff253f66e4f1c839b732a43f36ba91677', 'SHA1 digest {} does not match'.format(digest.value)) + self.assertEqual(Checksum(algorithm=ChecksumAlg.SHA1, value='ed294aaff253f66e4f1c839b732a43f36ba91677'), digest, 'Digest {} does not match'.format(digest.value)) + self.assertEqual(Checksum.model_validate({'algorithm': ChecksumAlg.SHA1, 'value': 'ed294aaff253f66e4f1c839b732a43f36ba91677'}, strict=True), digest, 'SHA1 digest {} does not match'.format(digest.value)) def test_sha256(self): """Test SHA256 calculation by HashAlgorithms.""" - alg = HashAlgorithms.from_string('SHA256') + alg = Checksummer(ChecksumAlg.from_string('SHA256')) digest = alg.hash_file(PERSON_PATH) self.assertEqual(digest.algorithm.name, 'SHA256', 'Expected SHA256 digest id, not {}'.format(digest.algorithm.name)) - self.assertEqual(digest.value, 'c944af078a5ac0bac02e423d663cf6ad2efbf94f92343d547d32907d13d44683', 'SHA256 digest {} does not match'.format(digest.value)) + self.assertEqual(digest.value, 'C944AF078A5AC0BAC02E423D663CF6AD2EFBF94F92343D547D32907D13D44683', 'SHA256 digest {} does not match'.format(digest.value)) def test_sha384(self): """Test SHA384 calculation by HashAlgorithms.""" - alg = HashAlgorithms.from_string('SHA384') + alg = Checksummer(ChecksumAlg.from_string('SHA384')) digest = alg.hash_file(PERSON_PATH) self.assertEqual(digest.algorithm.name, 'SHA384', 'Expected SHA384 digest id, not {}'.format(digest.algorithm.name)) - self.assertEqual(digest.value, 'aa7af70d126e215013c8e335eada664379e1947bf7a194672af3dbc529d82e9adb0b4f5098bdded9aaba83439ad9bee9', 'SHA384 digest {} does not match'.format(digest.value)) + self.assertEqual(digest.value, 
'AA7AF70D126E215013C8E335EADA664379E1947BF7A194672AF3DBC529D82E9ADB0B4F5098BDDED9AABA83439AD9BEE9', 'SHA384 digest {} does not match'.format(digest.value)) def test_sha512(self): """Test SHA512 calculation by HashAlgorithms.""" - alg = HashAlgorithms.from_string('SHA512') + alg = Checksummer(ChecksumAlg.from_string('SHA512')) digest = alg.hash_file(PERSON_PATH) self.assertEqual(digest.algorithm.name, 'SHA512', 'Expected SHA512 digest id, not {}'.format(digest.algorithm.name)) - self.assertEqual(digest.value, '04e2b2a51fcbf8b26a88a819723f928d2aee2fd3342bed090571fc2de3c9c2d2ed7b75545951ba3a4a7f5e4bd361544accbcd6e3932dc0d26fcaf4dadc79512b', 'MSHA512D5 digest {} does not match'.format(digest.value)) + self.assertEqual(digest.value, '04E2B2A51FCBF8B26A88A819723F928D2AEE2FD3342BED090571FC2DE3C9C2D2ED7B75545951BA3A4A7F5E4BD361544ACCBCD6E3932DC0D26FCAF4DADC79512B', 'MSHA512D5 digest {} does not match'.format(digest.value)) def test_dir_error(self): - alg = HashAlgorithms.from_string('MD5') + alg = Checksummer(ChecksumAlg.from_string('MD5')) with self.assertRaises(ValueError): alg.hash_file(DIR_PATH) def test_missing_error(self): - alg = HashAlgorithms.from_string('MD5') + alg = Checksummer(ChecksumAlg.from_string('MD5')) with self.assertRaises(FileNotFoundError): alg.hash_file(MISSING_PATH) - def test_from_xml(self): - element = ET.fromstring(FILE_XML) - checksum = Checksum.from_mets_element(element) - self.assertEqual(checksum.algorithm, HashAlgorithms.SHA256) - self.assertTrue(checksum.is_value('F37E90511B5DDE2E9C60378A0F0A0A1CF07145C8F12651E0E19731892C608DA7')) - -class FileItemTest(unittest.TestCase): - def test_from_path(self): - item = FileItem.from_file_path(PERSON_PATH) - self.assertEqual(item.path, PERSON_PATH) - self.assertEqual(item.name, PERSON) - self.assertEqual(item.size, 75) - self.assertEqual(item.mime, 'application/octet-stream') - self.assertIsNone(item.checksum) - - def test_from_path_with_mime(self): - item = FileItem.from_file_path(PERSON_PATH, 
mime='text/ipxml') - self.assertEqual(item.path, PERSON_PATH) - self.assertEqual(item.name, PERSON) - self.assertEqual(item.size, 75) - self.assertEqual(item.mime, 'text/ipxml') - self.assertIsNone(item.checksum) - - def test_from_path_with_checksum(self): - item = FileItem.from_file_path(PERSON_PATH, checksum_algorithm='MD5') - self.assertEqual(item.path, PERSON_PATH) - self.assertEqual(item.name, PERSON) - self.assertEqual(item.size, 75) - self.assertIsNotNone(item.checksum) - self.assertEqual(item.checksum.algorithm.name, 'MD5', 'Expected MD5 digest id, not {}'.format(item.checksum.algorithm.name)) - self.assertEqual(item.checksum.value, '9958111af1284696d07ec9d2e70d2517', 'MD5 digest {} does not match'.format(item.checksum.value)) - - def test_dir_path(self): +class ManifestEntryTest(unittest.TestCase): + def test_from_missing_path(self): + with self.assertRaises(FileNotFoundError): + ManifestEntries.from_file_path(Path('/none'), MISSING_PATH) + + def test_from_dir_path(self): with self.assertRaises(ValueError): - FileItem.from_file_path(DIR_PATH) + ManifestEntries.from_file_path(DIR_PATH, DIR_PATH) - def test_file_not_found(self): - with self.assertRaises(FileNotFoundError): - FileItem.from_file_path(MISSING_PATH) + def test_from_file(self): + item = ManifestEntries.from_file_path(PERSON_PATH, PERSON_PATH, 'SHA256') + self.assertEqual(item.checksums[0].algorithm.value, 'SHA-256', 'Expected SHA-256 digest value not {}'.format(item.checksums[0].algorithm.value)) + self.assertEqual(item.checksums[0].value, 'C944AF078A5AC0BAC02E423D663CF6AD2EFBF94F92343D547D32907D13D44683', 'SHA256 digest {} does not match'.format(item.checksums[0].value)) - def test_from_xml(self): - element = ET.fromstring(FILE_XML) - file_item = FileItem.from_element(element) - self.assertEqual(file_item.path, 'representations/rep1/METS.xml') - self.assertEqual(file_item.name, METS) - self.assertEqual(file_item.size, 3554) - self.assertEqual(file_item.checksum.algorithm, HashAlgorithms.SHA256) 
- self.assertTrue(file_item.checksum.is_value('F37E90511B5DDE2E9C60378A0F0A0A1CF07145C8F12651E0E19731892C608DA7')) + def test_from_file_entry(self): + entry: ManifestEntry = ManifestEntries.from_file_entry(_parse_file_entry(ET.fromstring(FILE_XML))) + self.assertEqual(entry.checksums[0].algorithm, ChecksumAlg.SHA256) + self.assertEqual(entry.checksums[0].value, 'F37E90511B5DDE2E9C60378A0F0A0A1CF07145C8F12651E0E19731892C608DA7') + self.assertEqual(entry.path, 'representations/rep1/METS.xml') + self.assertEqual(entry.size, 3554) class ManifestTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls._manifest = Manifest.from_directory(str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')), 'MD5') + cls._manifest = Manifests.from_directory(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031'), 'MD5') + + def setUp(self): + self._test_dir = tempfile.TemporaryDirectory() + self._temp_man_pickle = os.path.join(self._test_dir.name, 'manifest.pickle') - def test_root_dir(self): - self.assertEqual(self._manifest.root_path, str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031'))) + def tearDown(self): + self._test_dir.cleanup() def test_items(self): - self.assertEqual(len(self._manifest.items), self._manifest.file_count) + self.assertEqual(len(self._manifest.entries), self._manifest.file_count) + + def test_validate_manifest(self): + is_valid, _ = Manifests.validate_manifest(self._manifest) + self.assertTrue(is_valid) def test_no_dir(self): missing = str(files(UNPACKED).joinpath('missing')) with self.assertRaises(FileNotFoundError): - Manifest(missing, []) - with self.assertRaises(FileNotFoundError): - Manifest.from_directory(missing) - with self.assertRaises(FileNotFoundError): - Manifest.from_file_items(missing, []) + Manifests.from_directory(missing) def test_file_path(self): - file_path = str(files(UNPACKED).joinpath('single_file').joinpath('empty.file')) + file_path = str(files(RES).joinpath('empty.file')) with 
self.assertRaises(ValueError): - Manifest(file_path, []) + Manifests.from_directory(file_path) with self.assertRaises(ValueError): - Manifest.from_directory(file_path) - with self.assertRaises(ValueError): - Manifest.from_file_items(file_path, []) + Manifests.from_mets_file(file_path) def test_manifest_filecount(self): self.assertEqual(self._manifest.file_count, 23) def test_manifest_size(self): - self.assertEqual(self._manifest.size, 306216) - - def test_manifest_get_rel_file(self): - mets_file = self._manifest.get_item(METS) - self.assertIsNotNone(mets_file, 'METS.xml not found in via relative path') - mets_file = self._manifest.get_item('representations/rep1/data/RODA-in.png') - self.assertIsNotNone(mets_file, 'representations/rep1/data/RODA-in.png not found in via relative path') - - def test_manifest_get_abs_file(self): - mets_file = self._manifest.get_item(os.path.join(str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')), METS)) - self.assertIsNotNone(mets_file, 'METS.xml not found using absolute path') - mets_file = self._manifest.get_item(os.path.join(str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')), 'representations/rep1/data/RODA-in.png')) - self.assertIsNotNone(mets_file, 'representations/rep1/data/RODA-in.png not found using absolute path') + self.assertEqual(self._manifest.total_size, 306216) + + def test_from_missing_source(self): + missing = str(files(UNPACKED).joinpath('missing')) + with self.assertRaises(FileNotFoundError): + Manifests.from_source(missing, 'MD5') + + def test_from_package_source(self): + manifest = Manifests.from_source(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031'), 'MD5') + self.assertEqual(manifest.source, SourceType.PACKAGE) + is_valid, _ = Manifests.validate_manifest(manifest) + self.assertTrue(is_valid) + + def test_from_mets_source(self): + manifest = Manifests.from_source(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031').joinpath(METS), 'MD5') + 
self.assertEqual(manifest.source, SourceType.METS) + is_valid, _ = Manifests.validate_manifest(manifest) + self.assertTrue(is_valid) + + def test_from_missing_mets_file(self): + with self.assertRaises(FileNotFoundError): + Manifests.from_mets_file(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031').joinpath('missing_mets.xml')) + + def test_from_dir_mets_file(self): + with self.assertRaises(ValueError): + Manifests.from_mets_file(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')) + + def test_to_file(self): + Manifests.to_file(self._manifest, self._temp_man_pickle) + self.assertTrue(os.path.exists(self._temp_man_pickle)) + + def test_from_file(self): + Manifests.to_file(self._manifest, self._temp_man_pickle) + manifest: Manifest = Manifests.from_file(self._temp_man_pickle) + is_valid, _ = Manifests.validate_manifest(manifest, files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')) + self.assertTrue(is_valid) + is_valid, errors = Manifests.validate_manifest(manifest, files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031-bad')) + self.assertFalse(is_valid) + self.assertEqual(len(errors), 3) + + def test_resolve_manifest_bad_source(self): + manifest = Manifest.model_validate({ + 'root': Path(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031')), + 'source': SourceType.UNKNOWN + }) + with self.assertRaises(ValueError): + _resolve_manifest_root(manifest) diff --git a/tests/mets_test.py b/tests/mets_test.py index 69f9a32..fa7b9b8 100644 --- a/tests/mets_test.py +++ b/tests/mets_test.py @@ -26,6 +26,7 @@ import unittest from importlib_resources import files +from eark_validator.infopacks.manifest import Manifests import tests.resources.xml as XML import tests.resources.ips.unpacked as UNPACKED @@ -67,9 +68,6 @@ def test_multi_mets(self): self.assertGreater(len(validator.file_references), 0) self.assertGreater(len(validator.representation_mets), 0) self.assertEqual(validator.get_mets_path('rep1'), 
'representations/rep1/METS.xml') - is_complete, issues = validator.get_manifest().check_integrity() - self.assertTrue(is_complete) - self.assertEqual(len(issues), 0) def test_bad_manifest(self): validator = MetsValidator(str(files(UNPACKED).joinpath('733dc055-34be-4260-85c7-5549a7083031-bad'))) @@ -78,9 +76,6 @@ def test_bad_manifest(self): self.assertEqual(len(validator.validation_errors), 0) self.assertEqual(len(validator.representations), 1) self.assertGreater(len(validator.file_references), 0) - is_complete, issues = validator.get_manifest().check_integrity() - self.assertFalse(is_complete) - self.assertEqual(len(issues), 27) class SchemaTest(unittest.TestCase): def test_schema(self): diff --git a/tests/namespaces_test.py b/tests/namespaces_test.py index 1d48fcc..25e6a7d 100644 --- a/tests/namespaces_test.py +++ b/tests/namespaces_test.py @@ -25,10 +25,6 @@ import unittest -from importlib_resources import files - -import tests.resources.xml as XML - from eark_validator.ipxml import namespaces as NS class MetsValidatorTest(unittest.TestCase): @@ -37,13 +33,23 @@ def test_from_prefix(self): for namespace in NS.Namespaces: self.assertEqual(namespace, NS.Namespaces.from_prefix(namespace.prefix)) + def test_from_bad_prefix(self): + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_prefix('bad')) + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_prefix('')) + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_prefix(None)) + def test_from_id(self): for namespace in NS.Namespaces: - self.assertEqual(namespace, NS.Namespaces.from_id(namespace.id)) + self.assertEqual(namespace, NS.Namespaces.from_uri(namespace.uri)) + + def test_from_bad_id(self): + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_uri('bad')) + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_uri('')) + self.assertEqual(NS.Namespaces.METS, NS.Namespaces.from_uri(None)) def test_qualify(self): for namespace in NS.Namespaces: - 
self.assertEqual('{{{}}}file'.format(namespace.id), namespace.qualify('file')) + self.assertEqual('{{{}}}file'.format(namespace.uri), namespace.qualify('file')) if __name__ == '__main__': diff --git a/tests/resources/ips/__init__.py b/tests/resources/ips/__init__.py index e69de29..d75961a 100644 --- a/tests/resources/ips/__init__.py +++ b/tests/resources/ips/__init__.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/resources/ips/bad/multi_dir.zip b/tests/resources/ips/bad/multi_dir.zip new file mode 100644 index 0000000..4bb2c4f Binary files /dev/null and b/tests/resources/ips/bad/multi_dir.zip differ diff --git a/tests/resources/ips/bad/multi_file.zip b/tests/resources/ips/bad/multi_file.zip new file mode 100644 index 0000000..4164052 Binary files /dev/null and b/tests/resources/ips/bad/multi_file.zip differ diff --git a/tests/resources/ips/bad/multi_var.zip b/tests/resources/ips/bad/multi_var.zip new file mode 100644 index 0000000..eb238af Binary files /dev/null and b/tests/resources/ips/bad/multi_var.zip differ diff --git a/tests/resources/ips/bad/single_file.zip b/tests/resources/ips/bad/single_file.zip new file mode 100644 index 0000000..14d91d3 Binary files /dev/null and b/tests/resources/ips/bad/single_file.zip differ diff --git a/tests/resources/ips/minimal/__init__.py b/tests/resources/ips/minimal/__init__.py new file mode 100644 index 0000000..d75961a --- /dev/null +++ b/tests/resources/ips/minimal/__init__.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/resources/ips/struct/__init__.py b/tests/resources/ips/struct/__init__.py index e69de29..d75961a 100644 --- a/tests/resources/ips/struct/__init__.py +++ b/tests/resources/ips/struct/__init__.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/resources/ips/struct/empty_reps.tar.gz b/tests/resources/ips/struct/empty_reps.tar.gz new file mode 100644 index 0000000..26971ce Binary files /dev/null and b/tests/resources/ips/struct/empty_reps.tar.gz differ diff --git a/tests/resources/ips/struct/no_data.tar.gz b/tests/resources/ips/struct/no_data.tar.gz index 7fb9a20..999de4d 100644 Binary files a/tests/resources/ips/struct/no_data.tar.gz and b/tests/resources/ips/struct/no_data.tar.gz differ diff --git a/tests/resources/ips/struct/no_desc.tar.gz b/tests/resources/ips/struct/no_desc.tar.gz new file mode 100644 index 0000000..3471e5b Binary files /dev/null and b/tests/resources/ips/struct/no_desc.tar.gz differ diff --git a/tests/resources/ips/struct/no_docs.tar.gz b/tests/resources/ips/struct/no_docs.tar.gz new file mode 100644 index 0000000..fcdd1b8 Binary files /dev/null and b/tests/resources/ips/struct/no_docs.tar.gz differ diff --git a/tests/resources/ips/struct/no_md.tar.gz b/tests/resources/ips/struct/no_md.tar.gz index c099c50..2f83b92 100644 Binary files a/tests/resources/ips/struct/no_md.tar.gz and b/tests/resources/ips/struct/no_md.tar.gz differ diff --git a/tests/resources/ips/struct/no_messages.tar.gz b/tests/resources/ips/struct/no_messages.tar.gz new file mode 100644 index 0000000..c075dfb Binary files /dev/null and b/tests/resources/ips/struct/no_messages.tar.gz differ diff --git a/tests/resources/ips/struct/no_mets.tar.gz b/tests/resources/ips/struct/no_mets.tar.gz index 86acb0b..44d0749 100644 Binary files a/tests/resources/ips/struct/no_mets.tar.gz and b/tests/resources/ips/struct/no_mets.tar.gz differ diff --git a/tests/resources/ips/struct/no_other.tar.gz b/tests/resources/ips/struct/no_other.tar.gz new file mode 100644 index 0000000..7b77719 Binary files /dev/null and b/tests/resources/ips/struct/no_other.tar.gz differ diff --git a/tests/resources/ips/struct/no_pres.tar.gz 
b/tests/resources/ips/struct/no_pres.tar.gz new file mode 100644 index 0000000..6869196 Binary files /dev/null and b/tests/resources/ips/struct/no_pres.tar.gz differ diff --git a/tests/resources/ips/struct/no_repmd.tar.gz b/tests/resources/ips/struct/no_repmd.tar.gz new file mode 100644 index 0000000..d205d7b Binary files /dev/null and b/tests/resources/ips/struct/no_repmd.tar.gz differ diff --git a/tests/resources/ips/struct/no_repmets.tar.gz b/tests/resources/ips/struct/no_repmets.tar.gz new file mode 100644 index 0000000..9cd9c26 Binary files /dev/null and b/tests/resources/ips/struct/no_repmets.tar.gz differ diff --git a/tests/resources/ips/struct/no_reps.tar.gz b/tests/resources/ips/struct/no_reps.tar.gz index 6a8f865..3eb6d29 100644 Binary files a/tests/resources/ips/struct/no_reps.tar.gz and b/tests/resources/ips/struct/no_reps.tar.gz differ diff --git a/tests/resources/ips/struct/no_schemas.tar.gz b/tests/resources/ips/struct/no_schemas.tar.gz index 3f445ba..5292a25 100644 Binary files a/tests/resources/ips/struct/no_schemas.tar.gz and b/tests/resources/ips/struct/no_schemas.tar.gz differ diff --git a/tests/resources/ips/unpacked/__init__.py b/tests/resources/ips/unpacked/__init__.py new file mode 100644 index 0000000..d75961a --- /dev/null +++ b/tests/resources/ips/unpacked/__init__.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/resources/ips/unpacked/multi_dir/dir1/empty.file b/tests/resources/ips/unpacked/multi_dir/dir1/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/multi_dir/dir2/empty.file b/tests/resources/ips/unpacked/multi_dir/dir2/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/multi_file/empty.file b/tests/resources/ips/unpacked/multi_file/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/multi_file/empty_1.file b/tests/resources/ips/unpacked/multi_file/empty_1.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/multi_var/dir1/empty.file b/tests/resources/ips/unpacked/multi_var/dir1/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/multi_var/empty.file b/tests/resources/ips/unpacked/multi_var/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/ips/unpacked/single_file/empty.file b/tests/resources/ips/unpacked/single_file/empty.file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resources/json/__init__.py b/tests/resources/json/__init__.py new file mode 100644 index 0000000..d9d1168 --- /dev/null +++ b/tests/resources/json/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# 
Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + JSON report representation +""" diff --git a/tests/resources/json/commons-ip-invalid.json b/tests/resources/json/commons-ip-invalid.json new file mode 100644 index 0000000..f60419d --- /dev/null +++ b/tests/resources/json/commons-ip-invalid.json @@ -0,0 +1,79 @@ +{ + "package" : { + "details" : { + "name" : "minimal_IP_with_schemas.zip", + "checksums" : [ { + "algorithm" : "SHA1", + "value" : "54BBE654FE332B51569BAF21338BC811CAD2AF66" + } ] + }, + "profile" : { + "type" : "CSIP", + "name" : "minimal_IP_with_schemas.zip", + "version" : "CSIP-" + }, + "representations" : { + "name" : "minimal_IP_with_schemas.zip" + } + }, + "uid" : "e11da9f2-e59e-4f23-b7f8-5ac86632231f", + "structure" : { + "status" : "WellFormed", + "messages" : [ { + "ruleId" : "CSIPSTR7", + "location" : "", + "message" : "If descriptive metadata are available should include inside metadata/descriptive ", + "severity" : "Warn" + }, { + "ruleId" : "CSIPSTR8", + "location" : "", + "message" : "If any other metadata are available, they MAY be included in separate sub-folders, for example an additional folder named other. 
", + "severity" : "Info" + }, { + "ruleId" : "CSIPSTR12", + "location" : "", + "message" : "The recommended best practice is to always have a METS.xml in the representation folder. ", + "severity" : "Warn" + }, { + "ruleId" : "CSIPSTR13", + "location" : "", + "message" : "The representation folder SHOULD include a sub-folder named metadata which MAY include all metadata about the specific representation. ", + "severity" : "Warn" + }, { + "ruleId" : "CSIPSTR16", + "location" : "", + "message" : "There is no documentation folder in the representation folder minimal_IP_with_schemas/representations/rep1. ", + "severity" : "Warn" + } ] + }, + "metadata" : { + "schemaResults" : { + "status" : "VALID", + "messages" : [ ] + }, + "schematronResults" : { + "status" : "NOTVALID", + "messages" : [ { + "ruleId" : "CSIP31", + "location" : "mets/amdSec", + "message" : "You have administrative files in the metadata/folder, you must have mets/amdSec in Root METS.xml ", + "severity" : "Warn" + }, { + "ruleId" : "CSIP45", + "location" : "mets/amdSec/rightsMD", + "message" : "Individual representations should state their specific rights in their representation METS file (Root METS.xml) ", + "severity" : "Info" + }, { + "ruleId" : "CSIP66", + "location" : "mets/fileSec/fileGrp/file", + "message" : "You have files in SIP that are not referenced in Root METS.xml ", + "severity" : "Error" + }, { + "ruleId" : "CSIP80", + "location" : "mets/structMap", + "message" : "Must have one structMap with the mets/structMap[@LABEL='CSIP'] in Root METS.xml doens't appear mets/structMap[@LABEL='CSIP']. 
", + "severity" : "Error" + } ] + } + } +} diff --git a/tests/resources/json/commons-ip-report.json b/tests/resources/json/commons-ip-report.json new file mode 100644 index 0000000..815d66a --- /dev/null +++ b/tests/resources/json/commons-ip-report.json @@ -0,0 +1,49 @@ +{ + "package" : { + "details" : { + "name" : "733dc055-34be-4260-85c7-5549a7083031.zip", + "checksums" : [ { + "algorithm" : "SHA1", + "value" : "B8EFAE7679EF63CDB9EF80B643672EE31E1C2898" + } ] + }, + "profile" : { + "type" : null, + "name" : "733dc055-34be-4260-85c7-5549a7083031.zip", + "version" : "CSIP-" + }, + "representations" : { + "name" : "733dc055-34be-4260-85c7-5549a7083031.zip" + } + }, + "uid" : "d991e991-8abc-4ad3-984c-c936de6c53d7", + "structure" : { + "status" : "WellFormed", + "messages" : [ { + "ruleId" : "CSIPSTR16", + "location" : "", + "message" : "There is no documentation folder in the representation folder 733dc055-34be-4260-85c7-5549a7083031/representations/rep1. ", + "severity" : "Warn" + } ] + }, + "metadata" : { + "schemaResults" : { + "status" : "VALID", + "messages" : [ ] + }, + "schematronResults" : { + "status" : "VALID", + "messages" : [ { + "ruleId" : "CSIP17", + "location" : "mets/dmdSec", + "message" : "Doesn't have files in metadata/descriptive folder but have dmdSec in 733dc055-34be-4260-85c7-5549a7083031/representations/rep1/METS.xml; Put the files under metadata folder ", + "severity" : "Warn" + }, { + "ruleId" : "CSIP45", + "location" : "mets/amdSec/rightsMD", + "message" : "Individual representations should state their specific rights in their representation METS file (733dc055-34be-4260-85c7-5549a7083031/representations/rep1/METS.xml) Individual representations should state their specific rights in their representation METS file (Root METS.xml) ", + "severity" : "Info" + } ] + } + } +} diff --git a/tests/resources/schematron/__init__.py b/tests/resources/schematron/__init__.py index e69de29..4398a39 100644 --- a/tests/resources/schematron/__init__.py +++ 
b/tests/resources/schematron/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/resources/xml/METS-hdr-no-type.xml b/tests/resources/xml/METS-hdr-no-type.xml index 3b7e78d..fe44b61 100644 --- a/tests/resources/xml/METS-hdr-no-type.xml +++ b/tests/resources/xml/METS-hdr-no-type.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-hdr-no-version.xml b/tests/resources/xml/METS-hdr-no-version.xml index c842a65..1114d70 100644 --- a/tests/resources/xml/METS-hdr-no-version.xml +++ b/tests/resources/xml/METS-hdr-no-version.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-no-createdate.xml b/tests/resources/xml/METS-no-createdate.xml index 003433b..d2b0a31 100644 --- a/tests/resources/xml/METS-no-createdate.xml +++ b/tests/resources/xml/METS-no-createdate.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd 
https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-no-hdr.xml b/tests/resources/xml/METS-no-hdr.xml index dc12927..656992e 100644 --- a/tests/resources/xml/METS-no-hdr.xml +++ b/tests/resources/xml/METS-no-hdr.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-no-profile.xml b/tests/resources/xml/METS-no-profile.xml index cce2abe..ff65573 100644 --- a/tests/resources/xml/METS-no-profile.xml +++ b/tests/resources/xml/METS-no-profile.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" > diff --git a/tests/resources/xml/METS-no-structmap.xml b/tests/resources/xml/METS-no-structmap.xml index f78dfa1..c260cb3 100644 --- a/tests/resources/xml/METS-no-structmap.xml +++ b/tests/resources/xml/METS-no-structmap.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ 
schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-other-type.xml b/tests/resources/xml/METS-other-type.xml index d28ba9f..2780d54 100644 --- a/tests/resources/xml/METS-other-type.xml +++ b/tests/resources/xml/METS-other-type.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-ownerid.xml b/tests/resources/xml/METS-ownerid.xml index d29c11f..9e27cd4 100644 --- a/tests/resources/xml/METS-ownerid.xml +++ b/tests/resources/xml/METS-ownerid.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > diff --git a/tests/resources/xml/METS-valid.xml b/tests/resources/xml/METS-valid.xml index 07dde6a..17c2822 100644 --- a/tests/resources/xml/METS-valid.xml +++ b/tests/resources/xml/METS-valid.xml @@ -8,7 +8,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" 
xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance schemas/XMLSchema.xsd http://www.loc.gov/METS/ schemas/mets.xsd http://www.w3.org/1999/xlink schemas/xlink.xsd https://DILCIS.eu/XML/METS/CSIPExtensionMETS schemas/CSIPExtensionMETS.xsd" OBJID="minimal_IP_with_schemas" - TYPE="OTHER" + TYPE="Other" csip:OTHERTYPE="SIARD2" csip:CONTENTINFORMATIONTYPE="SIARD2" PROFILE="https://earkcsip.dilcis.eu/profile/CSIP.xml" > @@ -109,8 +109,13 @@
+
+ +
+
+
diff --git a/tests/resources/xml/__init__.py b/tests/resources/xml/__init__.py index e69de29..4398a39 100644 --- a/tests/resources/xml/__init__.py +++ b/tests/resources/xml/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# flake8: noqa +# -*- coding: utf-8 -*- +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +""" +E-ARK : Information Package Validation + Information Package unit test data +""" diff --git a/tests/rules_test.py b/tests/rules_test.py index 65e9b0f..0de3b04 100644 --- a/tests/rules_test.py +++ b/tests/rules_test.py @@ -22,19 +22,26 @@ # specific language governing permissions and limitations # under the License. 
# +from typing import List import unittest from enum import Enum from importlib_resources import files +from pydantic import ValidationError from eark_validator import rules as SC -from eark_validator.specifications.specification import EarkSpecifications +from eark_validator.model.validation_report import Severity, Result, ValidationReport +from eark_validator.specifications.specification import SpecificationType, SpecificationVersion import tests.resources.schematron as SCHEMATRON import tests.resources.xml as XML +import tests.resources.json as JSON TEST_RES = 'tests.resources' TEST_RES_XML = TEST_RES + '.xml' +TEST_RES_JSON = TEST_RES + '.json' +COMMONS_IP_JSON = str(files(JSON).joinpath('commons-ip-report.json')) +COMMONS_IP_INVALID_JSON = str(files(JSON).joinpath('commons-ip-invalid.json')) PERSON_PATH = str(files(SCHEMATRON).joinpath('person.xml')) NOT_FOUND_PATH = str(files(SCHEMATRON).joinpath('not-found.xml')) EMPTY_FILE_PATH = str(files(TEST_RES).joinpath('empty.file')) @@ -69,8 +76,8 @@ def test_mets_root_no_type(self): def test_mets_root_other_type(self): result, failures, _, _ = _test_validation(METS_ROOT_RULES, 'METS-other-type.xml') - self.assertEqual(failures, 0) - self.assertTrue(result) + self.assertEqual(failures, 1) + self.assertFalse(result) def test_mets_root_no_profile(self): result, failures, _, _ = _test_validation(METS_ROOT_RULES, 'METS-no-profile.xml') @@ -108,10 +115,10 @@ def test_mets_hdr_no_version(self): self.assertFalse(result) def test_mets_root_dmd(self): - result, _, warnings = _full_validation(METS_ROOT_RULES, METS_VALID) + results = _full_validation(METS_ROOT_RULES, METS_VALID) found_csip17 = False - for warning in warnings: - if warning.rule_id == 'CSIP17': + for result in results: + if result.rule_id == 'CSIP17': found_csip17 = True self.assertTrue(found_csip17) self.assertTrue(result) @@ -137,141 +144,137 @@ def test_mets_file(self): def test_mets_structmap(self): result, failures, warnings, _ = 
_test_validation(METS_STRUCT_RULES, METS_VALID) self.assertEqual(failures, 0) - self.assertEqual(warnings, 1) + self.assertEqual(warnings, 3) self.assertTrue(result) class ValidationProfileTest(unittest.TestCase): def test_load_by_str(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification('SIP') + profile = SC.ValidationProfile(SpecificationType.from_string('SIP'), SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification('DIP') + profile = SC.ValidationProfile(SpecificationType.from_string('DIP'), SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, DIP_PROF) def test_load_by_eark_spec(self): - profile = SC.ValidationProfile.from_specification(EarkSpecifications.CSIP) + profile = SC.ValidationProfile(SpecificationType.CSIP, SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.SIP) + profile = SC.ValidationProfile(SpecificationType.SIP, SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.DIP) - self.assertEqual(profile.specification.url, DIP_PROF) - - def test_load_by_spec(self): - profile = SC.ValidationProfile.from_specification(EarkSpecifications.CSIP.specification) - self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.SIP.specification) - self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.DIP.specification) + profile = SC.ValidationProfile(SpecificationType.DIP, SpecificationVersion.V2_0_4) 
self.assertEqual(profile.specification.url, DIP_PROF) def test_bad_value(self): with self.assertRaises(ValueError): - SC.ValidationProfile.from_specification('BAD') + SC.ValidationProfile(SpecificationType.from_string('BAD'), SpecificationVersion.V2_0_4) + def test_unimplemented_specifications(self): + with self.assertRaises(ValueError): + SC.ValidationProfile(SpecificationType.from_string('AIP'), SpecificationVersion.V2_0_4) + with self.assertRaises(ValueError): + SC.ValidationProfile(SpecificationType.from_string('AIU'), SpecificationVersion.V2_0_4) + with self.assertRaises(ValueError): + SC.ValidationProfile(SpecificationType.from_string('AIC'), SpecificationVersion.V2_0_4) def test_valid(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) self.assertTrue(profile.is_valid) def test_invalid(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath('METS-no-hdr.xml'))) self.assertFalse(profile.is_valid) def test_validate_file_not_found(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) with self.assertRaises(FileNotFoundError): profile.validate(str(files(SCHEMATRON).joinpath('not-found.xml'))) def test_validate_dir_value_err(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) with self.assertRaises(ValueError): profile.validate(str(files(SCHEMATRON))) def test_validate_empty_file(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = 
SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES).joinpath('empty.file'))) self.assertFalse(profile.is_valid) def test_validate_not_mets(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath('person.xml'))) self.assertFalse(profile.is_valid) def test_validate_json(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES).joinpath('aip.json'))) self.assertFalse(profile.is_valid) def test_get_results(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) self.assertTrue(profile.is_valid) self.assertEqual(len(profile.get_results()), 8) def test_get_result(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) result = profile.get_result('metsHdr') self.assertTrue(profile.is_valid) - self.assertEqual(len(result.warnings), 1) + self.assertEqual(len(list(filter(lambda a: a.severity == Severity.WARNING, result))), 1) def test_get_bad_key(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) result = profile.get_result('badkey') self.assertIsNone(result) -class SeverityTest(Enum): - NOT_SEV: 'NOT_SEV' + def test_deserialise_commons_ip_report(self): + file_name = 
COMMONS_IP_JSON + with open(file_name, 'r', encoding='utf-8') as _f: + contents = _f.read() + result: ValidationReport = ValidationReport.model_validate_json(contents) + self.assertIsNotNone(result) + + def test_deserialise_commons_ip_invalid(self): + file_name = COMMONS_IP_INVALID_JSON + with open(file_name, 'r', encoding='utf-8') as _f: + contents = _f.read() + result: ValidationReport = ValidationReport.model_validate_json(contents) + self.assertIsNotNone(result) + +class SeverityTest(str, Enum): + NOT_SEV = 'NOT_SEV' class ResultTest(unittest.TestCase): @classmethod def setUpClass(cls): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.CSIP, SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) - cls._result = profile.get_result('metsHdr').warnings[0] + cls._result = profile.get_result('metsHdr')[0] def test_get_message(self): self.assertIsNotNone(self._result.message) - def test_sev_instances(self): - for sev in SC.Severity: - self._result.severity = sev - - def test_sev_names(self): - for sev in SC.Severity: - self._result.severity = sev.name - - def test_sev_values(self): - for sev in SC.Severity: - self._result.severity = sev.value - - def test_bad_sev_string(self): - with self.assertRaises(ValueError): - self._result.severity = 'BAD' - def test_bad_sev_att(self): - with self.assertRaises(AttributeError): - self._result.severity = SeverityTest.NOT_SEV + with self.assertRaises(ValidationError): + Result.model_validate({ 'severity': SeverityTest.NOT_SEV }) def _test_validation(name, to_validate): - rules = SC.SchematronRuleset(SC.get_schematron_path('CSIP', name)) + rules = SC.SchematronRuleset(SC.get_schematron_path(SpecificationVersion.V2_0_4, 'CSIP', name)) rules.validate(str(files(XML).joinpath(to_validate))) - for failure in SC.TestReport.from_validation_report(rules._schematron.validation_report).errors: - print(failure) - for warning in 
SC.TestReport.from_validation_report(rules._schematron.validation_report).warnings: - print(warning) - report = SC.TestReport.from_validation_report(rules._schematron.validation_report) - return report.is_valid, len(report.errors), len(report.warnings), len(report.infos) + results: List[Result] = SC.TestResults.from_validation_report(rules._schematron.validation_report) + errors = warnings = infos = 0 + for result in results: + if result.severity == SC.Severity.ERROR: + errors += 1 + elif result.severity == SC.Severity.WARNING: + warnings += 1 + elif result.severity == SC.Severity.INFORMATION: + infos += 1 + return errors < 1, errors, warnings, infos def _full_validation(name, to_validate): - rules = SC.SchematronRuleset(SC.get_schematron_path('CSIP', name)) + rules = SC.SchematronRuleset(SC.get_schematron_path(SpecificationVersion.V2_0_4, 'CSIP', name)) rules.validate(str(files(XML).joinpath(to_validate))) - for failure in SC.TestReport.from_validation_report(rules._schematron.validation_report).errors: - print(failure) - for warning in SC.TestReport.from_validation_report(rules._schematron.validation_report).warnings: - print(warning) - report = SC.TestReport.from_validation_report(rules._schematron.validation_report) - return report.is_valid, report.errors, report.warnings + return SC.TestResults.from_validation_report(rules._schematron.validation_report) diff --git a/tests/schematron_test.py b/tests/schematron_test.py index 6371eaf..6ff513d 100644 --- a/tests/schematron_test.py +++ b/tests/schematron_test.py @@ -22,13 +22,13 @@ # specific language governing permissions and limitations # under the License. 
# +from typing import List import unittest -from enum import Enum - from importlib_resources import files from eark_validator import rules as SC +from eark_validator.model.validation_report import Severity, Result import tests.resources.schematron as SCHEMATRON import tests.resources.xml as XML @@ -68,26 +68,29 @@ def test_notschematron_file(self): def test_load_schematron(self): assert_count = 0 - for _ in self._person_rules.get_assertions(): + for _ in self._person_rules.assertions: assert_count += 1 - self.assertTrue(assert_count > 0) + self.assertGreater(assert_count, 0) def test_validate_person(self): self._person_rules.validate(str(files(XML).joinpath(PERSON_XML))) - self.assertTrue(SC.TestReport.from_validation_report(self._person_rules._schematron.validation_report).is_valid) + self.assertTrue(_is_list_valid(SC.TestResults.from_validation_report(self._person_rules._schematron.validation_report))) def test_validate_invalid_person(self): self._person_rules.validate(str(files(XML).joinpath('invalid-person.xml'))) - self.assertFalse(SC.TestReport.from_validation_report(self._person_rules._schematron.validation_report).is_valid) + self.assertFalse(_is_list_valid(SC.TestResults.from_validation_report(self._person_rules._schematron.validation_report))) def test_validate_mets(self): self._mets_one_def_rules.validate(METS_VALID_PATH) - self.assertTrue(SC.TestReport.from_validation_report(self._mets_one_def_rules._schematron.validation_report).is_valid) + self.assertTrue(_is_list_valid(SC.TestResults.from_validation_report(self._mets_one_def_rules._schematron.validation_report))) def test_validate_mets_no_root(self): self._mets_one_def_rules.validate(str(files(XML).joinpath('METS-no-root.xml'))) - self.assertFalse(SC.TestReport.from_validation_report(self._mets_one_def_rules._schematron.validation_report).is_valid) + self.assertFalse(_is_list_valid(SC.TestResults.from_validation_report(self._mets_one_def_rules._schematron.validation_report))) def 
test_validate_mets_no_objid(self): self._mets_one_def_rules.validate(str(files(XML).joinpath('METS-no-objid.xml'))) - self.assertFalse(SC.TestReport.from_validation_report(self._mets_one_def_rules._schematron.validation_report).is_valid) + self.assertFalse(_is_list_valid(SC.TestResults.from_validation_report(self._mets_one_def_rules._schematron.validation_report))) + +def _is_list_valid(to_test: List[Result]) -> bool: + return len(list(filter(lambda a: a.severity == Severity.ERROR, to_test))) < 1 diff --git a/tests/specification_test.py b/tests/specification_test.py index b316a41..cfef0a0 100644 --- a/tests/specification_test.py +++ b/tests/specification_test.py @@ -23,107 +23,81 @@ # under the License. # +from typing import Optional import unittest from lxml import etree as ET from importlib_resources import files +from eark_validator.model.specifications import Specification, Requirement -from eark_validator.specifications.specification import EarkSpecifications -from eark_validator.specifications.specification import Specification +from eark_validator.specifications.specification import EarkSpecification, Specifications, StructuralRequirements, SpecificationType, SpecificationVersion import tests.resources.xml as XML - - +from eark_validator.ipxml.resources import profiles class SpecificationTest(unittest.TestCase): def test_no_file(self): with self.assertRaises(FileNotFoundError): - Specification._from_xml_file(str(files('tests.resources').joinpath('nosuch.file'))) + Specifications._from_xml_file(str(files('tests.resources').joinpath('nosuch.file'))) def test_is_dir(self): with self.assertRaises(ValueError): - Specification._from_xml_file(str(files(XML))) + Specifications._from_xml_file(str(files(XML))) def test_no_xml(self): with self.assertRaises(ET.XMLSyntaxError): - Specification._from_xml_file(str(files('tests.resources').joinpath('empty.file'))) + Specifications._from_xml_file(str(files('tests.resources').joinpath('empty.file'))) def 
test_invalid_xml(self): with self.assertRaises(ET.XMLSyntaxError): - Specification._from_xml_file(str(files('tests.resources.xml').joinpath('person.xml'))) - - def test_title(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.title, 'E-ARK CSIP METS Profile') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.title, 'E-ARK SIP METS Profile 2.0') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.title, 'E-ARK DIP METS Profile') - - def test_url(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.url, 'https://earkcsip.dilcis.eu/profile/E-ARK-CSIP.xml') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.url, 'https://earksip.dilcis.eu/profile/E-ARK-SIP.xml') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.url, 'https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml') - - def test_version(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.version, 'V2.0.4') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.version, 'SIPV2.0.4') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.version, 'DIPV2.0.4') - - def test_date(self): - spec_date = '2020-06-12T09:00:00' - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.date, spec_date) - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.date, spec_date) - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.date, spec_date) - - def test_requirements(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(self._count_reqs(spec), spec.requirement_count) - spec = EarkSpecifications.SIP.specification - self.assertEqual(self._count_reqs(spec), spec.requirement_count) - spec = EarkSpecifications.DIP.specification - self.assertEqual(self._count_reqs(spec), spec.requirement_count) - - def test_get_requirement(self): - spec = EarkSpecifications.CSIP.specification - rule_1 = 
spec.get_requirement_by_id('CSIP1') - rule_1_by_sect = spec.get_requirement_by_sect('CSIP1', 'metsRootElement') - self.assertEqual(rule_1, rule_1_by_sect) - self.assertIsNone(spec.get_requirement_by_id('CSIP999')) - - def test_sections(self): - spec = EarkSpecifications.CSIP.specification - self.assertGreater(spec.section_count, 0) - self.assertEqual(self._count_reqs_via_section(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - spec = EarkSpecifications.SIP.specification - self.assertGreater(spec.section_count, 0) - self.assertEqual(self._count_reqs_via_section(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - spec = EarkSpecifications.DIP.specification - self.assertGreater(spec.section_count, 0) - self.assertEqual(self._count_reqs_via_section(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - - def _count_reqs(self, spec): - req_count = 0 - for _ in spec.requirements: - req_count += 1 - return req_count - - def _count_reqs_via_section(self, spec): - req_count = 0 - for section in spec.sections: - for _ in spec.section_requirements(section): - req_count += 1 - return req_count + Specifications._from_xml_file(str(files('tests.resources.xml').joinpath('person.xml'))) + + def test_valid_xml(self): + specification: Specification = Specifications._from_xml_file(str(files(profiles).joinpath('V2.0.4', 'E-ARK-CSIP' + '.xml'))) + self.assertEqual(EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4).specification, specification) + +class StructuralRequirementsTest(unittest.TestCase): + + def test_from_rule_no_none(self): + with self.assertRaises(ValueError): + StructuralRequirements.from_rule_no(None) + + def test_from_rule_no_str(self): + with self.assertRaises(ValueError): + StructuralRequirements.from_rule_no('1') + + def test_from_rule_no(self): + req: Requirement = 
StructuralRequirements.from_rule_no(1) + self.assertEqual(req.id, 'CSIPSTR1') + +class SpecificationTypeTest(unittest.TestCase): + def test_value(self): + spec_type: SpecificationType = SpecificationType.CSIP + self.assertEqual(spec_type.value, 'E-ARK-CSIP') + spec_type = SpecificationType.SIP + self.assertEqual(spec_type.value, 'E-ARK-SIP') + spec_type = SpecificationType.DIP + self.assertEqual(spec_type.value, 'E-ARK-DIP') + +class SpecificationVersionTest(unittest.TestCase): + def test_value(self): + version = SpecificationVersion.V2_0_4 + self.assertEqual(version.value, 'V2.0.4') + version = SpecificationVersion.V2_1_0 + self.assertEqual(version.value, 'V2.1.0') + +class EarkSpecificationsTest(unittest.TestCase): + def test_specifiction_type(self): + eark_specification = EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.CSIP) + eark_specification = EarkSpecification(SpecificationType.SIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.SIP) + eark_specification = EarkSpecification(SpecificationType.DIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.DIP) + + def test_specifiction_version(self): + eark_specification = EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.version, SpecificationVersion.V2_0_4) + eark_specification = EarkSpecification(SpecificationType.SIP, SpecificationVersion.V2_1_0) + self.assertEqual(eark_specification.version, SpecificationVersion.V2_1_0) diff --git a/tests/structure_test.py b/tests/structure_test.py index c6b2acd..631bde1 100644 --- a/tests/structure_test.py +++ b/tests/structure_test.py @@ -25,264 +25,305 @@ """Module covering tests for package structure errors.""" import os import unittest +from pathlib import Path from eark_validator import structure as STRUCT -from eark_validator.rules import Severity 
+from eark_validator.model import Severity from tests.utils_test import contains_rule_id -EXP_NOT_WELLFORMED = 'Expecting status NotWellFormed, not {}' +EXP_NOT_WELLFORMED = 'Expecting status NOTWELLFORMED, not {}' EXP_WELLFORMED = 'Expecting status WellFormed, not {}' EXP_ERRORS = 'Expecting {} errors but found {}' class StructValidationTests(unittest.TestCase): """Unit tests covering structural validation of information packages, spcifically unpacking archived packages and establishing that the files and folders specified if the CSSTRUCT are present.""" + ip_res_root: str = os.path.join(os.path.dirname(__file__), 'resources', 'ips') + bad_ip_root: str = os.path.join(ip_res_root, 'bad') - def test_check_package_root_single(self): + def test_str1_bad_path(self): + """Test a package that's just a compressed single file.""" + ip_path = Path(os.path.join(os.path.dirname(__file__), 'resources', 'empty.file')) + _, details = STRUCT.validate(ip_path) + err_count = 1 + self.assertEqual(len(details.errors), err_count, + EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', + severity=Severity.ERROR)) + + def test_str1_package_root_single(self): """Dedicated test for package root detection errors.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'unpacked', - 'single_file') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.NotWellFormed, + ip_path = Path(os.path.join(self.bad_ip_root, 'single_file.zip')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.NOTWELLFORMED, EXP_NOT_WELLFORMED.format(details.status)) - val_errors = details.errors err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4', + self.assertEqual(len(details.errors), err_count, + 
EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', severity=Severity.ERROR)) - def test_check_package_root_multi_dir(self): + def test_str1_package_root_multi_dir(self): """Dedicated test for package root detection errors.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'unpacked', - 'multi_dir') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.NotWellFormed, + ip_path = Path(os.path.join(self.bad_ip_root, 'multi_dir')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.NOTWELLFORMED, EXP_NOT_WELLFORMED.format(details.status)) - val_errors = details.errors err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4', + self.assertEqual(len(details.errors), err_count, + EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', severity=Severity.ERROR)) - def test_check_package_root_multi_file(self): + def test_str1_package_root_multi_file(self): """Dedicated test for package root detection errors.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'unpacked', - 'multi_file') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.NotWellFormed, + ip_path = Path(os.path.join(self.bad_ip_root, 'multi_file.zip')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.NOTWELLFORMED, EXP_NOT_WELLFORMED.format(details.status)) - val_errors = details.errors err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4', + self.assertEqual(len(details.errors), err_count, + 
EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', severity=Severity.ERROR)) - def test_check_package_root_multi_var(self): + def test_str1_package_root_multi_var(self): """Dedicated test for package root detection errors.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'unpacked', - 'multi_var') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.NotWellFormed, + ip_path = Path(os.path.join(self.bad_ip_root, 'multi_var.zip')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.NOTWELLFORMED, EXP_NOT_WELLFORMED.format(details.status)) - val_errors = details.errors err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4', + self.assertEqual(len(details.errors), err_count, + EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', severity=Severity.ERROR)) - def test_single_file_archive(self): + def test_str1_single_file_archive(self): """Test a package that's just a compressed single file.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'empty.zip') - details = STRUCT.validate_package_structure(ip_path) - val_errors = details.errors + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'empty.zip')) + _, details = STRUCT.validate(ip_path) err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4', + self.assertEqual(len(details.errors), err_count, + EXP_ERRORS.format(err_count, len(details.errors))) + self.assertTrue(contains_rule_id(details.errors, 'CSIPSTR1', severity=Severity.ERROR)) + def test_no_messages(self): + """Test package with no METS.xml 
file""" + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_messages.tar.gz')) + is_valid, details = STRUCT.validate(ip_path) + self.assertTrue(is_valid) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_NOT_WELLFORMED.format(details.status)) + self.assertEqual(len(details.messages), 0) + def test_minimal(self): """Test minimal STRUCT with schemas, the basic no errors but with warnings package.""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'minimal', - 'minimal_IP_with_schemas.zip') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.WellFormed, + ip_path = Path(os.path.join(self.ip_res_root, 'minimal', + 'minimal_IP_with_schemas.zip')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, EXP_WELLFORMED.format(details.status)) - val_errors = details.warnings - self.assertTrue(len(val_errors) == 3, - 'Expecting 3 errors but found {}'.format(len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR13', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR15', - severity=Severity.WARN)) + val_warnings = details.warnings + self.assertEqual(len(val_warnings), 5, + 'Expecting 2 warnings but found {}'.format(len(val_warnings))) + self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR6', + severity=Severity.WARNING)) + self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR7', + severity=Severity.WARNING)) + self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR12', + severity=Severity.WARNING)) + self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR13', + severity=Severity.WARNING)) + self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR16', + severity=Severity.WARNING)) - def test_nomets(self): + def test_str3_package(self): + """Test minimal STRUCT 
with schemas, the basic no errors but with warnings package.""" + ip_path = Path(os.path.join(self.ip_res_root, 'unpacked', + '733dc055-34be-4260-85c7-5549a7083031')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + self.assertEqual(len(details.warnings), 1, + 'Expecting 1 warning but found {}'.format(len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR16', + severity=Severity.WARNING)) + self.assertEqual(len(details.infos), 2, + 'Expecting 2 info messages but found {}'.format(len(details.infos))) + self.assertTrue(contains_rule_id(details.infos, 'CSIPSTR3', + severity=Severity.INFORMATION)) + + def test_str4_nomets(self): """Test package with no METS.xml file""" - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'no_mets.tar.gz') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.NotWellFormed, + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_mets.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.NOTWELLFORMED, EXP_NOT_WELLFORMED.format(details.status)) - val_errors = details.errors err_count = 1 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4')) + self.assertEqual(len(details.messages), err_count, + EXP_ERRORS.format(err_count, len(details.messages))) + self.assertTrue(contains_rule_id(details.messages, 'CSIPSTR4')) - val_warnings = details.warnings - self.assertTrue(len(val_warnings) == 3, - 'Expecting 3 errors but found {}'.format(len(val_warnings))) - self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR13', - severity=Severity.WARN)) - 
self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR15', - severity=Severity.WARN)) - - def test_nomd(self): + def test_str5_nomd(self): # test as root - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'no_md.tar.gz') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.WellFormed, + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_md.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, EXP_WELLFORMED.format(details.status)) - val_errors = details.warnings - err_count = 4 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR5', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR13', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR15', - severity=Severity.WARN)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR5', + severity=Severity.WARNING)) - def test_noschema(self): + def test_str6_nopres(self): # test as root - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'no_schemas.tar.gz') - details = STRUCT.validate_package_structure(ip_path) + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_pres.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR6', + 
severity=Severity.WARNING)) - self.assertTrue(details.status == STRUCT.StructureStatus.WellFormed, + def test_str7_nodesc(self): + # test as root + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_desc.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, EXP_WELLFORMED.format(details.status)) - val_warnings = details.warnings - for entry in val_warnings: - print(str(entry)) - err_count = 4 - self.assertTrue(len(val_warnings) == err_count, - EXP_ERRORS.format(err_count, len(val_warnings))) - self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR13', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR15', - severity=Severity.WARN)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR7', + severity=Severity.WARNING)) - def test_nodata(self): + def test_str8_noother(self): # test as root - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'no_data.tar.gz') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.WellFormed, + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_other.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, EXP_WELLFORMED.format(details.status)) - val_errors = details.warnings - err_count = 4 - self.assertTrue(len(val_errors) == err_count, - EXP_ERRORS.format(err_count, len(val_errors))) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR11', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR13', - severity=Severity.WARN)) - 
self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR15', - severity=Severity.WARN)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.infos))) + self.assertTrue(contains_rule_id(details.infos, 'CSIPSTR8', + severity=Severity.INFORMATION)) - def test_noreps(self): - ip_path = os.path.join(os.path.dirname(__file__), 'resources', 'ips', 'struct', - 'no_reps.tar.gz') - details = STRUCT.validate_package_structure(ip_path) - self.assertTrue(details.status == STRUCT.StructureStatus.WellFormed, + def test_str9_noreps(self): + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_reps.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, EXP_WELLFORMED.format(details.status)) val_warnings = details.warnings - print('ERRORS') - for err in details.messages: - print(err) err_count = 1 - self.assertTrue(len(val_warnings) == err_count, + self.assertEqual(len(val_warnings), err_count, EXP_ERRORS.format(err_count, len(val_warnings))) self.assertTrue(contains_rule_id(val_warnings, 'CSIPSTR9', - severity=Severity.WARN)) - """Unit tests covering structural validation of information packages, spcifically - unpacking archived packages and establishing that the files and folders specified - if the CSIP are present.""" + severity=Severity.WARNING)) + + def test_str10_emptyreps(self): + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'empty_reps.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + err_count = 1 + self.assertEqual(len(details.messages), err_count, + EXP_ERRORS.format(err_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR10', + severity=Severity.WARNING)) - def test_manifest_nomets(self): - """Ensure proper behaviour when no METS file is present.""" + def 
test_str11_nodata(self): # test as root - man_no_mets = STRUCT.StructureChecker('no_mets', has_mets=False) - val_errors = man_no_mets.validate_manifest() - self.assertTrue(len(val_errors) == 1) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR4')) - val_errors = man_no_mets.validate_manifest(is_root=False) - self.assertTrue(len(val_errors) == 2) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR12', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR11', - severity=Severity.WARN)) + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_data.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR11', + severity=Severity.WARNING)) - def test_manifest_nomd(self): + def test_str12_norepmets(self): # test as root - man_no_md = STRUCT.StructureChecker('no_md', has_md=False) - val_errors = man_no_md.validate_manifest() - self.assertTrue(len(val_errors) == 1) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR5', - severity=Severity.WARN)) - val_errors = man_no_md.validate_manifest(is_root=False) - self.assertTrue(len(val_errors) == 2) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR13', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR11', - severity=Severity.WARN)) + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_repmets.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR12', + 
severity=Severity.WARNING)) - def test_manifest_noschema(self): + def test_str13_norepmd(self): # test as root - man_no_schema = STRUCT.StructureChecker('no_schema', has_schema=False) - val_errors = man_no_schema.validate_manifest() - self.assertTrue(len(val_errors) == 1) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR15', - severity=Severity.WARN)) - val_errors = man_no_schema.validate_manifest(is_root=False) - self.assertTrue(len(val_errors) == 2) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR15', - severity=Severity.WARN)) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR11', - severity=Severity.WARN)) + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_repmd.tar.gz')) + _, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR13', + severity=Severity.WARNING)) - def test_manifest_data(self): + def test_str15_noschema(self): # test as root - man_data = STRUCT.StructureChecker('data', has_data=True) - val_errors = man_data.validate_manifest() - self.assertTrue(len(val_errors) == 0) - val_errors = man_data.validate_manifest(is_root=False) - self.assertTrue(len(val_errors) == 0) + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_schemas.tar.gz')) + _, details = STRUCT.validate(ip_path) + + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR15', + severity=Severity.WARNING)) + + def test_str16_nodocs(self): + # test as root + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_docs.tar.gz')) + 
_, details = STRUCT.validate(ip_path) + self.assertEqual(details.status, STRUCT.StructureStatus.WELLFORMED, + EXP_WELLFORMED.format(details.status)) + warn_count = 1 + self.assertEqual(len(details.messages), warn_count, + EXP_ERRORS.format(warn_count, len(details.warnings))) + self.assertTrue(contains_rule_id(details.warnings, 'CSIPSTR16', + severity=Severity.WARNING)) + + def test_get_reps(self): + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_messages.tar.gz')) + checker: STRUCT.StructureChecker = STRUCT.StructureChecker(ip_path) + reps_count = 1 + self.assertEqual(len(checker.get_representations()), reps_count, + EXP_ERRORS.format(reps_count, len(checker.get_representations()))) - def test_manifest_noreps(self): - man_no_reps = STRUCT.StructureChecker('no_reps', has_reps=False) - val_errors = man_no_reps.validate_manifest() - self.assertTrue(len(val_errors) == 1) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR9', - severity=Severity.WARN)) - val_errors = man_no_reps.validate_manifest(is_root=False) - self.assertTrue(len(val_errors) == 1) - self.assertTrue(contains_rule_id(val_errors, 'CSIPSTR11', - severity=Severity.WARN)) + def test_get_no_reps(self): + ip_path = Path(os.path.join(self.ip_res_root, 'struct', + 'no_reps.tar.gz')) + checker: STRUCT.StructureChecker = STRUCT.StructureChecker(ip_path) + reps_count = 0 + self.assertEqual(len(checker.get_representations()), reps_count, + EXP_ERRORS.format(reps_count, len(checker.get_representations()))) diff --git a/tests/utils_test.py b/tests/utils_test.py index 42058db..4a4db3f 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -23,11 +23,11 @@ # under the License. 
# """Module that holds common utilities for unit testing.""" -from eark_validator.rules import Severity +from eark_validator.model import Severity def contains_rule_id(error_list, rule_id, severity=Severity.ERROR): """Check that a particular error with specified severity is present in a list of errors.""" for val_error in error_list: - if val_error.id == rule_id and val_error.severity == severity: + if val_error.rule_id == rule_id and (val_error.severity is severity or val_error.severity == severity.value) : return True return False