Skip to content

Commit

Permalink
Merge pull request #21 from E-ARK-Software/refact/publication
Browse files Browse the repository at this point in the history
MAINT: Quick package publication
  • Loading branch information
shsdev authored Feb 16, 2024
2 parents da7c45e + 3e7b310 commit dfa7003
Show file tree
Hide file tree
Showing 33 changed files with 844 additions and 687 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,33 @@ jobs:

steps:
- uses: actions/[email protected]
with:
fetch-depth: 0
- name: Set up Python
uses: actions/[email protected]
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ".[testing]"
- name: Install python package
run: |
pip install --editable ".[testing]"
- name: Static Pylint code QA
run: |
pylint --errors-only eark_validator
- name: Run pre-commit tests
run: pre-commit run --all-files --verbose
- name: Test with pytest
run: |
pytest
- name: Test setuptools-git-versioning versioning
run: |
python -m pip install setuptools_git_versioning
python -m setuptools_git_versioning
- name: Install build utils
run: |
pip install build
- name: Build package
run: python -m build
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ website of the Digital Information LifeCycle Interoperability Standards Board (D

### Pre-requisites

You must be running either a Debian/Ubuntu Linux distribution or Windows Subsystem for Linux on Windows to follow these commands.
Python 3.10 or later is required to run the E-ARK Python Information Package Validator.

You must be running either a Debian/Ubuntu Linux distribution or Windows Subsystem for Linux on Windows to follow these commands.
If you are running a different Linux distribution, you must replace the apt commands with the equivalent commands for your package manager.

To get Windows Subsystem for Linux up and running, please follow the guide further down and then come back to this step.

### Getting up and running with the E-ARK Python Information Package Validator
Expand Down Expand Up @@ -88,7 +88,7 @@ pip install -U pip
pip install .
```

You are now able to run the application "ip-check". It will validate an Information Package for you.
You are now able to run the application "eark-validator". It will validate an Information Package for you.


#### Testing a valid package.
Expand All @@ -111,10 +111,10 @@ Delete the .zip-file you just downloaded:
rm mets-xml_metsHdr_agent_TYPE_exist.zip
```

Run the ip-check:
Run the eark-validator:

```shell
ip-check mets-xml_metsHdr_agent_TYPE_exist/
eark-validator mets-xml_metsHdr_agent_TYPE_exist/
```

Result:
Expand Down Expand Up @@ -146,7 +146,7 @@ user@machine:~$ tree input

If you do not have Linux and have not previously used WSL, please perform the following steps. You must be logged in on the machine either as Administrator or as a user with Administrator rights.

Start er command prompt (cmd.exe) and then enter the following command:
Start a command prompt (cmd.exe) and then enter the following command:

```shell
wsl --install
Expand Down
1 change: 0 additions & 1 deletion VERSION

This file was deleted.

39 changes: 23 additions & 16 deletions eark_validator/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,52 +26,59 @@
E-ARK : Information package validation
Command line validation application
"""
import argparse
import os.path
from pathlib import Path
import sys
from typing import Optional, Tuple
from eark_validator.model import StructResults
import importlib.metadata

import argparse

from eark_validator.model import ValidationReport
import eark_validator.packages as PACKAGES
from eark_validator.infopacks.package_handler import PackageHandler

__version__ = '0.1.0'
__version__ = importlib.metadata.version('eark_validator')

defaults = {
'description': """E-ARK Information Package validation (ip-check).
ip-check is a command-line tool to analyse and validate the structure and
'description': """E-ARK Information Package validation (eark-validator).
eark-validator is a command-line tool to analyse and validate the structure and
metadata against the E-ARK Information Package specifications.
It is designed for simple integration into automated work-flows.""",
'epilog': """
DILCIS Board (http://dilcis.eu)
See LICENSE for license information.
GitHub: https://github.com/E-ARK-Software/py-rest-ip-validator
Author: Carl Wilson (OPF), 2020-2023
Maintainer: Carl Wilson (OPF), 2020-2023"""
GitHub: https://github.com/E-ARK-Software/eark-validator
Author: Carl Wilson (OPF), 2020-2024
Maintainer: Carl Wilson (OPF), 2020-2024"""
}

# Create PARSER
PARSER = argparse.ArgumentParser(description=defaults['description'], epilog=defaults['epilog'])
PARSER = argparse.ArgumentParser(prog='eark-validator', description=defaults['description'], epilog=defaults['epilog'])

def parse_command_line():
"""Parse command line arguments."""
# Add arguments
PARSER.add_argument('-r', '--recurse',
action='store_true',
dest='inputRecursiveFlag',
default=True,
default=False,
help='When analysing an information package recurse into representations.')
PARSER.add_argument('-c', '--checksum',
action='store_true',
dest='inputChecksumFlag',
default=False,
help='Calculate and verify file checksums in packages.')
help='Calculate and verify package checksums.')
PARSER.add_argument('-m', '--manifest',
action='store_true',
dest='inputManifestFlag',
default=False,
help='Display package manifest information.')
PARSER.add_argument('-v', '--verbose',
action='store_true',
dest='outputVerboseFlag',
default=False,
help='report results in verbose format')
help='Verbose reporting for selected output options.')
PARSER.add_argument('--version',
action='version',
version=__version__)
Expand Down Expand Up @@ -101,17 +108,17 @@ def main():
_exit = _loop_exit if (_loop_exit > 0) else _exit
sys.exit(_exit)

def _validate_ip(path: str) -> Tuple[int, Optional[StructResults]]:
def _validate_ip(path: str) -> Tuple[int, Optional[ValidationReport]]:
ret_stat, checked_path = _check_path(path)
if ret_stat > 0:
return ret_stat, None
report = PACKAGES.PackageValidator(checked_path).validation_report
print('Path {}, struct result is: {}'.format(checked_path,
report.structure.status.value))
for message in report.structure.messages:
print(message.model_dump_json())
# for message in report.structure.messages:
print(report.model_dump_json())

return ret_stat, report.structure
return ret_stat, report

def _check_path(path: str) -> Tuple[int, Optional[Path]]:
if not os.path.exists(path):
Expand Down
139 changes: 49 additions & 90 deletions eark_validator/infopacks/information_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,114 +24,73 @@
#
"""Module covering information package structure validation and navigation."""
import os
from pathlib import Path
from lxml import etree

from eark_validator.const import NO_PATH, NOT_FILE, NOT_VALID_FILE
from eark_validator.mets import MetsFiles, MetsFile
from eark_validator.ipxml.namespaces import Namespaces
from eark_validator.infopacks.manifest import Manifest
from eark_validator.model import PackageDetails
from eark_validator.model.package_details import InformationPackage
from eark_validator.model.validation_report import Result
from .package_handler import PackageHandler

class PackageDetails:
class InformationPackages:

def __init__(
self: str,
objid: str,
label: str,
type: str,
othertype: str,
contentinformationtype: str,
profile: str,
oaispackagetype: str,
ns: str):
self._objid = objid
self._label = label
self._type = type
self._othertype = othertype
self._contentinformationtype = contentinformationtype
self._profile = profile
self._oaispackagetype = oaispackagetype
self._ns = ns

@property
def objid(self) -> str:
return self._objid

@property
def label(self) -> str:
return self._label

@property
def type(self) -> str:
return self._type

@property
def othertype(self) -> str:
return self._othertype

@property
def contentinformationtype(self) -> str:
return self._contentinformationtype

@property
def profile(self) -> str:
return self._profile

@property
def oaispackagetype(self) -> str:
return self._oaispackagetype

@property
def namespaces(self) -> str:
return self._ns

@classmethod
def from_mets_file(cls, mets_file: str) -> 'PackageDetails':
if (not os.path.exists(mets_file)):
@staticmethod
def details_from_mets_file(mets_file: Path) -> PackageDetails:
if (not mets_file.exists()):
raise FileNotFoundError(NO_PATH.format(mets_file))
if (not os.path.isfile(mets_file)):
if (not mets_file.is_file()):
raise ValueError(NOT_FILE.format(mets_file))
ns = {}
objid = label = ptype = othertype = contentinformationtype = profile = oaispackagetype = ''
label = othertype = contentinformationtype = oaispackagetype = ''
try:
parsed_mets = etree.iterparse(mets_file, events=['start', 'start-ns'])
for event, element in parsed_mets:
if event == 'start-ns':
prefix = element[0]
ns_uri = element[1]
ns[prefix] = ns_uri
# Add namespace id to the dictionary
ns[element[1]] = element[0]
if event == 'start':
if element.tag == Namespaces.METS.qualify('mets'):
objid = element.get('OBJID', '')
label = element.get('LABEL', '')
ptype = element.get('TYPE', '')
othertype = element.get(Namespaces.CSIP.qualify('OTHERTYPE'), '')
contentinformationtype = element.get(Namespaces.CSIP.qualify('CONTENTINFORMATIONTYPE'), '')
profile = element.get('PROFILE', '')
oaispackagetype = element.find(Namespaces.METS.qualify('metsHdr')).get(Namespaces.CSIP.qualify('OAISPACKAGETYPE'), '')
elif element.tag == Namespaces.METS.qualify('metsHdr'):
else:
break
except etree.XMLSyntaxError:
except (etree.XMLSyntaxError, AttributeError):
raise ValueError(NOT_VALID_FILE.format(mets_file, 'XML'))
return cls(objid, label, ptype, othertype, contentinformationtype, profile, oaispackagetype, ns)


class InformationPackage:
"""Stores the vital facts and figures about a package."""
def __init__(self, path: str, details: PackageDetails, manifest: Manifest=None):
self._path = path
self._details = details
self._manifest = manifest if manifest else Manifest.from_directory(path)

@property
def path(self) -> str:
"""Get the specification of the package."""
return self._path

@property
def details(self) -> PackageDetails:
"""Get the package details."""
return self._details

@property
def manifest(self) -> Manifest:
"""Return the package manifest."""
return self._manifest
return PackageDetails.model_validate({
'label': label,
'othertype': othertype,
'contentinformationtype': contentinformationtype,
'oaispackagetype': oaispackagetype
})

@staticmethod
def from_path(package_path: Path) -> InformationPackage:
if (not package_path.exists()):
raise FileNotFoundError(NO_PATH.format(package_path))
handler: PackageHandler = PackageHandler()
to_parse:Path = handler.prepare_package(package_path)
mets_path: Path = to_parse.joinpath('METS.xml')
if (not mets_path.is_file()):
raise ValueError('No METS file found in package')
mets: MetsFile = MetsFiles.from_file(to_parse.joinpath('METS.xml'))
return InformationPackage.model_validate({
'name': to_parse.stem,
'mets': mets,
'package': InformationPackages.details_from_mets_file(to_parse.joinpath('METS.xml'))
})

@staticmethod
def validate(package_path: Path) -> Result:
if (not package_path.exists()):
raise FileNotFoundError(NO_PATH.format(package_path))
handler: PackageHandler = PackageHandler()
to_parse:Path = handler.prepare_package(package_path)
mets_path: Path = to_parse.joinpath('METS.xml')
if (not mets_path.is_file()):
raise ValueError('No METS file found in package')
return True
Loading

0 comments on commit dfa7003

Please sign in to comment.