Skip to content

Commit

Permalink
Merge pull request #74 from E-ARK-Software/feat/validating-files
Browse files Browse the repository at this point in the history
Validation of file entries
  • Loading branch information
dockmd authored Nov 19, 2024
2 parents 8a7ba8f + 4245d49 commit 4958ea9
Show file tree
Hide file tree
Showing 13 changed files with 2,561 additions and 2,422 deletions.
50 changes: 50 additions & 0 deletions eark_validator/infopacks/checksummer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
from pathlib import Path

from eark_validator.const import NO_PATH, NOT_FILE
from eark_validator.model import Checksum, ChecksumAlg

class Checksummer:
    """Calculate file checksums using a single, fixed checksum algorithm."""

    # Number of bytes read per iteration, so large files are never loaded whole.
    _CHUNK_SIZE = 4096

    def __init__(self, algorithm: ChecksumAlg | str):
        """Create a checksummer for the given algorithm.

        Args:
            algorithm (ChecksumAlg | str): Either a ChecksumAlg member, or a
                string that is converted via ChecksumAlg.from_string().
        """
        self._algorithm: ChecksumAlg = (
            algorithm
            if isinstance(algorithm, ChecksumAlg)
            else ChecksumAlg.from_string(algorithm)
        )

    @property
    def algorithm(self) -> ChecksumAlg:
        """Return the checksum algorithm used by this checksummer."""
        return self._algorithm

    def hash_file(self, path: Path) -> 'Checksum':
        """Calculate the checksum of a file.

        Args:
            path (Path): A path to a file to checksum.

        Raises:
            FileNotFoundError: If the path parameter is not found.
            ValueError: If the path parameter does not resolve to a regular
                file, e.g. it is a directory.

        Returns:
            Checksum: A Checksum object containing the hexadecimal digest of
                the file.
        """
        if not path.exists():
            raise FileNotFoundError(NO_PATH.format(path))
        if not path.is_file():
            raise ValueError(NOT_FILE.format(path))
        # NOTE(review): get_implementation() presumably returns a hashlib-style
        # object; only update() and hexdigest() are relied upon here.
        implementation = ChecksumAlg.get_implementation(self._algorithm)
        with path.open('rb') as file:
            # read() returns b'' at end of file, which ends the loop.
            while chunk := file.read(self._CHUNK_SIZE):
                implementation.update(chunk)
        return Checksum.model_validate(
            {
                'algorithm': self._algorithm,
                'value': implementation.hexdigest(),
            },
            strict=True,
        )

    @classmethod
    def from_file(cls, path: Path, algorithm: 'ChecksumAlg | str') -> 'Checksum':
        """Calculate the checksum of a file using the given algorithm.

        Convenience wrapper that builds a checksummer for ``algorithm`` and
        returns the result of hash_file() for ``path``.

        Args:
            path (Path): A path to a file to checksum.
            algorithm (ChecksumAlg | str): The checksum algorithm to use.

        Raises:
            FileNotFoundError: If the path parameter is not found.
            ValueError: If the path parameter does not resolve to a regular file.

        Returns:
            Checksum: A Checksum object containing the hexadecimal digest of
                the file.
        """
        return cls(algorithm).hash_file(path)
8 changes: 5 additions & 3 deletions eark_validator/infopacks/information_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,16 @@ def from_path(package_path: Path) -> InformationPackage:
if not package_path.exists():
raise FileNotFoundError(NO_PATH.format(package_path))
handler: PackageHandler = PackageHandler()
to_parse:Path = handler.prepare_package(package_path)
to_parse: Path = handler.prepare_package(package_path)
mets_path: Path = to_parse.joinpath(METS_FILE)
if not mets_path.is_file():
raise ValueError('No METS file found in package')
mets: MetsFile = MetsFiles.from_file(to_parse.joinpath(METS_FILE))

mets: MetsFile = MetsFiles.from_file(mets_path)
details: PackageDetails = InformationPackages.details_from_mets_file(mets_path)
return InformationPackage.model_validate({
METS: mets,
'details': InformationPackages.details_from_mets_file(to_parse.joinpath(METS_FILE))
'details': details
})

@staticmethod
Expand Down
47 changes: 1 addition & 46 deletions eark_validator/infopacks/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,52 +34,7 @@
from eark_validator.model.manifest import SourceType
from eark_validator.model.metadata import FileEntry
from eark_validator.utils import get_path


class Checksummer:
    """Calculate file checksums using a single, fixed checksum algorithm."""

    # Number of bytes read per iteration, so large files are never loaded whole.
    _CHUNK_SIZE = 4096

    def __init__(self, algorithm: ChecksumAlg | str):
        """Create a checksummer for the given algorithm.

        Args:
            algorithm (ChecksumAlg | str): Either a ChecksumAlg member, or a
                string that is converted via ChecksumAlg.from_string().
        """
        self._algorithm: ChecksumAlg = (
            algorithm
            if isinstance(algorithm, ChecksumAlg)
            else ChecksumAlg.from_string(algorithm)
        )

    @property
    def algorithm(self) -> ChecksumAlg:
        """Return the checksum algorithm used by this checksummer."""
        return self._algorithm

    def hash_file(self, path: Path) -> 'Checksum':
        """Calculate the checksum of a file.

        Args:
            path (Path): A path to a file to checksum.

        Raises:
            FileNotFoundError: If the path parameter is not found.
            ValueError: If the path parameter does not resolve to a regular
                file, e.g. it is a directory.

        Returns:
            Checksum: A Checksum object containing the hexadecimal digest of
                the file.
        """
        if not path.exists():
            raise FileNotFoundError(NO_PATH.format(path))
        if not path.is_file():
            raise ValueError(NOT_FILE.format(path))
        # NOTE(review): get_implementation() presumably returns a hashlib-style
        # object; only update() and hexdigest() are relied upon here.
        implementation = ChecksumAlg.get_implementation(self._algorithm)
        with path.open('rb') as file:
            # read() returns b'' at end of file, which ends the loop.
            while chunk := file.read(self._CHUNK_SIZE):
                implementation.update(chunk)
        return Checksum.model_validate(
            {
                'algorithm': self._algorithm,
                'value': implementation.hexdigest(),
            },
            strict=True,
        )

    @classmethod
    def from_file(cls, path: Path, algorithm: 'ChecksumAlg | str') -> 'Checksum':
        """Calculate the checksum of a file using the given algorithm.

        Convenience wrapper that builds a checksummer for ``algorithm`` and
        returns the result of hash_file() for ``path``.

        Args:
            path (Path): A path to a file to checksum.
            algorithm (ChecksumAlg | str): The checksum algorithm to use.

        Raises:
            FileNotFoundError: If the path parameter is not found.
            ValueError: If the path parameter does not resolve to a regular file.

        Returns:
            Checksum: A Checksum object containing the hexadecimal digest of
                the file.
        """
        return cls(algorithm).hash_file(path)
from eark_validator.infopacks.checksummer import Checksummer

class ManifestEntries:
@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<assert id="CSIP37" role="ERROR" test="@xlink:type = 'simple'">Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard.</assert>
<assert id="CSIP38" role="ERROR" test="@xlink:href">MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute.</assert>
<assert id="CSIP39" role="ERROR" test="@MDTYPE">MUST record the type of metadata at the referenced location.</assert>
<assert id="CSIP40" role="ERROR" test="@MIMETYPE_IANA_test">MUST record the MIME type of the referenced file.</assert>
<assert id="CSIP40" role="ERROR" test="@MIMETYPE">MUST record the MIME type of the referenced file.</assert>
<assert id="CSIP41" role="ERROR" test="@SIZE">MUST record the size in bytes of the referenced file.</assert>
<assert id="CSIP42" role="ERROR" test="@CREATED">MUST record the date the referenced file was created.</assert>
<assert id="CSIP43" role="ERROR" test="@CHECKSUM">MUST record the checksum of the referenced file.</assert>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<assert id="CSIP23" role="ERROR" test="@xlink:type = 'simple'">Attribute used with the value “simple”. Value list is maintained by the xlink standard.</assert>
<assert id="CSIP24" role="ERROR" test="@xlink:href">The actual location of the resource. This specification recommends recording a URL type filepath in this attribute.</assert>
<assert id="CSIP25" role="ERROR" test="@MDTYPE">Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.</assert>
<assert id="CSIP26" role="ERROR" test="@MIMETYPE_IANA_test">MUST hold the IANA mime type of the referenced file.</assert>
<assert id="CSIP26" role="ERROR" test="@MIMETYPE">MUST hold the IANA mime type of the referenced file.</assert>
<assert id="CSIP27" role="ERROR" test="@SIZE">MUST hold the size of the referenced file in bytes.</assert>
<assert id="CSIP28" role="ERROR" test="@CREATED">MUST hold the creation date of the referenced file.</assert>
<assert id="CSIP29" role="ERROR" test="@CHECKSUM">MUST hold the checksum of the referenced file.</assert>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<assert id="CSIP37" role="ERROR" test="@xlink:type = 'simple'">Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard.</assert>
<assert id="CSIP38" role="ERROR" test="@xlink:href">MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute.</assert>
<assert id="CSIP39" role="ERROR" test="@MDTYPE">MUST record the type of metadata at the referenced location.</assert>
<assert id="CSIP40" role="ERROR" test="@MIMETYPE_IANA_test">MUST record the MIME type of the referenced file.</assert>
<assert id="CSIP40" role="ERROR" test="@MIMETYPE">MUST record the MIME type of the referenced file.</assert>
<assert id="CSIP41" role="ERROR" test="@SIZE">MUST record the size in bytes of the referenced file.</assert>
<assert id="CSIP42" role="ERROR" test="@CREATED">MUST record the date and time the referenced file was created.</assert>
<assert id="CSIP43" role="ERROR" test="@CHECKSUM">MUST record the checksum of the referenced file.</assert>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<assert id="CSIP23" role="ERROR" test="@xlink:type = 'simple'">Attribute used with the value “simple”. Value list is maintained by the xlink standard.</assert>
<assert id="CSIP24" role="ERROR" test="@xlink:href">The actual location of the resource. This specification recommends recording a URL type filepath in this attribute.</assert>
<assert id="CSIP25" role="ERROR" test="@MDTYPE">Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.</assert>
<assert id="CSIP26" role="ERROR" test="@MIMETYPE_IANA_test">MUST hold the IANA mime type of the referenced file.</assert>
<assert id="CSIP26" role="ERROR" test="@MIMETYPE">MUST hold the IANA mime type of the referenced file.</assert>
<assert id="CSIP27" role="ERROR" test="@SIZE">MUST hold the size of the referenced file in bytes.</assert>
<assert id="CSIP28" role="ERROR" test="@CREATED">MUST hold the creation date and time of the referenced file.</assert>
<assert id="CSIP29" role="ERROR" test="@CHECKSUM">MUST hold the checksum of the referenced file.</assert>
Expand Down
Loading

0 comments on commit 4958ea9

Please sign in to comment.