Skip to content

Commit

Permalink
FIX: Support commons-ip use of NOTVALID
Browse files Browse the repository at this point in the history
- added `convert_status` validator to `MetadataResults` class to convert commons-ip `NOTVALID` status to `INVALID`;
- moved checksum algorithm ID validation to the `PackageDetails` class;
- added test and test data for status conversion; and
- fixed type hinting for validation methods to `Any`.
  • Loading branch information
carlwilson committed Aug 29, 2024
1 parent 56deacc commit 3d3ce76
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 19 deletions.
38 changes: 21 additions & 17 deletions eark_validator/model/package_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,25 @@ class PackageDetails(BaseModel):
contentinformationtype: str = ''
checksums: List[Checksum] = []

# Validator to add a hyphen to the SHA checksum algorithm IDs generated by commons-ip
@model_validator(mode='before')
@classmethod
def convert_checksum_ids(cls, data: Any) -> Any:
    """Add the missing hyphen to SHA checksum algorithm IDs before validation.

    The incoming ``checksums`` entries may carry SHA algorithm IDs without a
    hyphen (e.g. ``SHA1``); these are rewritten to the hyphenated form
    (``SHA-1``) and each dict entry is converted to a ``Checksum``.

    Args:
        data: The raw, not yet validated input for the model. Only ``dict``
            input is processed; anything else is returned unchanged.

    Returns:
        A shallow copy of ``data`` with ``checksums`` replaced by the
        normalised list, or ``data`` itself when there is nothing to convert.
    """
    # A 'before' validator receives the raw input, which need not be a dict.
    if not isinstance(data, dict):
        return data
    incoming_checksums = data.get('checksums', [])
    if not isinstance(incoming_checksums, list):
        # Leave unexpected shapes for the regular field validation to report.
        return data
    checksums: list[Any] = []
    for checksum in incoming_checksums:
        if not isinstance(checksum, dict):
            # Not a raw dict entry (e.g. already a Checksum instance): keep it
            # as-is rather than failing on a missing ``.get``.
            checksums.append(checksum)
            continue
        alg_name = checksum.get('algorithm')
        if isinstance(alg_name, str) and alg_name.startswith('SHA') and '-' not in alg_name:
            # If it's a SHA checksum alg ID without a hyphen, add one
            alg_name = f'{alg_name[:3]}-{alg_name[3:]}'
        checksums.append(Checksum(algorithm=alg_name, value=checksum.get('value')))
    # Return a copy so the caller's input is not mutated.
    return {**data, 'checksums': checksums}

class Representation(BaseModel):
mets: Optional[MetsFile] = None
name: Optional[str] = ''
Expand All @@ -52,9 +71,10 @@ class InformationPackage(BaseModel):
details: Optional[PackageDetails] = None
representations: List[Representation] = []

# Validator to convert the commons-ip representations dict to a list of representations
@model_validator(mode='before')
@classmethod
def convert_representations_dict(cls, data: Any) -> list[Representation]:
def convert_representations_dict(cls, data: Any) -> Any:
representations = data.get('representations')
if isinstance(representations, dict):
# If this is a dict type then it's a commons-ip type, coerce to list
Expand All @@ -64,19 +84,3 @@ def convert_representations_dict(cls, data: Any) -> list[Representation]:
data['representations'] = reps
# Return the reps for further validation.
return data

@model_validator(mode='before')
@classmethod
def convert_checksum_ids(cls, data: Any) -> Any:
    """Add the missing hyphen to SHA checksum algorithm IDs in ``details``.

    Looks at ``data['details']['checksums']`` and rewrites SHA algorithm IDs
    without a hyphen (e.g. ``SHA1``) to the hyphenated form (``SHA-1``),
    converting each dict entry to a ``Checksum``.

    Args:
        data: The raw, not yet validated input for the model. Only ``dict``
            input with a ``dict`` under ``details`` is processed.

    Returns:
        A shallow copy of ``data`` whose ``details`` carries the normalised
        ``checksums`` list, or ``data`` itself when there is nothing to
        convert.
    """
    # A 'before' validator receives the raw input, which need not be a dict.
    if not isinstance(data, dict):
        return data
    details = data.get('details')
    if not isinstance(details, dict):
        # No details supplied (or not in dict form): nothing to normalise.
        # Defaulting to {} here and then indexing data['details'] would raise.
        return data
    incoming_checksums = details.get('checksums', [])
    if not isinstance(incoming_checksums, list):
        # Leave unexpected shapes for the regular field validation to report.
        return data
    checksums: list[Any] = []
    for checksum in incoming_checksums:
        if not isinstance(checksum, dict):
            # Not a raw dict entry (e.g. already a Checksum instance): keep it.
            checksums.append(checksum)
            continue
        alg_name = checksum.get('algorithm')
        if isinstance(alg_name, str) and alg_name.startswith('SHA') and '-' not in alg_name:
            alg_name = f'{alg_name[:3]}-{alg_name[3:]}'
        checksums.append(Checksum(algorithm=alg_name, value=checksum.get('value')))
    # Return copies so neither the caller's dict nor its details are mutated.
    return {**data, 'details': {**details, 'checksums': checksums}}
13 changes: 11 additions & 2 deletions eark_validator/model/validation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@
"""

from enum import Enum, unique
from typing import List, Optional
from typing import Any, List, Optional
import uuid

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator

from .package_details import InformationPackage
from .specifications import Level
Expand Down Expand Up @@ -123,6 +123,15 @@ class MetadataResults(BaseModel):
status: MetadataStatus = MetadataStatus.UNKNOWN
messages: List[Result] = []

# Validator to convert commons-ip status from NOTVALID to INVALID
@model_validator(mode='before')
@classmethod
def convert_status(cls, data: Any) -> Any:
    """Rewrite a ``NOTVALID`` status to ``INVALID`` before validation.

    Args:
        data: The raw, not yet validated input for the model. Only ``dict``
            input is inspected; anything else is returned unchanged.

    Returns:
        A shallow copy of ``data`` with ``status`` set to ``'INVALID'`` when
        the incoming status is ``'NOTVALID'``, otherwise ``data`` itself.
    """
    # A 'before' validator receives the raw input, which need not be a dict.
    if not isinstance(data, dict):
        return data
    if data.get('status') == 'NOTVALID':
        # Return a copy so the caller's input is not mutated.
        return {**data, 'status': 'INVALID'}
    return data

class MetatdataResultSet(BaseModel):
model_config = ConfigDict(populate_by_name=True)
schema_results: MetadataResults = Field(validation_alias='schemaResults')
Expand Down
79 changes: 79 additions & 0 deletions tests/resources/json/commons-ip-invalid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"package" : {
"details" : {
"name" : "minimal_IP_with_schemas.zip",
"checksums" : [ {
"algorithm" : "SHA1",
"value" : "54BBE654FE332B51569BAF21338BC811CAD2AF66"
} ]
},
"profile" : {
"type" : "CSIP",
"name" : "minimal_IP_with_schemas.zip",
"version" : "CSIP-"
},
"representations" : {
"name" : "minimal_IP_with_schemas.zip"
}
},
"uid" : "e11da9f2-e59e-4f23-b7f8-5ac86632231f",
"structure" : {
"status" : "WellFormed",
"messages" : [ {
"ruleId" : "CSIPSTR7",
"location" : "",
"message" : "If descriptive metadata are available should include inside metadata/descriptive ",
"severity" : "Warn"
}, {
"ruleId" : "CSIPSTR8",
"location" : "",
"message" : "If any other metadata are available, they MAY be included in separate sub-folders, for example an additional folder named other. ",
"severity" : "Info"
}, {
"ruleId" : "CSIPSTR12",
"location" : "",
"message" : "The recommended best practice is to always have a METS.xml in the representation folder. ",
"severity" : "Warn"
}, {
"ruleId" : "CSIPSTR13",
"location" : "",
"message" : "The representation folder SHOULD include a sub-folder named metadata which MAY include all metadata about the specific representation. ",
"severity" : "Warn"
}, {
"ruleId" : "CSIPSTR16",
"location" : "",
"message" : "There is no documentation folder in the representation folder minimal_IP_with_schemas/representations/rep1. ",
"severity" : "Warn"
} ]
},
"metadata" : {
"schemaResults" : {
"status" : "VALID",
"messages" : [ ]
},
"schematronResults" : {
"status" : "NOTVALID",
"messages" : [ {
"ruleId" : "CSIP31",
"location" : "mets/amdSec",
"message" : "You have administrative files in the metadata/folder, you must have mets/amdSec in Root METS.xml ",
"severity" : "Warn"
}, {
"ruleId" : "CSIP45",
"location" : "mets/amdSec/rightsMD",
"message" : "Individual representations should state their specific rights in their representation METS file (Root METS.xml) ",
"severity" : "Info"
}, {
"ruleId" : "CSIP66",
"location" : "mets/fileSec/fileGrp/file",
"message" : "You have files in SIP that are not referenced in Root METS.xml ",
"severity" : "Error"
}, {
"ruleId" : "CSIP80",
"location" : "mets/structMap",
"message" : "Must have one structMap with the mets/structMap[@LABEL='CSIP'] in Root METS.xml doens't appear mets/structMap[@LABEL='CSIP']. ",
"severity" : "Error"
} ]
}
}
}
8 changes: 8 additions & 0 deletions tests/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
TEST_RES_XML = TEST_RES + '.xml'
TEST_RES_JSON = TEST_RES + '.json'
COMMONS_IP_JSON = str(files(JSON).joinpath('commons-ip-report.json'))
COMMONS_IP_INVALID_JSON = str(files(JSON).joinpath('commons-ip-invalid.json'))
PERSON_PATH = str(files(SCHEMATRON).joinpath('person.xml'))
NOT_FOUND_PATH = str(files(SCHEMATRON).joinpath('not-found.xml'))
EMPTY_FILE_PATH = str(files(TEST_RES).joinpath('empty.file'))
Expand Down Expand Up @@ -235,6 +236,13 @@ def test_deserialise_commons_ip_report(self):
result: ValidationReport = ValidationReport.model_validate_json(contents)
self.assertIsNotNone(result)

def test_deserialise_commons_ip_invalid(self):
    """Deserialise the commons-ip report fixture that carries a NOTVALID status."""
    with open(COMMONS_IP_INVALID_JSON, 'r', encoding='utf-8') as report_file:
        report_json = report_file.read()
    report: ValidationReport = ValidationReport.model_validate_json(report_json)
    # Parsing must succeed and yield a report object.
    self.assertIsNotNone(report)

class SeverityTest(str, Enum):
NOT_SEV = 'NOT_SEV'

Expand Down

0 comments on commit 3d3ce76

Please sign in to comment.