Skip to content

Commit

Permalink
Split has_direct_input/output into has_input/output (multivalued) and…
Browse files Browse the repository at this point in the history
… has_primary_input/output (single valued)
  • Loading branch information
pkalita-lbl committed Sep 13, 2024
1 parent a636e05 commit 5368d0d
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 41 deletions.
6 changes: 4 additions & 2 deletions src/gocam/datamodel/gocam.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ class Activity(ConfiguredBaseModel):
molecular_function: Optional[MolecularFunctionAssociation] = Field(None, description="""The molecular function that is carried out by the gene product or complex""")
occurs_in: Optional[CellularAnatomicalEntityAssociation] = Field(None, description="""The cellular location in which the activity occurs""")
part_of: Optional[BiologicalProcessAssociation] = Field(None, description="""The larger biological process in which the activity is a part""")
has_direct_input: Optional[MoleculeAssociation] = Field(None, description="""The input molecules that are directly consumed by the activity""")
has_direct_output: Optional[MoleculeAssociation] = Field(None, description="""The output molecules that are directly produced by the activity""")
has_input: Optional[List[MoleculeAssociation]] = Field(default_factory=list, description="""The input molecules that are directly consumed by the activity""")
has_primary_input: Optional[MoleculeAssociation] = Field(None, description="""The primary input molecule that is directly consumed by the activity""")
has_output: Optional[List[MoleculeAssociation]] = Field(default_factory=list, description="""The output molecules that are directly produced by the activity""")
has_primary_output: Optional[MoleculeAssociation] = Field(None, description="""The primary output molecule that is directly produced by the activity""")
causal_associations: Optional[List[CausalAssociation]] = Field(default_factory=list, description="""The causal associations that connect this activity to other activities""")
provenances: Optional[List[ProvenanceInfo]] = Field(default_factory=list, description="""Provenance information for the activity""")

Expand Down
14 changes: 12 additions & 2 deletions src/gocam/schema/gocam.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,24 @@ classes:
description: The larger biological process in which the activity is a part
range: BiologicalProcessAssociation
inlined: true
has_direct_input:
has_input:
description: The input molecules that are directly consumed by the activity
range: MoleculeAssociation
inlined: true
has_direct_output:
multivalued: true
has_primary_input:
description: The primary input molecule that is directly consumed by the activity
range: MoleculeAssociation
inlined: true
has_output:
description: The output molecules that are directly produced by the activity
range: MoleculeAssociation
inlined: true
multivalued: true
has_primary_output:
description: The primary output molecule that is directly produced by the activity
range: MoleculeAssociation
inlined: true
causal_associations:
description: The causal associations that connect this activity to other activities
range: CausalAssociation
Expand Down
48 changes: 30 additions & 18 deletions src/gocam/translation/minerva_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
OCCURS_IN = "BFO:0000066"
HAS_INPUT = "RO:0002233"
HAS_OUTPUT = "RO:0002234"
HAS_PRIMARY_INPUT = "RO:0004009"
HAS_PRIMARY_OUTPUT = "RO:0004008"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,6 +66,12 @@ def _annotations_multivalued(obj: Dict) -> Dict[str, List[str]]:
return anns


def _setattr_with_warning(obj, attr, value):
if getattr(obj, attr, None) is not None:
logger.warning(f"Overwriting {attr} for {obj.id if hasattr(obj, 'id') else obj}")
setattr(obj, attr, value)


MAIN_TYPES = [
"molecular_function",
"biological_process",
Expand Down Expand Up @@ -300,36 +308,40 @@ def _iter_activities_by_fact_subject(
for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=PART_OF
):
if activity.part_of is not None:
logger.warning(f"Overwriting part_of for Activity: {activity.id}")
activity.part_of = BiologicalProcessAssociation(term=term, evidence=evs)
association = BiologicalProcessAssociation(term=term, evidence=evs)
_setattr_with_warning(activity, "part_of", association)

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=OCCURS_IN
):
if activity.occurs_in is not None:
logger.warning(f"Overwriting occurs_in for Activity: {activity.id}")
activity.occurs_in = CellularAnatomicalEntityAssociation(
term=term, evidence=evs
)
association = CellularAnatomicalEntityAssociation(term=term, evidence=evs)
_setattr_with_warning(activity, "occurs_in", association)

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_INPUT
):
if activity.has_direct_input is not None:
logger.warning(
f"Overwriting has_direct_input for Activity: {activity.id}"
)
activity.has_direct_input = MoleculeAssociation(term=term, evidence=evs)
if activity.has_input is None:
activity.has_input = []
activity.has_input.append(MoleculeAssociation(term=term, evidence=evs))

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_PRIMARY_INPUT
):
association = MoleculeAssociation(term=term, evidence=evs)
_setattr_with_warning(activity, "has_primary_input", association)

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_OUTPUT
):
if activity.has_direct_output is not None:
logger.warning(
f"Overwriting has_direct_output for Activity: {activity.id}"
)
activity.has_direct_output = MoleculeAssociation(term=term, evidence=evs)
if activity.has_output is None:
activity.has_output = []
activity.has_output.append(MoleculeAssociation(term=term, evidence=evs))

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_PRIMARY_OUTPUT
):
association = MoleculeAssociation(term=term, evidence=evs)
_setattr_with_warning(activity, "has_primary_output", association)

for fact_property, facts in facts_by_property.items():
for fact in facts:
Expand Down
1 change: 1 addition & 0 deletions tests/input/minerva-633b013300000306.json

Large diffs are not rendered by default.

46 changes: 27 additions & 19 deletions tests/test_translation/test_minerva_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,37 +57,45 @@ def test_protein_complex():
]


def test_has_direct_input_and_has_direct_output():
"""Test that direct input/output molecule associations are added to activities"""
def test_has_input_and_has_output():
"""Test that input/output molecule associations are added to activities"""
mw = MinervaWrapper()
with open(INPUT_DIR / "minerva-665912ed00002626.json", "r") as f:
minerva_object = json.load(f)
model = mw.minerva_object_to_model(minerva_object)

activities_with_direct_input = []
activities_with_direct_output = []
activities_with_input = []
activities_with_output = []
for activity in model.activities:
if activity.has_direct_input:
activities_with_direct_input.append(activity)
if activity.has_direct_output:
activities_with_direct_output.append(activity)
if activity.has_input:
activities_with_input.append(activity)
if activity.has_output:
activities_with_output.append(activity)

# Basic sanity check on the number of activities with direct input/output
assert len(activities_with_direct_input) == 3
assert len(activities_with_direct_output) == 7
# Basic sanity check on the number of activities with input/output
assert len(activities_with_input) == 3
assert len(activities_with_output) == 7

# Verify that one activity has uric acid as a direct input
# Verify that one activity has uric acid as an input
uric_acid_input_activities = [
a
for a in activities_with_direct_input
if a.has_direct_input.term == "CHEBI:27226"
a for a in activities_with_input if a.has_input[0].term == "CHEBI:27226"
]
assert len(uric_acid_input_activities) == 1

# Verify that three activities have urea as a direct output
# Verify that three activities have urea as an output
urea_output_activities = [
a
for a in activities_with_direct_output
if a.has_direct_output.term == "CHEBI:16199"
a for a in activities_with_output if a.has_output[0].term == "CHEBI:16199"
]
assert len(urea_output_activities) == 3


def test_multivalued_input_and_output():
    """Test that activities with multiple inputs and outputs are correctly translated."""
    mw = MinervaWrapper()
    with open(INPUT_DIR / "minerva-633b013300000306.json", "r", encoding="utf-8") as f:
        minerva_object = json.load(f)
    model = mw.minerva_object_to_model(minerva_object)

    # molecular_function is optional on Activity, so skip activities that lack
    # one instead of failing with AttributeError while searching.
    cs_activity = next(
        (
            a
            for a in model.activities
            if a.molecular_function is not None
            and a.molecular_function.term == "GO:0004108"
        ),
        None,
    )
    # Fail with a readable message rather than a bare StopIteration.
    assert cs_activity is not None, "No activity with molecular function GO:0004108"
    assert len(cs_activity.has_input) == 3
    assert len(cs_activity.has_output) == 2

0 comments on commit 5368d0d

Please sign in to comment.