Skip to content

Commit

Permalink
Merge pull request #12 from geneontology/issue-6-iquery-cx2-output
Browse files Browse the repository at this point in the history
Updates to CX2 conversion to follow IQuery standards
  • Loading branch information
pkalita-lbl authored Sep 24, 2024
2 parents e29440e + 8188db9 commit 8bf9673
Show file tree
Hide file tree
Showing 6 changed files with 775 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/gocam/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def fetch(model_ids, format):

for model_id in model_ids:
model = wrapper.fetch_model(model_id)
model_dict = model.model_dump(exclude_none=True, exclude_defaults=True)
model_dict = model.model_dump(exclude_none=True)

if format == "json":
click.echo(json.dumps(model_dict, indent=2))
Expand Down
140 changes: 136 additions & 4 deletions src/gocam/translation/cx2/main.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,107 @@
import logging
import re
from enum import Enum
from typing import Dict, List, Optional, Union

from ndex2.cx2 import CX2Network

from gocam.datamodel import Model
from gocam.datamodel import (
EnabledByProteinComplexAssociation,
Model,
MoleculeAssociation,
)
from gocam.translation.cx2.style import (
RELATIONS,
VISUAL_EDITOR_PROPERTIES,
VISUAL_PROPERTIES,
)

logger = logging.getLogger(__name__)

# Derived from
# https://github.com/geneontology/wc-gocam-viz/blob/6ef1fcaddfef97ece94d04b7c23ac09c33ace168/src/globals/%40noctua.form/data/taxon-dataset.json
# If maintaining this list becomes onerous, consider splitting the label on a space and taking only
# the first part
SPECIES_CODES = [
    "Atal",
    "Btau",
    "Cele",
    "Cfam",
    "Ddis",
    "Dmel",
    "Drer",
    "Ggal",
    "Hsap",
    "Mmus",
    "Pseudomonas",
    "Rnor",
    "Scer",
    "Sjap",
    "Solanaceae",
    "Spom",
    "Sscr",
    "Xenopus",
]


def _remove_species_code_suffix(label: str) -> str:
    """Strip a trailing species code from an object label.

    The code is removed only when it is the last whitespace-separated token of
    the label and something precedes it. ``"TP53 Hsap"`` becomes ``"TP53"``,
    while a label that merely ends with the same characters (``"ABCCele"``) or
    that consists of nothing but a code (``"Xenopus"``) is returned unchanged
    instead of being truncated or emptied.

    Args:
        label: Object label, optionally ending in one of ``SPECIES_CODES``.

    Returns:
        The label with surrounding whitespace removed and at most one trailing
        species code dropped.
    """
    cleaned = label.strip()
    # Split off only the final token; a run of any whitespace is the separator.
    parts = cleaned.rsplit(None, 1)
    if len(parts) == 2 and parts[1] in SPECIES_CODES:
        return parts[0]
    return cleaned


# Regex from
# https://github.com/ndexbio/ndex-enrichment-rest/wiki/Enrichment-network-structure#via-node-attributes-preferred-method
# Two alternatives, each anchored at both ends: an uppercase letter followed by any
# number of uppercase letters, digits, or hyphens (e.g. "TP53"), or the form
# "C<digits>orf<digits>" (e.g. "C1orf43").
IQUERY_GENE_SYMBOL_PATTERN = re.compile("(^[A-Z][A-Z0-9-]*$)|(^C[0-9]+orf[0-9]+$)")


class NODE_TYPE(str, Enum):
    """String-valued enumeration of the node kinds this module distinguishes.

    Because the enum also derives from ``str``, each member compares equal to
    its plain string value.
    """

    # A node standing for a single gene product.
    GENE = "gene"
    # A node standing for a protein complex.
    COMPLEX = "complex"

def model_to_cx2(gocam: Model) -> list:

# Internal state
input_output_nodes: Dict[str, int] = {}
activity_nodes: Dict[str, int] = {}

# Internal helper functions that access internal state
def _get_object_label(object_id: str) -> str:
    """Return the display label for the model object with the given id.

    Searches ``gocam.objects`` (from the enclosing scope) for the first object
    whose ``id`` equals ``object_id`` and returns its label with any trailing
    species code removed.

    Args:
        object_id: Identifier of the object to look up.

    Returns:
        The cleaned label, or ``""`` when no object has that id or the matching
        object has no label.
    """
    for obj in gocam.objects:
        if obj.id == object_id:
            # A missing label is treated like a missing object: empty name
            # rather than an AttributeError inside the suffix helper.
            return _remove_species_code_suffix(obj.label or "")
    return ""

def _add_input_output_nodes(
    associations: Optional[Union[MoleculeAssociation, List[MoleculeAssociation]]],
    edge_attributes: dict,
) -> None:
    """Add molecule nodes for ``associations`` and link them to the current activity.

    This helper reads names from the enclosing scope: ``activity`` is the
    activity currently being processed by the caller's loop, ``activity_nodes``
    and ``input_output_nodes`` map ids to CX2 node ids, and ``cx2_network`` is
    the network being built. A molecule node is created the first time its term
    is seen and reused afterwards; one edge per association is added from the
    molecule node to the activity node.

    Args:
        associations: A single association, a list of associations, or ``None``.
        edge_attributes: Attributes attached to every edge created by this call.
    """
    if associations is None:
        return
    if not isinstance(associations, list):
        associations = [associations]
    if not associations:
        return

    # Activities without ``enabled_by`` are never given a node, so there is no
    # edge target for them. Skip instead of raising KeyError. Compare against
    # None explicitly because 0 is a legitimate node id.
    activity_node = activity_nodes.get(activity.id)
    if activity_node is None:
        logger.warning(
            "Skipping input/output edges for activity without a node: %s",
            activity.id,
        )
        return

    for association in associations:
        term = association.term
        if term not in input_output_nodes:
            node_attributes = {
                "name": _get_object_label(term),
                "represents": term,
            }

            if association.provenances:
                node_attributes["provenance"] = [
                    p.contributor for p in association.provenances
                ]

            input_output_nodes[term] = cx2_network.add_node(
                attributes=node_attributes
            )

        cx2_network.add_edge(
            source=input_output_nodes[term],
            target=activity_node,
            attributes=edge_attributes,
        )

# Create the CX2 network and set network-level attributes
cx2_network = CX2Network()
cx2_network.set_network_attributes(
{
Expand All @@ -22,16 +110,41 @@ def _get_object_label(object_id: str) -> str:
}
)

activity_nodes = {}
# Add nodes for activities, labeled by the activity's enabled_by object
for activity in gocam.activities:
if activity.enabled_by is None:
continue

if isinstance(activity.enabled_by, EnabledByProteinComplexAssociation):
node_type = NODE_TYPE.COMPLEX
else:
node_type = NODE_TYPE.GENE

node_name = _get_object_label(activity.enabled_by.term)
if (
node_type == NODE_TYPE.GENE
and IQUERY_GENE_SYMBOL_PATTERN.match(node_name) is None
):
logger.warning(
f"Name for gene node does not match expected pattern: {node_name}"
)

node_attributes = {
"name": _get_object_label(activity.enabled_by.term),
"name": node_name,
"represents": activity.enabled_by.term,
"type": node_type.value,
}

if node_type == NODE_TYPE.COMPLEX and activity.enabled_by.members:
node_attributes["member"] = []
for member in activity.enabled_by.members:
member_name = _get_object_label(member)
if IQUERY_GENE_SYMBOL_PATTERN.match(member_name) is None:
logger.warning(
f"Name for complex member does not match expected pattern: {member_name}"
)
node_attributes["member"].append(member_name)

if activity.molecular_function:
node_attributes["molecular_function_id"] = activity.molecular_function.term
node_attributes["molecular_function_label"] = _get_object_label(
Expand All @@ -55,6 +168,24 @@ def _get_object_label(object_id: str) -> str:

activity_nodes[activity.id] = cx2_network.add_node(attributes=node_attributes)

# Add nodes for input/output molecules and create edges to activity nodes
for activity in gocam.activities:
_add_input_output_nodes(
activity.has_input, {"name": "has input", "represents": "RO:0002233"}
)
_add_input_output_nodes(
activity.has_output, {"name": "has output", "represents": "RO:0002234"}
)
_add_input_output_nodes(
activity.has_primary_input,
{"name": "has primary input", "represents": "RO:0004009"},
)
_add_input_output_nodes(
activity.has_primary_output,
{"name": "has primary output", "represents": "RO:0004008"},
)

# Add edges for causal associations between activity nodes
for activity in gocam.activities:
for association in activity.causal_associations:
if association.downstream_activity in activity_nodes:
Expand Down Expand Up @@ -83,6 +214,7 @@ def _get_object_label(object_id: str) -> str:
attributes=edge_attributes,
)

# Set visual properties for the network
cx2_network.set_visual_properties(VISUAL_PROPERTIES)
cx2_network.set_opaque_aspect("visualEditorProperties", [VISUAL_EDITOR_PROPERTIES])

Expand Down
Loading

0 comments on commit 8bf9673

Please sign in to comment.