Skip to content

Commit

Permalink
Merge pull request #282 from geneontology/json-export-linkml
Browse files Browse the repository at this point in the history
Json export in LinkML
  • Loading branch information
sierra-moxon authored Nov 2, 2022
2 parents 223fefe + 0a2a9d1 commit 27bff84
Show file tree
Hide file tree
Showing 16 changed files with 1,845 additions and 54 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/gen-project-linkml.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Built from:
# https://docs.github.com/en/actions/guides/building-and-testing-python
# https://github.com/snok/install-poetry#workflows-and-tips

name: Build and test LinkML output of GO domain/range constraints as JSON

on: [pull_request]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]

    steps:

      #----------------------------------------------
      #       check-out repo and set-up python
      #----------------------------------------------
      - name: Check out repository
        uses: actions/checkout@v2

      # NOTE: the matrix value must be referenced with a plain GitHub Actions
      # expression; the cookiecutter-style escapes ({{ "{{" }} ... {{ "}}" }})
      # are not valid expression syntax.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      #----------------------------------------------
      #          install & configure poetry
      #----------------------------------------------
      # NOTE(review): the pinned version was lost in the page capture; v1 is
      # the action's major-version tag. Pin more tightly if required.
      - name: Install Poetry
        uses: snok/install-poetry@v1

      #----------------------------------------------
      # install dependencies if cache does not exist
      #----------------------------------------------
      - name: Install dependencies
        run: poetry install --no-interaction --no-root

      #----------------------------------------------
      #    install your root project, if required
      #----------------------------------------------
      - name: Install library
        run: poetry install --no-interaction

      #----------------------------------------------
      #          regenerate LinkML artifacts
      #----------------------------------------------
      - name: Regenerate LinkML artifacts
        run: |
          make clean-artifacts
          make -B gen-artifacts
          if [ $? -eq 0 ]; then
            echo "LinkML artifacts generated successfully"
          else
            echo "LinkML artifacts generation failed"
            exit 1
          fi
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ python/tests/__pycache__/
java/.idea/
.DS_Store

.venv/
*.log
scala/target/*
scala/test/*
Expand Down
12 changes: 12 additions & 0 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,16 @@ p-%: tests/data/p-%.ttl
f-%: tests/data/f-%.ttl
python ./gocam_validator.py $< && exit -1 || echo FAILED AS EXPECTED

# The LinkML generator targets never produce a file named after themselves,
# so declare them phony to keep make from skipping them.
.PHONY: gen-pydantic gen-jsonschema gen-typescript linkml

# Regenerate the pydantic model from the LinkML schema.
gen-pydantic:
	rm -f shex_json_linkml.py && gen-pydantic schema/shex_json_linkml.yaml > shex_json_linkml.py

# Regenerate the JSON Schema; the output directory is created first because
# the shell redirect fails when it does not exist.
gen-jsonschema:
	mkdir -p target/jsonschema
	rm -f target/jsonschema/shex_json_linkml.json && gen-json-schema schema/shex_json_linkml.yaml > target/jsonschema/shex_json_linkml.json

# Regenerate the TypeScript definitions (same directory handling as above).
gen-typescript:
	mkdir -p target/typescript
	rm -f target/typescript/shex_json_linkml.ts && gen-typescript schema/shex_json_linkml.yaml > target/typescript/shex_json_linkml.ts

# Convenience target: rebuild every LinkML-derived artifact.
linkml: gen-pydantic gen-jsonschema gen-typescript


.PRECIOUS: tests/data/%.ttl
122 changes: 69 additions & 53 deletions python/json_export.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
from os import path
import json
import requests
from typing import List
from ontobio.rdfgen.assoc_rdfgen import prefix_context
from prefixcommons.curie_util import contract_uri
from pyshexc.parser_impl import generate_shexj
from typing import Optional, List, Union
from ShExJSG.ShExJ import Shape, ShapeAnd, ShapeOr, ShapeNot, TripleConstraint, shapeExprLabel, shapeExpr, shapeExprLabel, tripleExpr, tripleExprLabel, OneOf, EachOf
from ShExJSG.ShExJ import Shape, ShapeAnd, ShapeOr, ShapeNot, TripleConstraint, shapeExpr, \
shapeExprLabel, tripleExpr, tripleExprLabel, OneOf, EachOf
from pyshex import PrefixLibrary
from shex_json_linkml import Association
from pprint import pprint
from pathlib import Path


def get_suffix(uri):
    """Return a compact identifier for *uri*.

    The URI is passed to ``contract_uri`` with ``prefix_context`` as the only
    prefix map. When that yields at least one result, the first one is
    returned; otherwise the last path component of the URI is used.

    :param uri: the URI string to shorten.
    :return: the first contracted form, or ``path.basename(uri)`` as fallback.
    """
    contracted = contract_uri(uri, cmaps=[prefix_context])
    if contracted:
        return contracted[0]

    return path.basename(uri)


class NoctuaFormShex:
Expand All @@ -18,93 +29,98 @@ def __init__(self):
self.shex = generate_shexj.parse(shex_response.text)
pref = PrefixLibrary(shex_response.text)
self.pref_dict = {
k:self.get_suffix(str(v)) for (k,v) in dict(pref).items()
if str(v).startswith('http://purl.obolibrary.org/obo/')}
del self.pref_dict['OBO']

def get_suffix(self, uri):
suffix = contract_uri(uri, cmaps=[prefix_context])
if len(suffix) > 0:
return suffix[0]

return path.basename(uri)

k: get_suffix(str(v)) for (k, v) in dict(pref).items()
if str(v).startswith('http://purl.obolibrary.org/obo/')}
del self.pref_dict['OBO'] # remove this filter and make sure that it works because it needs to be
# working for every shape.

def get_shape_name(self, uri, clean=False):
    """Look up the ``self.pref_dict`` entry that corresponds to a shape URI.

    The lookup key is the upper-cased last path component of *uri*, prefixed
    with ``GO`` when the URI contains a ``/go/`` segment.

    :param uri: shape URI (or label) to resolve.
    :param clean: when true, unknown shapes yield ``None``; otherwise the
        original *uri* is echoed back.
    :return: the mapped value from ``self.pref_dict``, or the fallback.
    """
    name = path.basename(uri).upper()
    if '/go/' in uri:
        name = 'GO' + name
    fallback = None if clean else uri
    return self.pref_dict.get(name, fallback)

def gen_lookup_table(self):
    """Invert ``self.pref_dict`` into a lookup table.

    :return: a dict mapping every value of ``self.pref_dict`` to
        ``{'label': <its key>}``.
    """
    return {value: {'label': key} for key, value in self.pref_dict.items()}


def _load_expr(self, expr: Optional[Union[shapeExprLabel, shapeExpr]], preds=None) -> Union[dict, list]:
    """Walk a shape expression and collect what it references into *preds*.

    * A string label is resolved with ``get_shape_name`` and appended, but
      only when *preds* is a list (i.e. a predicate's ``range``).
    * ``ShapeOr`` / ``ShapeAnd`` recurse into each member expression.
    * ``ShapeNot`` recurses into the negated expression.
    * A ``Shape`` with an ``expression`` hands that triple expression to
      ``_load_triple_expr``.

    :param expr: shape expression or shape label to inspect.
    :param preds: accumulator; a new dict is created when omitted.
    :return: the accumulator (the dict created here, or the object passed in).
    """
    if preds is None:
        preds = {}

    if isinstance(expr, str) and isinstance(preds, list):
        preds.append(self.get_shape_name(expr))

    if isinstance(expr, (ShapeOr, ShapeAnd)):
        for member in expr.shapeExprs:
            self._load_expr(member, preds)
    elif isinstance(expr, ShapeNot):
        self._load_expr(expr.shapeExpr, preds)
    elif isinstance(expr, Shape) and expr.expression is not None:
        self._load_triple_expr(expr.expression, preds)

    # TODO: raise an error here if the pred list is empty
    return preds


def _load_triple_expr(self, expr: Union[tripleExpr, tripleExprLabel], preds=None) -> None:
    """Record the predicates constrained by a triple expression in *preds*.

    ``OneOf`` / ``EachOf`` groups are walked recursively. For a
    ``TripleConstraint`` that has a ``valueExpr``, the predicate is shortened
    with ``get_suffix``; predicates whose short form is not among the values
    of ``self.pref_dict`` are ignored. Otherwise ``preds[pred]`` is set to a
    dict with a ``range`` list (filled by ``_load_expr``) and, when the
    constraint has a ``max``, a ``cardinality`` entry.

    :param expr: triple expression to inspect.
    :param preds: dict that receives one entry per accepted predicate;
        it is indexed directly, so callers are expected to pass one.
    """
    if isinstance(expr, (OneOf, EachOf)):
        for member in expr.expressions:
            self._load_triple_expr(member, preds)
    elif isinstance(expr, TripleConstraint) and expr.valueExpr is not None:
        pred = get_suffix(expr.predicate)

        if pred not in self.pref_dict.values():
            return

        entry = {'range': []}
        preds[pred] = entry

        if expr.max is not None:
            entry['cardinality'] = expr.max

        # The range list is filled in place with the referenced shape names.
        self._load_expr(expr.valueExpr, entry['range'])

def parse(self):
    """Populate ``self.json_shapes`` from the shapes of the parsed schema.

    Every shape in ``self.shex.shapes`` whose id resolves through
    ``get_shape_name(..., True)`` gets an entry in ``self.json_shapes``
    holding the result of ``_load_expr`` for its shape expressions. Shapes
    that do not resolve are skipped.
    """
    # NOTE(review): the Association objects collected here are neither
    # returned nor stored; presumably work in progress - confirm.
    goshapes = []

    for shape in self.shex.shapes:
        print(shape)
        print("")
        shape_name = self.get_shape_name(shape['id'], True)

        if shape_name is None:
            continue

        print('Parsing Shape: ' + shape['id'])

        goshape = Association()
        goshape.subject = shape_name
        goshape.relationship = ""
        self.json_shapes[shape_name] = {}

        # NOTE(review): each iteration overwrites the previous result, so only
        # the last shape expression's predicates are kept - confirm intent.
        for expr in shape.shapeExprs or []:
            self.json_shapes[shape_name] = self._load_expr(expr)

        goshapes.append(goshape)


nfShex = NoctuaFormShex()
nfShex.parse()

# Output files are resolved relative to this script rather than the current
# working directory, so the dump lands in ../shapes/json wherever it is run from.
base_path = Path(__file__).parent
json_shapes_file_path = (base_path / "../shapes/json/shex_dump.json").resolve()
look_table_file_path = (base_path / "../shapes/json/look_table.json").resolve()

with open(json_shapes_file_path, "w") as sf:
    json.dump(nfShex.json_shapes, sf, indent=2)

with open(look_table_file_path, "w") as sf:
    json.dump(nfShex.gen_lookup_table(), sf, indent=2)
Loading

0 comments on commit 27bff84

Please sign in to comment.