Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
804252e
Saving off some work on the context builder
kfairbanks Aug 5, 2022
3c4dd11
Adds most of code to process DatatypeProperties
kfairbanks Aug 5, 2022
c54ca60
Adds logic to address Requirement1 of issue #423
kfairbanks Aug 5, 2022
ae338a9
Initial attempt at processing and adding ObjectProperties to context
kfairbanks Aug 5, 2022
eb32c44
Adding key strings to context for Req4 of issue #423
kfairbanks Aug 5, 2022
a2bb0f9
WIP save. Attempt to address req 4 in issue #423
kfairbanks Aug 8, 2022
6ed0378
Changes prints to logger.debug() statements
kfairbanks Aug 8, 2022
6444557
Linter changes
kfairbanks Aug 9, 2022
a278da6
Bug Fixes
kfairbanks Aug 9, 2022
b2742c1
Adding initial testing for issue #423
kfairbanks Aug 9, 2022
6aa1898
Removing testing logic from main context builder script
kfairbanks Aug 9, 2022
9ce2a4f
Adds release statements
kfairbanks Aug 10, 2022
cce5505
Adds support for "concise" version of context to satisfy req 5 of iss…
kfairbanks Aug 10, 2022
735e089
Moving json string building from context building class to object typ…
kfairbanks Aug 10, 2022
8b8905e
Incorporate context builder tests into CI Makefile workflow
ajnelson-nist Aug 11, 2022
15d3669
Change prefixes to match 'uco-' pattern
ajnelson-nist Aug 11, 2022
cb78b82
Add pytest
ajnelson-nist Aug 11, 2022
f2b48ef
Do not rely on picking up prefixes from graph
ajnelson-nist Aug 11, 2022
f1b0f3d
Add necessarily-local kb prefix
ajnelson-nist Aug 11, 2022
458da1e
Test non-UCO concepts load from graph parse
ajnelson-nist Aug 11, 2022
3865516
Add debug dump for review
ajnelson-nist Aug 11, 2022
ec3b35d
Adds 'uco-' prefix to specific prefixes during generation
kfairbanks Aug 12, 2022
845b7f5
Merge branch 'issue_423-Makefile' into issue_423
kfairbanks Aug 13, 2022
5fa38a0
Alters Makefile to fix concise vs minimal context generation
kfairbanks Aug 15, 2022
7eee88d
A bit of formatting and clean up
kfairbanks Aug 16, 2022
d808aa3
Perform static type review
ajnelson-nist Aug 18, 2022
9e44d41
Prevent premature descent into new test directory
ajnelson-nist Aug 18, 2022
9bd9106
Scope hard-coded sample to 'minimal' context dictionary test
ajnelson-nist Aug 18, 2022
7457464
Start "concise" test
ajnelson-nist Aug 18, 2022
a754894
Swap out @ symbols
ajnelson-nist Aug 18, 2022
f783c67
Scope test
ajnelson-nist Aug 18, 2022
5826eab
Enable very-verbose output
ajnelson-nist Aug 18, 2022
64fa17b
Add hash sample test with exact-parse comparison
ajnelson-nist Aug 18, 2022
e5a7104
Remove '@' from graph
ajnelson-nist Aug 18, 2022
4cdae7a
Use concise object reference
ajnelson-nist Aug 18, 2022
67a836d
Remove constraint on semi-open vocabulary functionality with context …
ajnelson-nist Aug 23, 2022
be1317e
Troubleshooting hash pytest by hardcoding values in the context
kfairbanks Aug 23, 2022
b7ec029
Adding classes for concise contexts
kfairbanks Aug 23, 2022
17bd266
Add needed class designation
ajnelson-nist Aug 23, 2022
32fea27
Merge branch 'issue_423' of github.com:kfairbanks/UCO into issue_423
kfairbanks Aug 23, 2022
e43cfea
Fix prefix
ajnelson-nist Aug 23, 2022
91380ab
Merge branch 'issue_423' of github.com:kfairbanks/UCO into issue_423
kfairbanks Aug 23, 2022
474d91e
Adds UCO Class processing to context builder
kfairbanks Aug 23, 2022
df5620f
Adds datatype processing to context builder
kfairbanks Aug 23, 2022
a594c1e
Takes out cheats in context
kfairbanks Aug 23, 2022
cb0ca03
Adjust test framework to parameterize for context-independent file
ajnelson-nist Aug 23, 2022
c8ce67f
Exercise against sample that uses imported, non-UCO classes
ajnelson-nist Aug 23, 2022
937bad0
Expand search for class IRIs
ajnelson-nist Aug 23, 2022
6f03b0d
Fixes a bug in class_iri processing to pass concise hash test
kfairbanks Aug 24, 2022
f7be4e5
Adds datatypes to minimal context
kfairbanks Aug 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 253 additions & 0 deletions src/uco-json-ld-compaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
#
# Release Statement?
#

"""
Purpose statement

1) json-ld context to support compaction of all IRI base paths through defined
prefixes
2) json-ld context to support compaction of all property type assertions
3) json-ld context to support assertion of properties with potential
cardinalities >1 as set arrays
4) json-ld context to support compaction of json-ld specific key strings @id,
@type, @value and @graph to simple json key strings id, type, value, and graph such that the body of content can be viewed as simple json and the context can be utilized to expand it into fully codified json-ld

"""

__version__ = "0.0.1"

import argparse
import logging
from multiprocessing import context
import os
import typing
import pathlib
import sys
import re
import rdflib

_logger = logging.getLogger(os.path.basename(__file__))

"""
27 def main():
28 g = rdflib.Graph()
29 for in_graph in args.in_graph:
30 g.parse(in_graph, format="turtle")
31 g.serialize(args.out_graph, format="turtle")
"""

class context_builder:
    """
    Collects prefix declarations and owl:DatatypeProperty names from the
    UCO turtle ontology files, to support building a JSON-LD context.
    """

    def __init__(self) -> None:
        # Cached list of discovered .ttl paths (None until first search).
        self.ttl_file_list = None
        # Placeholder retained for interface compatibility; not yet used.
        self.prefix_dict = None
        # Repository root, discovered relative to this script's location.
        self.top_srcdir = None
        # Maps prefix label -> IRI text, populated by get_prefixes().
        self.iri_dict = None
        # Maps property local name -> list of namespace prefixes using it.
        self.datatype_properties_dict: dict = {}

    def get_ttl_files(self, subdirs=None) -> list:
        """
        Finds all turtle (.ttl) files in the directory structure, caching
        the result on first use.
        @subdirs - Optional list used to restrict search to particular directories.
        """
        if self.ttl_file_list is not None:
            return self.ttl_file_list

        # Directory-orientation logic adapted from populate_node_kind.py.
        self.top_srcdir = pathlib.Path(os.path.dirname(__file__)) / ".."
        top_srcdir = self.top_srcdir
        # Sanity check.
        assert (top_srcdir / ".git").exists(), "Hard-coded top_srcdir discovery is no longer correct."

        _logger.debug(top_srcdir)

        # Search either the whole tree or only the requested subdirectories.
        if subdirs:
            search_roots = [top_srcdir / subdir for subdir in subdirs]
        else:
            search_roots = [top_srcdir]

        file_list = []
        for search_root in search_roots:
            # Path.rglob returns dot files, unlike shell's glob, so filter
            # CI scratch files (".check-") explicitly.  uco.ttl is skipped
            # because its Python-generated output removes supplementary
            # prefix statements.
            for candidate in search_root.rglob("*.ttl"):
                if ".check-" in str(candidate):
                    continue
                if "uco.ttl" in str(candidate):
                    continue
                file_list.append(candidate)

        self.ttl_file_list = file_list
        return self.ttl_file_list

    def get_iris(self) -> list:
        """
        Returns sorted list of '"prefix":IRI' strings built from the
        collected prefix dictionary.  Empty if get_prefixes() has not run.
        """
        if self.iri_dict is None:
            return []
        return [f"\"{k}\":{self.iri_dict[k]}" for k in sorted(self.iri_dict)]

    def __add_to_iri_dict(self, in_prefix):
        """INTERNAL function: Adds unique key value pairs to dict
        that will be used to generate context. Dies if inconsistent
        key value pair is found.
        @in_prefix - an input '@prefix label: <IRI> .' line
        """
        if self.iri_dict is None:
            self.iri_dict = {}

        t_split = in_prefix.split()
        # Taking the ':' off the end of the key.
        k = t_split[1][:-1]
        v = t_split[2]
        if k in self.iri_dict:
            # Same label declared twice must map to the same IRI.
            if self.iri_dict[k] != v:
                _logger.error(f"Mismatched values:\t{self.iri_dict[k]}!={v}")
                sys.exit(1)
        else:
            self.iri_dict[k] = v

    def __process_DatatypePropertiesHelper(self, in_file=None):
        """
        INTERNAL function: Records each owl:DatatypeProperty found in one
        turtle file, using rdflib.
        @in_file - ttl file to get datatype properties from
        """
        graph = rdflib.Graph()
        graph.parse(in_file, format="turtle")
        for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)):
            # Subject IRIs end .../<family>/<namespace>/<localname>; derive
            # a 'family-namespace' prefix from the last two path segments.
            s_triple = triple[0].split('/')
            root = s_triple[-1]
            ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
            _logger.debug("%s %s", ns_prefix, root)

            if root in self.datatype_properties_dict:
                # Same local name appears under multiple namespaces.
                _logger.debug(f"Non-unique entry found:\t {ns_prefix}:{root}")
                self.datatype_properties_dict[root].append(ns_prefix)
            else:
                self.datatype_properties_dict[root] = [ns_prefix]

    def process_DatatypeProperties(self):
        """Processes every discovered turtle file for datatype properties."""
        # get_ttl_files() (rather than the raw attribute) guarantees the
        # file list exists even if the caller skipped discovery.
        for ttl_file in self.get_ttl_files():
            self.__process_DatatypePropertiesHelper(in_file=ttl_file)

    def get_prefixes(self):
        """
        Finds all '@prefix' lines in the list of ttl files and adds them to
        an internal dict.  Exits non-zero if no turtle files were found.
        """
        ttl_file_list = self.get_ttl_files()
        if not ttl_file_list:
            _logger.error("No ttl files to process")
            sys.exit(1)

        for ttl_file in ttl_file_list:
            with open(ttl_file, 'r') as file:
                for line in file:
                    if line.startswith("@prefix"):
                        self.__add_to_iri_dict(in_prefix=line.strip())



def main():
    """
    Command-line entry point: discovers ontology turtle files, extracts
    prefixes and datatype properties, prints draft context entries for
    non-uniquely-named properties, then round-trips the monolithic
    ontology to JSON-LD for review.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument('--debug', action="store_true")
    args = argument_parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _logger.debug("Debug Mode enabled")

    cb = context_builder()
    for ttl_path in cb.get_ttl_files(subdirs=['ontology']):
        _logger.debug(f" Input ttl: {ttl_path}")

    cb.get_prefixes()
    cb.process_DatatypeProperties()

    # TODO: If we cannot find rdf:range, skip; if rdf:range is a blank
    # node, skip.
    for key in sorted(cb.datatype_properties_dict):
        namespaces = cb.datatype_properties_dict[key]
        # Non-unique roots: the same local name exists in multiple
        # namespaces, so each needs a prefix-qualified context entry.
        if len(namespaces) > 1:
            print(f"{key}:{namespaces}")
            for ns in namespaces:
                con_str = f"\"{ns}:{key}\":{{"
                con_str += "\n\t\"@id\":\"%s:%s\"," % (ns, key)
                con_str += "\n\t\"@type\":\"@id\""
                con_str += "\n\t},"
                print(con_str)
        # Unique roots are not handled yet.

    # Round-trip the monolithic ontology to JSON-LD for review.
    # NOTE(review): path is relative to the current working directory, not
    # to this script — confirm intended invocation directory.
    graph = rdflib.Graph()
    graph.parse("../tests/uco_monolithic.ttl", format="turtle")
    graph.serialize("_uco_monolithic.json-ld", format="json-ld")
    sys.exit()


if __name__ == "__main__":
    main()
Loading