From 804252e19b13e5847ba7a0ad0e8d1e5d30a15350 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 5 Aug 2022 09:18:37 -0400
Subject: [PATCH 01/47] Saving off some work on the context builder
---
src/uco_jsonld_context_builder.py | 249 ++++++++++++++++++++++++++++++
1 file changed, 249 insertions(+)
create mode 100644 src/uco_jsonld_context_builder.py
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
new file mode 100644
index 00000000..1a06d530
--- /dev/null
+++ b/src/uco_jsonld_context_builder.py
@@ -0,0 +1,249 @@
+#
+# Release Statement?
+#
+
+"""
+Purpose statement
+
+1) json-ld context to support compaction of all IRI base paths through defined
+ prefixes
+2) json-ld context to support compaction of all property type assertions
+3) json-ld context to support assertion of properties with potential
+ cardinalities >1 as set arrrays
+4) json-ld context to support compaction of json-ld specific key strings @id,
+ @type, @value and @graph to simple json key strings id, type, value, and graph such that the body of content can be viewed as simple json and the context can be utilized to expand it into fully codified json-ld
+
+"""
+
+__version__ = "0.0.1"
+
+import argparse
+import logging
+from multiprocessing import context
+import os
+import typing
+import pathlib
+import sys
+import re
+import rdflib
+
+_logger = logging.getLogger(os.path.basename(__file__))
+
+"""
+ 27 def main():
+ 28 g = rdflib.Graph()
+ 29 for in_graph in args.in_graph:
+ 30 g.parse(in_graph, format="turtle")
+ 31 g.serialize(args.out_graph, format="turtle")
+"""
+
+class context_builder:
+ def __init__(self):
+ self.ttl_file_list=None
+ self.prefix_dict=None
+ self.top_srcdir=None
+ self.iri_dict=None
+ self.datatype_properties_dict={}
+
+ def get_ttl_files(self, subdirs=[]) -> list:
+ """
+ Finds all turtle (.ttl) files in directory structure
+ @subdirs - Optional list used to restrict search to particular directories.
+ """
+ if self.ttl_file_list is not None:
+ return self.ttl_file_list
+
+ #Shamelessly stolen from populate_node_kind.py
+ # 0. Self-orient.
+ self.top_srcdir = pathlib.Path(os.path.dirname(__file__)) / ".."
+ top_srcdir=self.top_srcdir
+ # Sanity check.
+ assert (top_srcdir / ".git").exists(), "Hard-coded top_srcdir discovery is no longer correct."
+
+ # 1. Load all ontology files into dictionary of graphs.
+
+ # The extra filtering step loop to keep from picking up CI files. Path.glob returns dot files, unlike shell's glob.
+ # The uco.ttl file is also skipped because the Python output removes supplementary prefix statements.
+ ontology_filepaths : typing.List[pathlib.Path] = []
+
+ file_list=[]
+ _logger.debug(top_srcdir)
+
+ if len(subdirs) < 1:
+ for x in (top_srcdir).rglob("*.ttl"):
+ if ".check-" in str(x):
+ continue
+ if "uco.ttl" in str(x):
+ continue
+ #_logger.debug(x)
+ file_list.append(x)
+ self.ttl_file_list=file_list
+ else:
+ for dir in subdirs:
+ for x in (top_srcdir / dir).rglob("*.ttl"):
+ if ".check-" in str(x):
+ continue
+ if "uco.ttl" in str(x):
+ continue
+ #_logger.debug(x)
+ file_list.append(x)
+ self.ttl_file_list=file_list
+
+ return self.ttl_file_list
+
+ def get_iris(self)->list:
+ """
+ Returns sorted list of IRIs
+ """
+ k_list=list(self.iri_dict.keys())
+ #print(k_list)
+ k_list.sort()
+ irs_list=[]
+ for k in k_list:
+ #print(f"\"{k}\":{self.iri_dict[k]}")
+ irs_list.append(f"\"{k}\":{self.iri_dict[k]}")
+ return irs_list
+
+ def __add_to_iri_dict(self, in_prefix):
+ """INTERNAL function: Adds unique key value pairs to dict
+ that will be used to generate context. Dies if inconsistent
+ key value pair is found.
+ @in_prefix - an input prefix triple
+ """
+ if self.iri_dict is None:
+ self.iri_dict={}
+
+ iri_dict = self.iri_dict
+ t_split=in_prefix.split()
+ #Taking the ':' off the end of the key
+ k=t_split[1][:-1]
+ v=t_split[2]
+ if k in iri_dict.keys():
+ #_logger.debug(f"'{k}' already exists")
+ if iri_dict[k]!=v:
+ _logger.error(f"Mismatched values:\t{iri_dict[k]}!={v}")
+ sys.exit()
+ else:
+ iri_dict[k]=v
+
+ def __process_DatatypePropertiesHelper(self, in_file=None):
+ """
+ Does the actual work using rdflib
+ @in_file - ttl file to get object properties from
+ """
+ graph = rdflib.Graph()
+ graph.parse(in_file, format="turtle")
+ "Make sure to do an itter that looks for rdflib.OWL.class"
+ #limit = 4
+ #count = 0
+ for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ print(triple)
+ print(triple[0].split('/'))
+ s_triple=triple[0].split('/')
+ root=s_triple[-1]
+ ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
+ print(ns_prefix, root)
+
+ if root in self.datatype_properties_dict.keys():
+ print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.datatype_properties_dict[root].append(ns_prefix)
+ else:
+ self.datatype_properties_dict[root]=[ns_prefix]
+
+ return
+ #count += 1
+ #if count >= limit:
+ # return
+
+ def process_DatatypeProperties(self):
+ for ttl_file in self.ttl_file_list:
+ self.__process_DatatypePropertiesHelper(in_file=ttl_file)
+
+ def get_prefixes(self):
+ """
+ Finds all prefix lines in list of ttl files. Adds them to an
+ an internal dict
+ """
+ ttl_file_list = self.get_ttl_files()
+ if len(ttl_file_list) < 1:
+ _logger.error("No ttls files to process")
+ sys.exit()
+
+ for ttl_file in ttl_file_list:
+ with open(ttl_file,'r') as file:
+ for line in file:
+ if re.search("^\@prefix",line):
+ #_logger.debug(line.strip())
+ self.__add_to_iri_dict(in_prefix=line.strip())
+
+
+
+def main():
+ argument_parser = argparse.ArgumentParser()
+ argument_parser.add_argument('--debug', action="store_true")
+ #argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
+ args = argument_parser.parse_args()
+
+ logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+
+ _logger.debug("Debug Mode enabled")
+
+ cb = context_builder()
+ for i in (cb.get_ttl_files(subdirs=['ontology'])):
+ _logger.debug(f" Input ttl: {i}")
+
+ cb.get_prefixes()
+ #for i in cb.get_iris():
+ # print(i)
+
+ cb.process_DatatypeProperties()
+
+"""
+If we cannot find rdf range, skip
+if rdf range is a blank node, skip
+"""
+ dt_list = list(cb.datatype_properties_dict.keys())
+ dt_list.sort()
+ for key in dt_list:
+ #Non-unique roots
+ if len(cb.datatype_properties_dict[key]) > 1:
+ print(f"{key}:{cb.datatype_properties_dict[key]}")
+ for ns in cb.datatype_properties_dict[key]:
+ con_str=f"\"{ns}:{key}\":{{"
+ con_str+="\n\t\"@id\":\"%s:%s\"," % (ns,key)
+ con_str+="\n\t\"@type\":\"@id\""
+ con_str+="\n\t},"
+ print(con_str)
+ #Unique roots
+ else:
+ pass
+
+ #from pprint import pprint
+ #pprint(cb.datatype_properties_dict)
+ sys.exit()
+ #context keyword in graph parse and graph serialize
+ #black formater FLAKE8 for isort
+ #check the case-uilities python
+
+
+
+ graph = rdflib.Graph()
+ graph.parse("../tests/uco_monolithic.ttl", format="turtle")
+ "Make sure to do an itter that looks for rdflib.OWL.class"
+ limit = 4
+ count = 0
+ for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ print(triple[0].fragment)
+ print(triple)
+ count += 1
+ if count >= limit:
+ sys.exit()
+
+ #print(f"{args.in_graph}")
+ #g = rdflib.Graph()
+ #g.parse(args.in_graph, format="turtle")
+ #g.serialize("temp.json-ld", format="json-ld")
+
+
+if __name__ == "__main__":
+ main()
From 3c4dd1156be36dbfc9923944903444901e130f09 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 5 Aug 2022 13:45:00 -0400
Subject: [PATCH 02/47] Adds most of code to process DatatypeProperties
---
src/uco_jsonld_context_builder.py | 88 +++++++++++++++++++++++--------
1 file changed, 66 insertions(+), 22 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 1a06d530..6a4c0a52 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -11,7 +11,9 @@
3) json-ld context to support assertion of properties with potential
cardinalities >1 as set arrrays
4) json-ld context to support compaction of json-ld specific key strings @id,
- @type, @value and @graph to simple json key strings id, type, value, and graph such that the body of content can be viewed as simple json and the context can be utilized to expand it into fully codified json-ld
+ @type, @value and @graph to simple json key strings id, type, value, and
+ graph such that the body of content can be viewed as simple json and the
+ context can be utilized to expand it into fully codified json-ld
"""
@@ -26,23 +28,26 @@
import sys
import re
import rdflib
+from rdflib.namespace import Namespace, NamespaceManager
_logger = logging.getLogger(os.path.basename(__file__))
-"""
- 27 def main():
- 28 g = rdflib.Graph()
- 29 for in_graph in args.in_graph:
- 30 g.parse(in_graph, format="turtle")
- 31 g.serialize(args.out_graph, format="turtle")
-"""
+class DatatypePropertyInfo:
+ "Class to hold DatatypePropertyInfo which will be used to build context"
+ def __init__(self):
+ self.ns_prefix = None
+ self.root_property_name = None
+ self.prefixed_datatype_name = None
+ self.shacl_count_lt_1 = False
+
-class context_builder:
+class ContextBuilder:
def __init__(self):
self.ttl_file_list=None
self.prefix_dict=None
self.top_srcdir=None
self.iri_dict=None
+ #A dict of DataTypePropertyInfo Objects
self.datatype_properties_dict={}
def get_ttl_files(self, subdirs=[]) -> list:
@@ -136,20 +141,38 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
"Make sure to do an itter that looks for rdflib.OWL.class"
#limit = 4
#count = 0
+ #test_list=[]
+ #If we cannot find rdf range, skip
+ #If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ dtp_obj = DatatypePropertyInfo()
print(triple)
- print(triple[0].split('/'))
+ #print(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
print(ns_prefix, root)
+ dtp_obj.ns_prefix=ns_prefix
+ dtp_obj.root_property_name=root
+ for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
+ #Testing for Blank Nodes
+ if isinstance(triple2[-1],rdflib.term.BNode):
+ continue
+ rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
+ dtp_obj.prefixed_datatype_name=rdf_rang_str
+ #print(f"\t{triple2}")
+ #print(f"\t{triple2[-1].n3(graph.namespace_manager)}\t{type(triple2[-1])}")
+ #if str(rdf_rang_str) not in test_list:
+ # test_list.append(rdf_rang_str)
+
if root in self.datatype_properties_dict.keys():
- print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
- self.datatype_properties_dict[root].append(ns_prefix)
+ _logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.datatype_properties_dict[root].append(dtp_obj)
else:
- self.datatype_properties_dict[root]=[ns_prefix]
+ self.datatype_properties_dict[root]=[dtp_obj]
+ #print(f"***\n{test_list}\n***")
return
#count += 1
#if count >= limit:
@@ -188,7 +211,7 @@ def main():
_logger.debug("Debug Mode enabled")
- cb = context_builder()
+ cb = ContextBuilder()
for i in (cb.get_ttl_files(subdirs=['ontology'])):
_logger.debug(f" Input ttl: {i}")
@@ -198,12 +221,34 @@ def main():
cb.process_DatatypeProperties()
-"""
-If we cannot find rdf range, skip
-if rdf range is a blank node, skip
-"""
dt_list = list(cb.datatype_properties_dict.keys())
dt_list.sort()
+ last_dtp_obj = cb.datatype_properties_dict[dt_list[-1]][-1]
+ for key in dt_list:
+ #if len(cb.datatype_properties_dict[key]) > 1:
+ for dtp_obj in cb.datatype_properties_dict[key]:
+ con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
+ if (dtp_obj.prefixed_datatype_name is not None):
+ con_str+=",\n"
+ con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ else:
+ con_str+="\n"
+ if dtp_obj != last_dtp_obj:
+ con_str+="},\n"
+ else:
+ con_str+="}\n"
+ print(dtp_obj.root_property_name)
+ print(con_str)
+ #else:
+ # dtp_obj = cb.datatype_properties_dict[key][0]
+ # con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ # con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\"\n"
+ # con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ # con_str+="}"
+
+
+ """Come back to this for concise output""
for key in dt_list:
#Non-unique roots
if len(cb.datatype_properties_dict[key]) > 1:
@@ -217,16 +262,14 @@ def main():
#Unique roots
else:
pass
-
+ """
+ return
#from pprint import pprint
#pprint(cb.datatype_properties_dict)
- sys.exit()
#context keyword in graph parse and graph serialize
#black formater FLAKE8 for isort
#check the case-uilities python
-
-
graph = rdflib.Graph()
graph.parse("../tests/uco_monolithic.ttl", format="turtle")
"Make sure to do an itter that looks for rdflib.OWL.class"
@@ -234,6 +277,7 @@ def main():
count = 0
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
print(triple[0].fragment)
+ print(triple[0].n3(graph.namespace_manager))
print(triple)
count += 1
if count >= limit:
From c54ca601c7766a9efa5e297db64e4d40becdb7d8 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 5 Aug 2022 14:20:48 -0400
Subject: [PATCH 03/47] Adds logic to address Requirement1 of issue #423
---
src/uco_jsonld_context_builder.py | 73 ++++++++++++++++++++++++-------
1 file changed, 57 insertions(+), 16 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 6a4c0a52..35385e83 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -49,6 +49,17 @@ def __init__(self):
self.iri_dict=None
#A dict of DataTypePropertyInfo Objects
self.datatype_properties_dict={}
+ #The string that will hold the processed context
+ self.context_str = ""
+
+ def init_context_str(self) -> None:
+ self.context_str="{\n\t\"@context\":{\n"""
+
+ def close_context_str(self) -> None:
+ self.context_str=self.context_str.strip()
+ if self.context_str[-1] == ',':
+ self.context_str=self.context_str[:-1]
+ self.context_str+="\n\t}\n}"
def get_ttl_files(self, subdirs=[]) -> list:
"""
@@ -98,7 +109,7 @@ def get_ttl_files(self, subdirs=[]) -> list:
def get_iris(self)->list:
"""
- Returns sorted list of IRIs
+ Returns sorted list of IRIs as prefix:value strings
"""
k_list=list(self.iri_dict.keys())
#print(k_list)
@@ -106,9 +117,14 @@ def get_iris(self)->list:
irs_list=[]
for k in k_list:
#print(f"\"{k}\":{self.iri_dict[k]}")
- irs_list.append(f"\"{k}\":{self.iri_dict[k]}")
+ irs_list.append(f"\"{k}\":\"{self.iri_dict[k]}\"")
return irs_list
+ def add_prefixes_to_cntxt(self) -> None:
+ """Adds detected prefixes to the context string"""
+ for i in self.get_iris():
+ self.context_str+=f"{i},\n"
+
def __add_to_iri_dict(self, in_prefix):
"""INTERNAL function: Adds unique key value pairs to dict
that will be used to generate context. Dies if inconsistent
@@ -139,19 +155,16 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
graph = rdflib.Graph()
graph.parse(in_file, format="turtle")
"Make sure to do an itter that looks for rdflib.OWL.class"
- #limit = 4
- #count = 0
- #test_list=[]
#If we cannot find rdf range, skip
#If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
dtp_obj = DatatypePropertyInfo()
- print(triple)
+ #print(triple)
#print(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
- print(ns_prefix, root)
+ #print(ns_prefix, root)
dtp_obj.ns_prefix=ns_prefix
dtp_obj.root_property_name=root
for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
@@ -160,8 +173,6 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
continue
rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
dtp_obj.prefixed_datatype_name=rdf_rang_str
- #print(f"\t{triple2}")
- #print(f"\t{triple2[-1].n3(graph.namespace_manager)}\t{type(triple2[-1])}")
#if str(rdf_rang_str) not in test_list:
# test_list.append(rdf_rang_str)
@@ -171,18 +182,13 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
self.datatype_properties_dict[root].append(dtp_obj)
else:
self.datatype_properties_dict[root]=[dtp_obj]
-
- #print(f"***\n{test_list}\n***")
return
- #count += 1
- #if count >= limit:
- # return
def process_DatatypeProperties(self):
for ttl_file in self.ttl_file_list:
self.__process_DatatypePropertiesHelper(in_file=ttl_file)
- def get_prefixes(self):
+ def process_prefixes(self):
"""
Finds all prefix lines in list of ttl files. Adds them to an
an internal dict
@@ -199,6 +205,33 @@ def get_prefixes(self):
#_logger.debug(line.strip())
self.__add_to_iri_dict(in_prefix=line.strip())
+ def print_minimal_datatype_properties(self)->str:
+ """Prints DataType Properties in a format suitable for the contect"""
+ dtp_str_sect=""
+ dt_list = list(self.datatype_properties_dict.keys())
+ dt_list.sort()
+ last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
+ for key in dt_list:
+ #if len(cb.datatype_properties_dict[key]) > 1:
+ for dtp_obj in self.datatype_properties_dict[key]:
+ con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
+ if (dtp_obj.prefixed_datatype_name is not None):
+ con_str+=",\n"
+ con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ else:
+ con_str+="\n"
+ if dtp_obj != last_dtp_obj:
+ con_str+="},\n"
+ else:
+ con_str+="}\n"
+ #print(dtp_obj.root_property_name)
+ #print(con_str)
+ dtp_str_sect+=con_str
+
+ #print(dtp_str_sect)
+ return(dtp_str_sect)
+
def main():
@@ -215,11 +248,19 @@ def main():
for i in (cb.get_ttl_files(subdirs=['ontology'])):
_logger.debug(f" Input ttl: {i}")
- cb.get_prefixes()
+ cb.process_prefixes()
#for i in cb.get_iris():
# print(i)
cb.process_DatatypeProperties()
+ cb.init_context_str()
+ cb.add_prefixes_to_cntxt()
+ cb.close_context_str()
+ print(cb.context_str)
+
+ #cb.print_minimal_datatype_properties()
+
+ return
dt_list = list(cb.datatype_properties_dict.keys())
dt_list.sort()
From ae338a973e143ca1f3357a87a9df6059d0f06e54 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 5 Aug 2022 15:19:04 -0400
Subject: [PATCH 04/47] Initial attempt at processing and adding
ObjectProperties to context
---
src/uco_jsonld_context_builder.py | 99 ++++++++++++++++++++++++++++---
1 file changed, 92 insertions(+), 7 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 35385e83..3e082e7b 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -32,13 +32,20 @@
_logger = logging.getLogger(os.path.basename(__file__))
+class ObjectPropertyInfo:
+ """Class to hold ObjectProperty info which will be used to build context"""
+ def __init__(self):
+ self.ns_prefix = None
+ self.root_class_name = None
+ self.shacl_count_lte_1 = False
+
class DatatypePropertyInfo:
- "Class to hold DatatypePropertyInfo which will be used to build context"
+ """Class to hold DatatypeProperty info which will be used to build context"""
def __init__(self):
self.ns_prefix = None
self.root_property_name = None
self.prefixed_datatype_name = None
- self.shacl_count_lt_1 = False
+ self.shacl_count_lte_1 = False
class ContextBuilder:
@@ -49,6 +56,8 @@ def __init__(self):
self.iri_dict=None
#A dict of DataTypePropertyInfo Objects
self.datatype_properties_dict={}
+ #A dict of ObjectPropertyInfo Objects
+ self.object_properties_dict={}
#The string that will hold the processed context
self.context_str = ""
@@ -188,6 +197,49 @@ def process_DatatypeProperties(self):
for ttl_file in self.ttl_file_list:
self.__process_DatatypePropertiesHelper(in_file=ttl_file)
+ def __process_ObjectPropertiesHelper(self, in_file=None):
+ """
+ Does the actual work using rdflib
+ @in_file - ttl file to get object properties from
+ """
+ graph = rdflib.Graph()
+ graph.parse(in_file, format="turtle")
+ #Make sure to do an iter that looks for rdflib.OWL.class"
+ #If we cannot find rdf range, skip
+ #If rdf range is a blank node, skip
+ for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.ObjectProperty)):
+ op_obj = ObjectPropertyInfo()
+ print(triple)
+ #print(triple[0].split('/'))
+ s_triple=triple[0].split('/')
+ root=s_triple[-1]
+ ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
+ print(ns_prefix, root)
+ op_obj.ns_prefix=ns_prefix
+ op_obj.root_class_name=root
+
+ #for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
+ # #Testing for Blank Nodes
+ # if isinstance(triple2[-1],rdflib.term.BNode):
+ # continue
+ # rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
+ # print(f"\t{rdf_rang_str}")
+ # op_obj.prefixed_datatype_name=rdf_rang_str
+ # #if str(rdf_rang_str) not in test_list:
+ # # test_list.append(rdf_rang_str)
+
+
+ if root in self.object_properties_dict.keys():
+ _logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.object_properties_dict[root].append(op_obj)
+ else:
+ self.object_properties_dict[root]=[op_obj]
+ return
+
+ def process_ObjectProperties(self):
+ for ttl_file in self.ttl_file_list:
+ self.__process_ObjectPropertiesHelper(in_file=ttl_file)
+
def process_prefixes(self):
"""
Finds all prefix lines in list of ttl files. Adds them to an
@@ -228,10 +280,43 @@ def print_minimal_datatype_properties(self)->str:
#print(dtp_obj.root_property_name)
#print(con_str)
dtp_str_sect+=con_str
-
#print(dtp_str_sect)
return(dtp_str_sect)
+ def add_minimal_datatype_props_to_cntxt(self) -> None:
+ dtp_str_sect=""
+ dt_list = list(self.datatype_properties_dict.keys())
+ dt_list.sort()
+ #last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
+ for key in dt_list:
+ for dtp_obj in self.datatype_properties_dict[key]:
+ con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
+ if (dtp_obj.prefixed_datatype_name is not None):
+ con_str+=",\n"
+ con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ else:
+ con_str+="\n"
+ con_str+="},\n"
+
+ dtp_str_sect+=con_str
+
+ self.context_str+=dtp_str_sect
+
+ def add_minimal_object_props_to_cntxt(self) -> None:
+ op_str_sect=""
+ op_list = list(self.object_properties_dict.keys())
+ op_list.sort()
+ for key in op_list:
+ for op_obj in self.object_properties_dict[key]:
+ con_str=f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
+ con_str+="\t\"@type\":\"@id\"\n"
+ con_str+="},\n"
+
+ op_str_sect+=con_str
+ self.context_str+=op_str_sect
+
+
def main():
@@ -249,18 +334,18 @@ def main():
_logger.debug(f" Input ttl: {i}")
cb.process_prefixes()
- #for i in cb.get_iris():
- # print(i)
-
cb.process_DatatypeProperties()
+ cb.process_ObjectProperties()
cb.init_context_str()
cb.add_prefixes_to_cntxt()
+ cb.add_minimal_object_props_to_cntxt()
+ cb.add_minimal_datatype_props_to_cntxt()
cb.close_context_str()
print(cb.context_str)
- #cb.print_minimal_datatype_properties()
return
+ #cb.print_minimal_datatype_properties()
dt_list = list(cb.datatype_properties_dict.keys())
dt_list.sort()
From eb32c4460cf810c16ff53f4335dab167de6c2e83 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 5 Aug 2022 15:44:24 -0400
Subject: [PATCH 05/47] Adding key strings to context for Req4 of issue #423
---
src/uco_jsonld_context_builder.py | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 3e082e7b..39b890f0 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -209,12 +209,12 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
#If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.ObjectProperty)):
op_obj = ObjectPropertyInfo()
- print(triple)
+ #print(triple)
#print(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
- print(ns_prefix, root)
+ #print(ns_prefix, root)
op_obj.ns_prefix=ns_prefix
op_obj.root_class_name=root
@@ -284,6 +284,7 @@ def print_minimal_datatype_properties(self)->str:
return(dtp_str_sect)
def add_minimal_datatype_props_to_cntxt(self) -> None:
+ """Adds Datatype Properties to context string"""
dtp_str_sect=""
dt_list = list(self.datatype_properties_dict.keys())
dt_list.sort()
@@ -304,6 +305,7 @@ def add_minimal_datatype_props_to_cntxt(self) -> None:
self.context_str+=dtp_str_sect
def add_minimal_object_props_to_cntxt(self) -> None:
+ """Adds Object Properties to context string"""
op_str_sect=""
op_list = list(self.object_properties_dict.keys())
op_list.sort()
@@ -315,6 +317,16 @@ def add_minimal_object_props_to_cntxt(self) -> None:
op_str_sect+=con_str
self.context_str+=op_str_sect
+
+ def add_key_strings_to_cntxt(self) -> None:
+ """Adds id, type, and graph key strings to context string"""
+ ks_str=""
+ ks_str+="\t\"uco-core:id\":\"@id\",\n"
+ ks_str+="\t\"uco-core:type\":\"@type\",\n"
+ ks_str+="\t\"value\":\"@value\",\n"
+ ks_str+="\t\"graph\":\"@graph\",\n"
+
+ self.context_str+=ks_str
@@ -340,6 +352,7 @@ def main():
cb.add_prefixes_to_cntxt()
cb.add_minimal_object_props_to_cntxt()
cb.add_minimal_datatype_props_to_cntxt()
+ cb.add_key_strings_to_cntxt()
cb.close_context_str()
print(cb.context_str)
From a2bb0f9aeb45527a73a0ed528af907f1be9d80b4 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Mon, 8 Aug 2022 00:56:05 -0400
Subject: [PATCH 06/47] WIP save. Attempt to address req 4 in issue #423
---
src/uco_jsonld_context_builder.py | 99 ++++++++++++++++++++++++++++---
1 file changed, 92 insertions(+), 7 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 39b890f0..800db9a8 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -37,7 +37,8 @@ class ObjectPropertyInfo:
def __init__(self):
self.ns_prefix = None
self.root_class_name = None
- self.shacl_count_lte_1 = False
+ self.shacl_count_lte_1 = None
+ self.shacl_property_bnode = None
class DatatypePropertyInfo:
"""Class to hold DatatypeProperty info which will be used to build context"""
@@ -45,7 +46,8 @@ def __init__(self):
self.ns_prefix = None
self.root_property_name = None
self.prefixed_datatype_name = None
- self.shacl_count_lte_1 = False
+ self.shacl_count_lte_1 = None
+ self.shacl_property_bnode = None
class ContextBuilder:
@@ -168,8 +170,8 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
#If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
dtp_obj = DatatypePropertyInfo()
- #print(triple)
- #print(triple[0].split('/'))
+ print(triple)
+ print(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
@@ -179,11 +181,27 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
#Testing for Blank Nodes
if isinstance(triple2[-1],rdflib.term.BNode):
+ print(f"\tBlank: {triple2}\n")
continue
+ print(f"\ttriple2: f{triple2}\n")
rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
dtp_obj.prefixed_datatype_name=rdf_rang_str
#if str(rdf_rang_str) not in test_list:
# test_list.append(rdf_rang_str)
+
+ for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ print(f"\t**sh_triple:{sh_triple}")
+ dtp_obj.shacl_property_bnode=sh_triple[0]
+ for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ print(f"\t\t***sh_triple:{sh_triple2}")
+ print(f"\t\t***sh_triple:{sh_triple2[2]}")
+ if int(sh_triple2[2]) <= 1:
+ if dtp_obj.shacl_count_lte_1 is not None:
+ print(f"\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ dtp_obj.shacl_count_lte_1 = True
+ else:
+ print(f"\t\t***Large max_count: {sh_triple2[2]}")
+
if root in self.datatype_properties_dict.keys():
@@ -217,6 +235,24 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
#print(ns_prefix, root)
op_obj.ns_prefix=ns_prefix
op_obj.root_class_name=root
+
+ for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ print(f"**obj_sh_triple:{sh_triple}")
+ op_obj.shacl_property_bnode=sh_triple[0]
+ for sh_triple2 in graph.triples((op_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ print(f"\t\t***sh_triple:{sh_triple2}")
+ print(f"\t\t***sh_triple:{sh_triple2[2]}")
+ if int(sh_triple2[2]) <= 1:
+ if op_obj.shacl_count_lte_1 is not None:
+ print(f"\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ #print("\t\t**MaxCount Double Definition?")
+ op_obj.shacl_count_lte_1 = True
+ else:
+ print(f"\t\t***Large max_count: {sh_triple2[2]}")
+
+
+ #for sh_triple in graph.triples((triple[0],rdflib.sh.property, None)):
+ # print(f"**sh_triple:{sh_triple}")
#for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
# #Testing for Blank Nodes
@@ -312,7 +348,12 @@ def add_minimal_object_props_to_cntxt(self) -> None:
for key in op_list:
for op_obj in self.object_properties_dict[key]:
con_str=f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
- con_str+="\t\"@type\":\"@id\"\n"
+ con_str+="\t\"@type\":\"@id\""
+ if op_obj.shacl_count_lte_1 is not True:
+ con_str+=",\n\t\"@container\":\"@set\"\n"
+ else:
+ con_str+="\n"
+
con_str+="},\n"
op_str_sect+=con_str
@@ -335,16 +376,22 @@ def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('--debug', action="store_true")
#argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
+ argument_parser.add_argument('-o', '--output', help="Output file for context")
args = argument_parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
_logger.debug("Debug Mode enabled")
+
+ out_f = None
+ if args.output is not None:
+ out_f = open(args.output,'w')
cb = ContextBuilder()
for i in (cb.get_ttl_files(subdirs=['ontology'])):
_logger.debug(f" Input ttl: {i}")
+
cb.process_prefixes()
cb.process_DatatypeProperties()
cb.process_ObjectProperties()
@@ -354,11 +401,49 @@ def main():
cb.add_minimal_datatype_props_to_cntxt()
cb.add_key_strings_to_cntxt()
cb.close_context_str()
- print(cb.context_str)
+ #return
+ #print(cb.context_str)
+ if out_f is not None:
+ out_f.write(cb.context_str)
+ out_f.flush()
+ out_f.close()
+
+ return
+
+ graph = rdflib.Graph()
+ graph.parse("../tests/uco_monolithic.ttl", format="turtle")
+ "Make sure to do an itter that looks for rdflib.OWL.class"
+ limit = 10000
+ count = 0
+ #for triple in graph.triples((None,None,rdflib.OWL.Class)):
+ #for sh_triple in graph.triples(None,"rdflib.term.URIRef('http://www.w3.org/ns/shacl#property')", None):
+ #for sh_triple in graph.triples(None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None):
+ print("###")
+ for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
+ print(f"**sh_triple:{sh_triple}")
+ print("###")
+
+ for triple in graph.triples((None,None,None)):
+ #print(triple[0].fragment)
+ #print(triple[0].n3(graph.namespace_manager))
+ print(triple)
+ sh_prop_node = None
+ for sh_triple in graph.triples((triple[0],rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
+ print(f"\t**sh_triple:{sh_triple[2].n3(graph.namespace_manager)}")
+ sh_prop_node = sh_triple[2]
+ for triple3 in graph.triples((sh_prop_node,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'),None)):
+ print(f"\t***sh_prop_triple:{triple3}")
+ print(f"\t***sh_prop_triple:{triple3[2]}")
+
+ #for t in list(triple):
+ # print(f"{t.n3(graph.namespace_manager)}")
+ #print(triple)
+ count += 1
+ if count >= limit:
+ sys.exit()
return
- #cb.print_minimal_datatype_properties()
dt_list = list(cb.datatype_properties_dict.keys())
dt_list.sort()
From 6ed0378cdeaa3d181d382dd4e3b68838c6f3ccc3 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Mon, 8 Aug 2022 09:06:08 -0400
Subject: [PATCH 07/47] Changes prints to logger.debug() statements
---
src/uco_jsonld_context_builder.py | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 800db9a8..6d3891ab 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -170,8 +170,8 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
#If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
dtp_obj = DatatypePropertyInfo()
- print(triple)
- print(triple[0].split('/'))
+ _logger.debug(triple)
+ _logger.debug(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
@@ -181,26 +181,26 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
#Testing for Blank Nodes
if isinstance(triple2[-1],rdflib.term.BNode):
- print(f"\tBlank: {triple2}\n")
+ _logger.debug(f"\tBlank: {triple2}\n")
continue
- print(f"\ttriple2: f{triple2}\n")
+ _logger.debug(f"\ttriple2: f{triple2}\n")
rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
dtp_obj.prefixed_datatype_name=rdf_rang_str
#if str(rdf_rang_str) not in test_list:
# test_list.append(rdf_rang_str)
for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
- print(f"\t**sh_triple:{sh_triple}")
+ _logger.debug(f"\t\t**sh_triple:{sh_triple}")
dtp_obj.shacl_property_bnode=sh_triple[0]
for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
- print(f"\t\t***sh_triple:{sh_triple2}")
- print(f"\t\t***sh_triple:{sh_triple2[2]}")
+ _logger.debug(f"\t\t***sh_triple:{sh_triple2}")
+ _logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
if dtp_obj.shacl_count_lte_1 is not None:
- print(f"\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ _logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
dtp_obj.shacl_count_lte_1 = True
else:
- print(f"\t\t***Large max_count: {sh_triple2[2]}")
+ _logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
@@ -227,7 +227,7 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
#If rdf range is a blank node, skip
for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.ObjectProperty)):
op_obj = ObjectPropertyInfo()
- #print(triple)
+ _logger.debug((triple))
#print(triple[0].split('/'))
s_triple=triple[0].split('/')
root=s_triple[-1]
@@ -237,18 +237,18 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
op_obj.root_class_name=root
for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
- print(f"**obj_sh_triple:{sh_triple}")
+ _logger.debug(f"\t**obj_sh_triple:{sh_triple}")
op_obj.shacl_property_bnode=sh_triple[0]
for sh_triple2 in graph.triples((op_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
- print(f"\t\t***sh_triple:{sh_triple2}")
- print(f"\t\t***sh_triple:{sh_triple2[2]}")
+ _logger.debug(f"\t\t***sh_triple:{sh_triple2}")
+ _logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
if op_obj.shacl_count_lte_1 is not None:
- print(f"\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ _logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
#print("\t\t**MaxCount Double Definition?")
op_obj.shacl_count_lte_1 = True
else:
- print(f"\t\t***Large max_count: {sh_triple2[2]}")
+ _logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
#for sh_triple in graph.triples((triple[0],rdflib.sh.property, None)):
From 64445579acb12169ed84dd19852171b409e92a35 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 9 Aug 2022 07:27:19 -0400
Subject: [PATCH 08/47] Linter changes
---
src/uco_jsonld_context_builder.py | 402 +++++++++++++-----------------
1 file changed, 173 insertions(+), 229 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 6d3891ab..0afbf844 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -8,11 +8,11 @@
1) json-ld context to support compaction of all IRI base paths through defined
prefixes
2) json-ld context to support compaction of all property type assertions
-3) json-ld context to support assertion of properties with potential
+3) json-ld context to support assertion of properties with potential
cardinalities >1 as set arrrays
4) json-ld context to support compaction of json-ld specific key strings @id,
- @type, @value and @graph to simple json key strings id, type, value, and
- graph such that the body of content can be viewed as simple json and the
+ @type, @value and @graph to simple json key strings id, type, value, and
+ graph such that the body of content can be viewed as simple json and the
context can be utilized to expand it into fully codified json-ld
"""
@@ -21,79 +21,87 @@
import argparse
import logging
-from multiprocessing import context
import os
import typing
import pathlib
import sys
import re
+
import rdflib
from rdflib.namespace import Namespace, NamespaceManager
_logger = logging.getLogger(os.path.basename(__file__))
+
class ObjectPropertyInfo:
- """Class to hold ObjectProperty info which will be used to build context"""
+ """Class to hold ObjectProperty info which will be used to build
+ context"""
def __init__(self):
self.ns_prefix = None
self.root_class_name = None
self.shacl_count_lte_1 = None
self.shacl_property_bnode = None
+
class DatatypePropertyInfo:
- """Class to hold DatatypeProperty info which will be used to build context"""
+ """Class to hold DatatypeProperty info which will be used to build
+ context"""
def __init__(self):
self.ns_prefix = None
self.root_property_name = None
self.prefixed_datatype_name = None
self.shacl_count_lte_1 = None
self.shacl_property_bnode = None
-
+
class ContextBuilder:
def __init__(self):
- self.ttl_file_list=None
- self.prefix_dict=None
- self.top_srcdir=None
- self.iri_dict=None
- #A dict of DataTypePropertyInfo Objects
- self.datatype_properties_dict={}
- #A dict of ObjectPropertyInfo Objects
- self.object_properties_dict={}
- #The string that will hold the processed context
+ self.ttl_file_list = None
+ self.prefix_dict = None
+ self.top_srcdir = None
+ self.iri_dict = None
+ # A dict of DataTypePropertyInfo Objects
+ self.datatype_properties_dict = {}
+ # A dict of ObjectPropertyInfo Objects
+ self.object_properties_dict = {}
+ # The string that will hold the processed context
self.context_str = ""
def init_context_str(self) -> None:
- self.context_str="{\n\t\"@context\":{\n"""
+ self.context_str = "{\n\t\"@context\":{\n"""
def close_context_str(self) -> None:
- self.context_str=self.context_str.strip()
+ self.context_str = self.context_str.strip()
if self.context_str[-1] == ',':
- self.context_str=self.context_str[:-1]
- self.context_str+="\n\t}\n}"
+ self.context_str = self.context_str[:-1]
+ self.context_str += "\n\t}\n}"
def get_ttl_files(self, subdirs=[]) -> list:
"""
Finds all turtle (.ttl) files in directory structure
- @subdirs - Optional list used to restrict search to particular directories.
+ @subdirs - Optional list used to restrict search to particular
+ directories.
"""
if self.ttl_file_list is not None:
return self.ttl_file_list
- #Shamelessly stolen from populate_node_kind.py
+ # Shamelessly stolen from populate_node_kind.py
# 0. Self-orient.
self.top_srcdir = pathlib.Path(os.path.dirname(__file__)) / ".."
- top_srcdir=self.top_srcdir
+ top_srcdir = self.top_srcdir
# Sanity check.
- assert (top_srcdir / ".git").exists(), "Hard-coded top_srcdir discovery is no longer correct."
+ assert (top_srcdir / ".git").exists(), \
+ "Hard-coded top_srcdir discovery is no longer correct."
# 1. Load all ontology files into dictionary of graphs.
- # The extra filtering step loop to keep from picking up CI files. Path.glob returns dot files, unlike shell's glob.
- # The uco.ttl file is also skipped because the Python output removes supplementary prefix statements.
+ # The extra filtering step loop to keep from picking up CI files.
+ # Path.glob returns dot files, unlike shell's glob.
+ # The uco.ttl file is also skipped because the Python output removes
+ # supplementary prefix statements.
ontology_filepaths : typing.List[pathlib.Path] = []
- file_list=[]
+ file_list = []
_logger.debug(top_srcdir)
if len(subdirs) < 1:
@@ -102,9 +110,9 @@ def get_ttl_files(self, subdirs=[]) -> list:
continue
if "uco.ttl" in str(x):
continue
- #_logger.debug(x)
+ # _logger.debug(x)
file_list.append(x)
- self.ttl_file_list=file_list
+ self.ttl_file_list = file_list
else:
for dir in subdirs:
for x in (top_srcdir / dir).rglob("*.ttl"):
@@ -112,29 +120,29 @@ def get_ttl_files(self, subdirs=[]) -> list:
continue
if "uco.ttl" in str(x):
continue
- #_logger.debug(x)
+ # _logger.debug(x)
file_list.append(x)
- self.ttl_file_list=file_list
+ self.ttl_file_list = file_list
return self.ttl_file_list
- def get_iris(self)->list:
+ def get_iris(self) -> list:
"""
Returns sorted list of IRIs as prefix:value strings
"""
- k_list=list(self.iri_dict.keys())
- #print(k_list)
+ k_list = list(self.iri_dict.keys())
+ # print(k_list)
k_list.sort()
- irs_list=[]
+ irs_list = []
for k in k_list:
- #print(f"\"{k}\":{self.iri_dict[k]}")
+ # print(f"\"{k}\":{self.iri_dict[k]}")
irs_list.append(f"\"{k}\":\"{self.iri_dict[k]}\"")
return irs_list
def add_prefixes_to_cntxt(self) -> None:
"""Adds detected prefixes to the context string"""
for i in self.get_iris():
- self.context_str+=f"{i},\n"
+ self.context_str += f"{i},\n"
def __add_to_iri_dict(self, in_prefix):
"""INTERNAL function: Adds unique key value pairs to dict
@@ -143,20 +151,20 @@ def __add_to_iri_dict(self, in_prefix):
@in_prefix - an input prefix triple
"""
if self.iri_dict is None:
- self.iri_dict={}
+ self.iri_dict = {}
iri_dict = self.iri_dict
- t_split=in_prefix.split()
- #Taking the ':' off the end of the key
- k=t_split[1][:-1]
- v=t_split[2]
+ t_split = in_prefix.split()
+ # Taking the ':' off the end of the key
+ k = t_split[1][:-1]
+ v = t_split[2]
if k in iri_dict.keys():
- #_logger.debug(f"'{k}' already exists")
- if iri_dict[k]!=v:
+ # _logger.debug(f"'{k}' already exists")
+ if iri_dict[k] != v:
_logger.error(f"Mismatched values:\t{iri_dict[k]}!={v}")
sys.exit()
else:
- iri_dict[k]=v
+ iri_dict[k] = v
def __process_DatatypePropertiesHelper(self, in_file=None):
"""
@@ -166,33 +174,33 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
graph = rdflib.Graph()
graph.parse(in_file, format="turtle")
"Make sure to do an itter that looks for rdflib.OWL.class"
- #If we cannot find rdf range, skip
- #If rdf range is a blank node, skip
- for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ # If we cannot find rdf range, skip
+ # If rdf range is a blank node, skip
+ for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)):
dtp_obj = DatatypePropertyInfo()
_logger.debug(triple)
_logger.debug(triple[0].split('/'))
- s_triple=triple[0].split('/')
- root=s_triple[-1]
- ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
- #print(ns_prefix, root)
- dtp_obj.ns_prefix=ns_prefix
- dtp_obj.root_property_name=root
- for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
- #Testing for Blank Nodes
- if isinstance(triple2[-1],rdflib.term.BNode):
+ s_triple = triple[0].split('/')
+ root = s_triple[-1]
+ ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
+ # print(ns_prefix, root)
+ dtp_obj.ns_prefix = ns_prefix
+ dtp_obj.root_property_name = root
+ for triple2 in graph.triples((triple[0], rdflib.RDFS.range, None)):
+ # Testing for Blank Nodes
+ if isinstance(triple2[-1], rdflib.term.BNode):
_logger.debug(f"\tBlank: {triple2}\n")
continue
_logger.debug(f"\ttriple2: f{triple2}\n")
rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
- dtp_obj.prefixed_datatype_name=rdf_rang_str
- #if str(rdf_rang_str) not in test_list:
- # test_list.append(rdf_rang_str)
+ dtp_obj.prefixed_datatype_name = rdf_rang_str
+ # if str(rdf_rang_str) not in test_list:
+ # test_list.append(rdf_rang_str)
- for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
_logger.debug(f"\t\t**sh_triple:{sh_triple}")
- dtp_obj.shacl_property_bnode=sh_triple[0]
- for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ dtp_obj.shacl_property_bnode = sh_triple[0]
+ for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
@@ -202,13 +210,11 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
else:
_logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
-
-
if root in self.datatype_properties_dict.keys():
_logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
self.datatype_properties_dict[root].append(dtp_obj)
else:
- self.datatype_properties_dict[root]=[dtp_obj]
+ self.datatype_properties_dict[root] = [dtp_obj]
return
def process_DatatypeProperties(self):
@@ -222,54 +228,39 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
"""
graph = rdflib.Graph()
graph.parse(in_file, format="turtle")
- #Make sure to do an iter that looks for rdflib.OWL.class"
- #If we cannot find rdf range, skip
- #If rdf range is a blank node, skip
- for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.ObjectProperty)):
+ # Make sure to do an iter that looks for rdflib.OWL.class"
+ # If we cannot find rdf range, skip
+ # If rdf range is a blank node, skip
+ for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.ObjectProperty)):
op_obj = ObjectPropertyInfo()
_logger.debug((triple))
- #print(triple[0].split('/'))
- s_triple=triple[0].split('/')
- root=s_triple[-1]
- ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
- #print(ns_prefix, root)
- op_obj.ns_prefix=ns_prefix
- op_obj.root_class_name=root
-
- for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ # print(triple[0].split('/'))
+ s_triple = triple[0].split('/')
+ root = s_triple[-1]
+ ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
+ # print(ns_prefix, root)
+ op_obj.ns_prefix = ns_prefix
+ op_obj.root_class_name = root
+
+ for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
_logger.debug(f"\t**obj_sh_triple:{sh_triple}")
- op_obj.shacl_property_bnode=sh_triple[0]
- for sh_triple2 in graph.triples((op_obj.shacl_property_bnode,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ op_obj.shacl_property_bnode = sh_triple[0]
+ for sh_triple2 in graph.triples((op_obj.shacl_property_bnode, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
if op_obj.shacl_count_lte_1 is not None:
_logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
- #print("\t\t**MaxCount Double Definition?")
+ # print("\t\t**MaxCount Double Definition?")
op_obj.shacl_count_lte_1 = True
else:
_logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
-
- #for sh_triple in graph.triples((triple[0],rdflib.sh.property, None)):
- # print(f"**sh_triple:{sh_triple}")
-
- #for triple2 in graph.triples((triple[0],rdflib.RDFS.range, None)):
- # #Testing for Blank Nodes
- # if isinstance(triple2[-1],rdflib.term.BNode):
- # continue
- # rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
- # print(f"\t{rdf_rang_str}")
- # op_obj.prefixed_datatype_name=rdf_rang_str
- # #if str(rdf_rang_str) not in test_list:
- # # test_list.append(rdf_rang_str)
-
-
if root in self.object_properties_dict.keys():
_logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
self.object_properties_dict[root].append(op_obj)
else:
- self.object_properties_dict[root]=[op_obj]
+ self.object_properties_dict[root] = [op_obj]
return
def process_ObjectProperties(self):
@@ -287,95 +278,93 @@ def process_prefixes(self):
sys.exit()
for ttl_file in ttl_file_list:
- with open(ttl_file,'r') as file:
+ with open(ttl_file, 'r') as file:
for line in file:
- if re.search("^\@prefix",line):
- #_logger.debug(line.strip())
+ if re.search("^\@prefix", line):
+ # _logger.debug(line.strip())
self.__add_to_iri_dict(in_prefix=line.strip())
- def print_minimal_datatype_properties(self)->str:
+ def print_minimal_datatype_properties(self) -> str:
"""Prints DataType Properties in a format suitable for the contect"""
- dtp_str_sect=""
+ dtp_str_sect = ""
dt_list = list(self.datatype_properties_dict.keys())
dt_list.sort()
last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
for key in dt_list:
- #if len(cb.datatype_properties_dict[key]) > 1:
+ # if len(cb.datatype_properties_dict[key]) > 1:
for dtp_obj in self.datatype_properties_dict[key]:
- con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
+ con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str += f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
if (dtp_obj.prefixed_datatype_name is not None):
- con_str+=",\n"
- con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ con_str += ",\n"
+ con_str += f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
else:
- con_str+="\n"
+ con_str += "\n"
if dtp_obj != last_dtp_obj:
- con_str+="},\n"
+ con_str += "},\n"
else:
- con_str+="}\n"
- #print(dtp_obj.root_property_name)
- #print(con_str)
- dtp_str_sect+=con_str
- #print(dtp_str_sect)
- return(dtp_str_sect)
+ con_str += "}\n"
+ # print(dtp_obj.root_property_name)
+ # print(con_str)
+ dtp_str_sect += con_str
+ # print(dtp_str_sect)
+ return dtp_str_sect
def add_minimal_datatype_props_to_cntxt(self) -> None:
"""Adds Datatype Properties to context string"""
- dtp_str_sect=""
+ dtp_str_sect = ""
dt_list = list(self.datatype_properties_dict.keys())
dt_list.sort()
- #last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
+ # last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
for key in dt_list:
for dtp_obj in self.datatype_properties_dict[key]:
- con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
+ con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str += f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
if (dtp_obj.prefixed_datatype_name is not None):
- con_str+=",\n"
- con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
+ con_str += ",\n"
+ con_str += f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
else:
- con_str+="\n"
- con_str+="},\n"
-
- dtp_str_sect+=con_str
+ con_str += "\n"
+ con_str += "},\n"
- self.context_str+=dtp_str_sect
+ dtp_str_sect += con_str
+
+ self.context_str += dtp_str_sect
def add_minimal_object_props_to_cntxt(self) -> None:
"""Adds Object Properties to context string"""
- op_str_sect=""
+ op_str_sect = ""
op_list = list(self.object_properties_dict.keys())
op_list.sort()
for key in op_list:
for op_obj in self.object_properties_dict[key]:
- con_str=f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
- con_str+="\t\"@type\":\"@id\""
+ con_str = f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
+ con_str += "\t\"@type\":\"@id\""
if op_obj.shacl_count_lte_1 is not True:
- con_str+=",\n\t\"@container\":\"@set\"\n"
+ con_str += ",\n\t\"@container\":\"@set\"\n"
else:
- con_str+="\n"
+ con_str += "\n"
- con_str+="},\n"
+ con_str += "},\n"
- op_str_sect+=con_str
- self.context_str+=op_str_sect
+ op_str_sect += con_str
+ self.context_str += op_str_sect
def add_key_strings_to_cntxt(self) -> None:
"""Adds id, type, and graph key strings to context string"""
- ks_str=""
- ks_str+="\t\"uco-core:id\":\"@id\",\n"
- ks_str+="\t\"uco-core:type\":\"@type\",\n"
- ks_str+="\t\"value\":\"@value\",\n"
- ks_str+="\t\"graph\":\"@graph\",\n"
-
- self.context_str+=ks_str
-
+ ks_str = ""
+ ks_str += "\t\"uco-core:id\":\"@id\",\n"
+ ks_str += "\t\"uco-core:type\":\"@type\",\n"
+ ks_str += "\t\"value\":\"@value\",\n"
+ ks_str += "\t\"graph\":\"@graph\",\n"
+ self.context_str += ks_str
def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('--debug', action="store_true")
- #argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
+ # argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
argument_parser.add_argument('-o', '--output', help="Output file for context")
args = argument_parser.parse_args()
@@ -385,12 +374,11 @@ def main():
out_f = None
if args.output is not None:
- out_f = open(args.output,'w')
-
+ out_f = open(args.output, 'w')
+
cb = ContextBuilder()
for i in (cb.get_ttl_files(subdirs=['ontology'])):
_logger.debug(f" Input ttl: {i}")
-
cb.process_prefixes()
cb.process_DatatypeProperties()
@@ -402,115 +390,71 @@ def main():
cb.add_key_strings_to_cntxt()
cb.close_context_str()
- #return
- #print(cb.context_str)
if out_f is not None:
out_f.write(cb.context_str)
out_f.flush()
out_f.close()
-
-
+ else:
+ print(cb.context_str)
return
+ # Testing
+ graph = rdflib.Graph()
+ graph.parse("../tests/uco_monolithic.ttl", format="turtle")
+ graph.serialize("_uco_monolithic.json-ld", format="json-ld")
+ graph2 = rdflib.ConjunctiveGraph()
+ import json
+ tmp_c = json.loads(cb.context_str)
+ # graph2.parse("_uco_monolithic.json-ld", format="json-ld", context_data=tmp_c)
+ # graph2.parse("../tests/uco_monolithic.ttl", format="turtle", context_data=tmp_c)
+ graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
+ # graph.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=False)
+ # graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
+ graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
+ # graph2.serialize("__uco_monolithic.json-ld", format="json-ld", auto_compact=True)
+
+ # for triple in graph.triples((None,None,rdflib.OWL.Class)):
+ # # print(triple[0].fragment)
+ # print(triple[0].n3(graph.namespace_manager))
+ # print(f"\t{triple}")
+
+ return
graph = rdflib.Graph()
graph.parse("../tests/uco_monolithic.ttl", format="turtle")
"Make sure to do an itter that looks for rdflib.OWL.class"
limit = 10000
count = 0
- #for triple in graph.triples((None,None,rdflib.OWL.Class)):
- #for sh_triple in graph.triples(None,"rdflib.term.URIRef('http://www.w3.org/ns/shacl#property')", None):
- #for sh_triple in graph.triples(None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None):
+ # for triple in graph.triples((None,None,rdflib.OWL.Class)):
+ # for sh_triple in graph.triples(None,"rdflib.term.URIRef('http://www.w3.org/ns/shacl#property')", None):
+ # for sh_triple in graph.triples(None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None):
print("###")
- for sh_triple in graph.triples((None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
+ for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
print(f"**sh_triple:{sh_triple}")
print("###")
- for triple in graph.triples((None,None,None)):
- #print(triple[0].fragment)
- #print(triple[0].n3(graph.namespace_manager))
+ for triple in graph.triples((None ,None, None)):
+ # print(triple[0].fragment)
+ # print(triple[0].n3(graph.namespace_manager))
print(triple)
sh_prop_node = None
- for sh_triple in graph.triples((triple[0],rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
+ for sh_triple in graph.triples((triple[0], rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
print(f"\t**sh_triple:{sh_triple[2].n3(graph.namespace_manager)}")
sh_prop_node = sh_triple[2]
- for triple3 in graph.triples((sh_prop_node,rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'),None)):
+ for triple3 in graph.triples((sh_prop_node, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
print(f"\t***sh_prop_triple:{triple3}")
print(f"\t***sh_prop_triple:{triple3[2]}")
- #for t in list(triple):
- # print(f"{t.n3(graph.namespace_manager)}")
- #print(triple)
+ # for t in list(triple):
+ # print(f"{t.n3(graph.namespace_manager)}")
+ # print(triple)
count += 1
if count >= limit:
sys.exit()
return
- dt_list = list(cb.datatype_properties_dict.keys())
- dt_list.sort()
- last_dtp_obj = cb.datatype_properties_dict[dt_list[-1]][-1]
- for key in dt_list:
- #if len(cb.datatype_properties_dict[key]) > 1:
- for dtp_obj in cb.datatype_properties_dict[key]:
- con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
- if (dtp_obj.prefixed_datatype_name is not None):
- con_str+=",\n"
- con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
- else:
- con_str+="\n"
- if dtp_obj != last_dtp_obj:
- con_str+="},\n"
- else:
- con_str+="}\n"
- print(dtp_obj.root_property_name)
- print(con_str)
- #else:
- # dtp_obj = cb.datatype_properties_dict[key][0]
- # con_str=f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- # con_str+=f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\"\n"
- # con_str+=f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
- # con_str+="}"
-
-
- """Come back to this for concise output""
- for key in dt_list:
- #Non-unique roots
- if len(cb.datatype_properties_dict[key]) > 1:
- print(f"{key}:{cb.datatype_properties_dict[key]}")
- for ns in cb.datatype_properties_dict[key]:
- con_str=f"\"{ns}:{key}\":{{"
- con_str+="\n\t\"@id\":\"%s:%s\"," % (ns,key)
- con_str+="\n\t\"@type\":\"@id\""
- con_str+="\n\t},"
- print(con_str)
- #Unique roots
- else:
- pass
- """
- return
- #from pprint import pprint
- #pprint(cb.datatype_properties_dict)
- #context keyword in graph parse and graph serialize
- #black formater FLAKE8 for isort
- #check the case-uilities python
-
- graph = rdflib.Graph()
- graph.parse("../tests/uco_monolithic.ttl", format="turtle")
- "Make sure to do an itter that looks for rdflib.OWL.class"
- limit = 4
- count = 0
- for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
- print(triple[0].fragment)
- print(triple[0].n3(graph.namespace_manager))
- print(triple)
- count += 1
- if count >= limit:
- sys.exit()
-
- #print(f"{args.in_graph}")
- #g = rdflib.Graph()
- #g.parse(args.in_graph, format="turtle")
- #g.serialize("temp.json-ld", format="json-ld")
+ # TODO: context keyword in graph parse and graph serialize
+ # TODO: black formatter FLAKE8 for isort
+ # TODO: check the case-utilities python
if __name__ == "__main__":
From a278da61636c88153af5e1dc40f25b7517cdec71 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 9 Aug 2022 09:59:20 -0400
Subject: [PATCH 09/47] Bug Fixes Changes how prefixes are parsed Changes key
terms at end of context Changes how shacl nodes are found
---
src/uco_jsonld_context_builder.py | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 0afbf844..8163c1f6 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -126,7 +126,7 @@ def get_ttl_files(self, subdirs=[]) -> list:
return self.ttl_file_list
- def get_iris(self) -> list:
+ def get_iris(self) -> typing.List[str]:
"""
Returns sorted list of IRIs as prefix:value strings
"""
@@ -158,6 +158,7 @@ def __add_to_iri_dict(self, in_prefix):
# Taking the ':' off the end of the key
k = t_split[1][:-1]
v = t_split[2]
+ v=v.strip()[1:-1]
if k in iri_dict.keys():
# _logger.debug(f"'{k}' already exists")
if iri_dict[k] != v:
@@ -197,10 +198,10 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
# if str(rdf_rang_str) not in test_list:
# test_list.append(rdf_rang_str)
- for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
_logger.debug(f"\t\t**sh_triple:{sh_triple}")
dtp_obj.shacl_property_bnode = sh_triple[0]
- for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode, rdflib.SH.maxCount, None)):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
@@ -242,10 +243,10 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
op_obj.ns_prefix = ns_prefix
op_obj.root_class_name = root
- for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'), triple[0])):
+ for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
_logger.debug(f"\t**obj_sh_triple:{sh_triple}")
op_obj.shacl_property_bnode = sh_triple[0]
- for sh_triple2 in graph.triples((op_obj.shacl_property_bnode, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
+ for sh_triple2 in graph.triples((op_obj.shacl_property_bnode, rdflib.SH.maxCount, None)):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
@@ -353,8 +354,8 @@ def add_minimal_object_props_to_cntxt(self) -> None:
def add_key_strings_to_cntxt(self) -> None:
"""Adds id, type, and graph key strings to context string"""
ks_str = ""
- ks_str += "\t\"uco-core:id\":\"@id\",\n"
- ks_str += "\t\"uco-core:type\":\"@type\",\n"
+ ks_str += "\t\"id\":\"@id\",\n"
+ ks_str += "\t\"type\":\"@type\",\n"
ks_str += "\t\"value\":\"@value\",\n"
ks_str += "\t\"graph\":\"@graph\",\n"
@@ -396,23 +397,23 @@ def main():
out_f.close()
else:
print(cb.context_str)
- return
+ #return
# Testing
graph = rdflib.Graph()
graph.parse("../tests/uco_monolithic.ttl", format="turtle")
graph.serialize("_uco_monolithic.json-ld", format="json-ld")
- graph2 = rdflib.ConjunctiveGraph()
+ graph2 = rdflib.Graph()
import json
tmp_c = json.loads(cb.context_str)
- # graph2.parse("_uco_monolithic.json-ld", format="json-ld", context_data=tmp_c)
+ graph2.parse("_uco_monolithic.json-ld", format="json-ld", context_data=tmp_c)
# graph2.parse("../tests/uco_monolithic.ttl", format="turtle", context_data=tmp_c)
- graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
+ # graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
# graph.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=False)
# graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
# graph2.serialize("__uco_monolithic.json-ld", format="json-ld", auto_compact=True)
-
+ return
# for triple in graph.triples((None,None,rdflib.OWL.Class)):
# # print(triple[0].fragment)
# print(triple[0].n3(graph.namespace_manager))
From b2742c17c36a1620314947809beb793ad8b5ac0d Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 9 Aug 2022 12:13:38 -0400
Subject: [PATCH 10/47] Adding initial testing for issue #423
---
.../action_result_NO_CONTEXT.json | 93 +++++++++++++++++++
tests/context_builder/context_tester.py | 29 ++++++
2 files changed, 122 insertions(+)
create mode 100644 tests/context_builder/action_result_NO_CONTEXT.json
create mode 100644 tests/context_builder/context_tester.py
diff --git a/tests/context_builder/action_result_NO_CONTEXT.json b/tests/context_builder/action_result_NO_CONTEXT.json
new file mode 100644
index 00000000..c6cd83ba
--- /dev/null
+++ b/tests/context_builder/action_result_NO_CONTEXT.json
@@ -0,0 +1,93 @@
+{
+ "@graph": [
+ {
+ "@id": "kb:action-1",
+ "@type": "action:Action",
+ "rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
+ "action:result": [
+ {
+ "@id": "kb:node-0"
+ },
+ {
+ "@id": "kb:node-1"
+ },
+ {
+ "@id": "kb:node-2"
+ },
+ {
+ "@id": "kb:node-3"
+ },
+ {
+ "@id": "kb:node-4"
+ },
+ {
+ "@id": "kb:node-5"
+ },
+ {
+ "@id": "kb:node-6"
+ },
+ {
+ "@id": "kb:node-7"
+ }
+ ]
+ },
+ {
+ "@id": "kb:node-0",
+ "@type": "observable:ObservableObject"
+ },
+ {
+ "@id": "kb:node-1",
+ "@type": [
+ "core:UcoObject",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-2",
+ "@type": [
+ "core:Item",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-3",
+ "@type": [
+ "core:UcoObject",
+ "core:Item",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-4",
+ "@type": [
+ "observable:Observable",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-5",
+ "@type": [
+ "core:UcoObject",
+ "observable:Observable",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-6",
+ "@type": [
+ "core:Item",
+ "observable:Observable",
+ "observable:ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-7",
+ "@type": [
+ "core:UcoObject",
+ "core:Item",
+ "observable:Observable",
+ "observable:ObservableObject"
+ ]
+ }
+ ]
+}
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
new file mode 100644
index 00000000..2a84c7f7
--- /dev/null
+++ b/tests/context_builder/context_tester.py
@@ -0,0 +1,29 @@
+#!python
+
+import json
+import rdflib
+import sys
+import subprocess
+
+test_file = "action_result_NO_CONTEXT.json"
+output_file = "temp_cntxt.json"
+# Execute Context builder
+cmd = "python ../../src/uco_jsonld_context_builder.py --output " + output_file
+print(cmd)
+subprocess.run(cmd.split())
+with open(output_file, 'r') as file:
+ tmp_c = json.load(file)
+# print(tmp_c)
+graph = rdflib.Graph()
+graph.parse(test_file, format="json-ld")
+graph.serialize("test_out_no_cntxt.json-ld", format="json-ld")
+graph2 = rdflib.Graph()
+graph2.parse(test_file, format="json-ld", context_data=tmp_c)
+graph.serialize("test_out_cntxt.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
+# graph2.parse("../tests/uco_monolithic.ttl", format="turtle", context_data=tmp_c)
+# graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
+# graph.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=False)
+# graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
+#graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
+# graph2.serialize("__uco_monolithic.json-ld", format="json-ld", auto_compact=True)
+sys.exit()
From 6aa1898ac4723cc549f60fada758d19ea43a23e9 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 9 Aug 2022 12:54:47 -0400
Subject: [PATCH 11/47] Removing testing logic from main context builder script
---
src/uco_jsonld_context_builder.py | 72 +++----------------------
tests/context_builder/context_tester.py | 28 ++++++----
2 files changed, 26 insertions(+), 74 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 8163c1f6..bb47cb14 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -76,7 +76,7 @@ def close_context_str(self) -> None:
self.context_str = self.context_str[:-1]
self.context_str += "\n\t}\n}"
- def get_ttl_files(self, subdirs=[]) -> list:
+ def get_ttl_files(self, subdirs=[]) -> typing.List[pathlib.Path]:
"""
Finds all turtle (.ttl) files in directory structure
@subdirs - Optional list used to restrict search to particular
@@ -99,8 +99,6 @@ def get_ttl_files(self, subdirs=[]) -> list:
# Path.glob returns dot files, unlike shell's glob.
# The uco.ttl file is also skipped because the Python output removes
# supplementary prefix statements.
- ontology_filepaths : typing.List[pathlib.Path] = []
-
file_list = []
_logger.debug(top_srcdir)
@@ -158,7 +156,8 @@ def __add_to_iri_dict(self, in_prefix):
# Taking the ':' off the end of the key
k = t_split[1][:-1]
v = t_split[2]
- v=v.strip()[1:-1]
+ # Taking the angle brackets off the IRIs
+ v = v.strip()[1:-1]
if k in iri_dict.keys():
# _logger.debug(f"'{k}' already exists")
if iri_dict[k] != v:
@@ -217,7 +216,7 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
else:
self.datatype_properties_dict[root] = [dtp_obj]
return
-
+
def process_DatatypeProperties(self):
for ttl_file in self.ttl_file_list:
self.__process_DatatypePropertiesHelper(in_file=ttl_file)
@@ -252,18 +251,17 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
if int(sh_triple2[2]) <= 1:
if op_obj.shacl_count_lte_1 is not None:
_logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
- # print("\t\t**MaxCount Double Definition?")
op_obj.shacl_count_lte_1 = True
else:
_logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
-
+
if root in self.object_properties_dict.keys():
_logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
self.object_properties_dict[root].append(op_obj)
else:
self.object_properties_dict[root] = [op_obj]
return
-
+
def process_ObjectProperties(self):
for ttl_file in self.ttl_file_list:
self.__process_ObjectPropertiesHelper(in_file=ttl_file)
@@ -281,8 +279,8 @@ def process_prefixes(self):
for ttl_file in ttl_file_list:
with open(ttl_file, 'r') as file:
for line in file:
- if re.search("^\@prefix", line):
- # _logger.debug(line.strip())
+ if re.search("^@prefix", line):
+ _logger.debug(f"Prefix: {ttl_file}\t{line.strip()}")
self.__add_to_iri_dict(in_prefix=line.strip())
def print_minimal_datatype_properties(self) -> str:
@@ -397,60 +395,6 @@ def main():
out_f.close()
else:
print(cb.context_str)
- #return
-
- # Testing
- graph = rdflib.Graph()
- graph.parse("../tests/uco_monolithic.ttl", format="turtle")
- graph.serialize("_uco_monolithic.json-ld", format="json-ld")
- graph2 = rdflib.Graph()
- import json
- tmp_c = json.loads(cb.context_str)
- graph2.parse("_uco_monolithic.json-ld", format="json-ld", context_data=tmp_c)
- # graph2.parse("../tests/uco_monolithic.ttl", format="turtle", context_data=tmp_c)
- # graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
- # graph.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=False)
- # graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
- graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
- # graph2.serialize("__uco_monolithic.json-ld", format="json-ld", auto_compact=True)
- return
- # for triple in graph.triples((None,None,rdflib.OWL.Class)):
- # # print(triple[0].fragment)
- # print(triple[0].n3(graph.namespace_manager))
- # print(f"\t{triple}")
-
- return
- graph = rdflib.Graph()
- graph.parse("../tests/uco_monolithic.ttl", format="turtle")
- "Make sure to do an itter that looks for rdflib.OWL.class"
- limit = 10000
- count = 0
- # for triple in graph.triples((None,None,rdflib.OWL.Class)):
- # for sh_triple in graph.triples(None,"rdflib.term.URIRef('http://www.w3.org/ns/shacl#property')", None):
- # for sh_triple in graph.triples(None,rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None):
- print("###")
- for sh_triple in graph.triples((None, rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
- print(f"**sh_triple:{sh_triple}")
- print("###")
-
- for triple in graph.triples((None ,None, None)):
- # print(triple[0].fragment)
- # print(triple[0].n3(graph.namespace_manager))
- print(triple)
- sh_prop_node = None
- for sh_triple in graph.triples((triple[0], rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'), None)):
- print(f"\t**sh_triple:{sh_triple[2].n3(graph.namespace_manager)}")
- sh_prop_node = sh_triple[2]
- for triple3 in graph.triples((sh_prop_node, rdflib.term.URIRef('http://www.w3.org/ns/shacl#maxCount'), None)):
- print(f"\t***sh_prop_triple:{triple3}")
- print(f"\t***sh_prop_triple:{triple3[2]}")
-
- # for t in list(triple):
- # print(f"{t.n3(graph.namespace_manager)}")
- # print(triple)
- count += 1
- if count >= limit:
- sys.exit()
return
# TODO: context keyword in graph parse and graph serialize
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index 2a84c7f7..aba38df1 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -4,26 +4,34 @@
import rdflib
import sys
import subprocess
+import os
+# Test graph file in JSON format
test_file = "action_result_NO_CONTEXT.json"
-output_file = "temp_cntxt.json"
+# File to which context will be written
+output_file = "_temp_cntxt.json"
+# Serialization of graph without using context
+no_cntxt_out = "_test_out_no_cntxt.json-ld"
+# Serialization of graph using context
+cntxt_out = "_test_out_cntxt.json-ld"
+
# Execute Context builder
cmd = "python ../../src/uco_jsonld_context_builder.py --output " + output_file
print(cmd)
subprocess.run(cmd.split())
+
with open(output_file, 'r') as file:
tmp_c = json.load(file)
-# print(tmp_c)
+
graph = rdflib.Graph()
graph.parse(test_file, format="json-ld")
-graph.serialize("test_out_no_cntxt.json-ld", format="json-ld")
+graph.serialize(no_cntxt_out, format="json-ld")
graph2 = rdflib.Graph()
graph2.parse(test_file, format="json-ld", context_data=tmp_c)
-graph.serialize("test_out_cntxt.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
-# graph2.parse("../tests/uco_monolithic.ttl", format="turtle", context_data=tmp_c)
-# graph2.parse("../tests/uco_monolithic.ttl", format="turtle")
-# graph.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=False)
-# graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
-#graph2.serialize("__uco_monolithic.json-ld", context_data=tmp_c, format="json-ld", auto_compact=True)
-# graph2.serialize("__uco_monolithic.json-ld", format="json-ld", auto_compact=True)
+graph.serialize(cntxt_out, context_data=tmp_c, format="json-ld", auto_compact=True)
+
+# Clean up
+# os.remove(output_file)
+# os.remove(no_cntxt_out)
+# os.remove(cntxt_out)
sys.exit()
From 9ce2a4fd3151bc88ec68ced7c0ed7ee2c72818b2 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Wed, 10 Aug 2022 12:57:55 -0400
Subject: [PATCH 12/47] Adds release statements
---
src/uco_jsonld_context_builder.py | 25 +++++++++++++++++++------
tests/context_builder/context_tester.py | 8 ++++++++
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index bb47cb14..e7e4e5c9 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -1,5 +1,10 @@
+#!python
#
-# Release Statement?
+# NOTICE
+# This software was produced for the U.S. Government under contract FA8702-22-C-0001,
+# and is subject to the Rights in Data-General Clause 52.227-14, Alt. IV (DEC 2007)
+# ©2022 The MITRE Corporation. All Rights Reserved.
+# Released under PRS 18-4297.
#
"""
@@ -275,7 +280,7 @@ def process_prefixes(self):
if len(ttl_file_list) < 1:
_logger.error("No ttls files to process")
sys.exit()
-
+
for ttl_file in ttl_file_list:
with open(ttl_file, 'r') as file:
for line in file:
@@ -345,7 +350,7 @@ def add_minimal_object_props_to_cntxt(self) -> None:
con_str += "\n"
con_str += "},\n"
-
+
op_str_sect += con_str
self.context_str += op_str_sect
@@ -363,13 +368,20 @@ def add_key_strings_to_cntxt(self) -> None:
def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('--debug', action="store_true")
- # argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
- argument_parser.add_argument('-o', '--output', help="Output file for context")
+ argument_parser.add_argument('--concise', action="store_true",
+ help="Creates a \"concise\" context. This is more compact than the \
+ default behavior which creates a \"minimal\" context")
+ argument_parser.add_argument('-o', '--output', help="Output file for context.\
+ Will print to stdout by default.")
args = argument_parser.parse_args()
+ print(args)
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+ if (args.concise):
+ logging.error("\tConsice context has not been implemented yet.")
+ sys.exit()
- _logger.debug("Debug Mode enabled")
+ _logger.debug("\t***Debug Mode enabled***")
out_f = None
if args.output is not None:
@@ -395,6 +407,7 @@ def main():
out_f.close()
else:
print(cb.context_str)
+
return
# TODO: context keyword in graph parse and graph serialize
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index aba38df1..e99267f8 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -1,4 +1,12 @@
#!python
+#
+# NOTICE
+# This software was produced for the U.S. Government under contract FA8702-22-C-0001,
+# and is subject to the Rights in Data-General Clause 52.227-14, Alt. IV (DEC 2007)
+# ©2022 The MITRE Corporation. All Rights Reserved.
+# Released under PRS 18-4297.
+#
+
import json
import rdflib
From cce55058b2d0e07d989bf9974522661ee53da9fc Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Wed, 10 Aug 2022 14:20:09 -0400
Subject: [PATCH 13/47] Adds support for "concise" version of context to
satisfy req 5 of issue #423
---
src/uco_jsonld_context_builder.py | 70 ++++++++++++++++++--
tests/context_builder/context_tester.py | 87 ++++++++++++++++---------
2 files changed, 119 insertions(+), 38 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index e7e4e5c9..299464ac 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -335,6 +335,35 @@ def add_minimal_datatype_props_to_cntxt(self) -> None:
self.context_str += dtp_str_sect
+ def add_concise_datatype_props_to_cntxt(self) -> None:
+ """Adds Datatype Properties to context string"""
+ dtp_str_sect = ""
+ dtp_list = list(self.datatype_properties_dict.keys())
+ dtp_list.sort()
+ for key in dtp_list:
+ if len(self.datatype_properties_dict[key]) > 1:
+ for dtp_obj in self.datatype_properties_dict[key]:
+ # print(dtp_obj.ns_prefix, key)
+ con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
+ con_str += "\t\"@type\":\"@id\""
+ if dtp_obj.shacl_count_lte_1 is not True:
+ con_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ con_str += "\n"
+ con_str += "},\n"
+ dtp_str_sect += con_str
+ else:
+ for dtp_obj in self.datatype_properties_dict[key]:
+ con_str = f"\"{dtp_obj.root_property_name}\":{{\n"
+ con_str += "\t\"@type\":\"@id\""
+ if dtp_obj.shacl_count_lte_1 is not True:
+ con_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ con_str += "\n"
+ con_str += "},\n"
+ dtp_str_sect += con_str
+ self.context_str += dtp_str_sect
+
def add_minimal_object_props_to_cntxt(self) -> None:
"""Adds Object Properties to context string"""
op_str_sect = ""
@@ -353,6 +382,35 @@ def add_minimal_object_props_to_cntxt(self) -> None:
op_str_sect += con_str
self.context_str += op_str_sect
+
+ def add_concise_object_props_to_cntxt(self) -> None:
+ """Adds Object Properties to context string"""
+ op_str_sect = ""
+ op_list = list(self.object_properties_dict.keys())
+ op_list.sort()
+ for key in op_list:
+ if len(self.object_properties_dict[key]) > 1:
+ for op_obj in self.object_properties_dict[key]:
+ # print(op_obj.ns_prefix, op_obj.root_class_name)
+ con_str = f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
+ con_str += "\t\"@type\":\"@id\""
+ if op_obj.shacl_count_lte_1 is not True:
+ con_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ con_str += "\n"
+ con_str += "},\n"
+ op_str_sect += con_str
+ else:
+ for op_obj in self.object_properties_dict[key]:
+ con_str = f"\"{op_obj.root_class_name}\":{{\n"
+ con_str += "\t\"@type\":\"@id\""
+ if op_obj.shacl_count_lte_1 is not True:
+ con_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ con_str += "\n"
+ con_str += "},\n"
+ op_str_sect += con_str
+ self.context_str += op_str_sect
def add_key_strings_to_cntxt(self) -> None:
"""Adds id, type, and graph key strings to context string"""
@@ -375,11 +433,7 @@ def main():
Will print to stdout by default.")
args = argument_parser.parse_args()
- print(args)
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
- if (args.concise):
- logging.error("\tConsice context has not been implemented yet.")
- sys.exit()
_logger.debug("\t***Debug Mode enabled***")
@@ -396,8 +450,12 @@ def main():
cb.process_ObjectProperties()
cb.init_context_str()
cb.add_prefixes_to_cntxt()
- cb.add_minimal_object_props_to_cntxt()
- cb.add_minimal_datatype_props_to_cntxt()
+ if args.concise:
+ cb.add_concise_object_props_to_cntxt()
+ cb.add_concise_datatype_props_to_cntxt()
+ else:
+ cb.add_minimal_object_props_to_cntxt()
+ cb.add_minimal_datatype_props_to_cntxt()
cb.add_key_strings_to_cntxt()
cb.close_context_str()
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index e99267f8..4eccd4cb 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -1,45 +1,68 @@
#!python
#
# NOTICE
-# This software was produced for the U.S. Government under contract FA8702-22-C-0001,
-# and is subject to the Rights in Data-General Clause 52.227-14, Alt. IV (DEC 2007)
+# This software was produced for the U.S. Government under contract
+# FA8702-22-C-0001, and is subject to the Rights in Data-General
+# Clause 52.227-14, Alt. IV (DEC 2007)
+#
# ©2022 The MITRE Corporation. All Rights Reserved.
# Released under PRS 18-4297.
#
+import argparse
import json
import rdflib
-import sys
import subprocess
import os
-# Test graph file in JSON format
-test_file = "action_result_NO_CONTEXT.json"
-# File to which context will be written
-output_file = "_temp_cntxt.json"
-# Serialization of graph without using context
-no_cntxt_out = "_test_out_no_cntxt.json-ld"
-# Serialization of graph using context
-cntxt_out = "_test_out_cntxt.json-ld"
-
-# Execute Context builder
-cmd = "python ../../src/uco_jsonld_context_builder.py --output " + output_file
-print(cmd)
-subprocess.run(cmd.split())
-
-with open(output_file, 'r') as file:
- tmp_c = json.load(file)
-
-graph = rdflib.Graph()
-graph.parse(test_file, format="json-ld")
-graph.serialize(no_cntxt_out, format="json-ld")
-graph2 = rdflib.Graph()
-graph2.parse(test_file, format="json-ld", context_data=tmp_c)
-graph.serialize(cntxt_out, context_data=tmp_c, format="json-ld", auto_compact=True)
-
-# Clean up
-# os.remove(output_file)
-# os.remove(no_cntxt_out)
-# os.remove(cntxt_out)
-sys.exit()
+
+def main():
+
+ arg_parser = argparse.ArgumentParser()
+ arg_parser.add_argument("--skip-clean", action="store_true",
+ help="Keeps intermediate test files instead of \
+ automatic deletion")
+ arg_parser.add_argument('--concise', action="store_true",
+ help="Perform testing on \"concise\" context instead of \"minimal\"")
+ args = arg_parser.parse_args()
+ print(args)
+
+ # Test graph file in JSON format
+ test_file = "action_result_NO_CONTEXT.json"
+ # File to which context will be written
+ output_file = "_temp_cntxt.json"
+ # Serialization of graph without using context
+ no_cntxt_out = "_test_out_no_cntxt.json-ld"
+ # Serialization of graph using context
+ cntxt_out = "_test_out_cntxt.json-ld"
+ # Execute Context builder
+ if args.concise:
+ cmd = "python ../../src/uco_jsonld_context_builder.py\
+ --concise --output " + output_file
+ else:
+ cmd = "python ../../src/uco_jsonld_context_builder.py\
+ --output " + output_file
+
+ print(cmd)
+ subprocess.run(cmd.split())
+ with open(output_file, 'r') as file:
+ tmp_c = json.load(file)
+ graph = rdflib.Graph()
+ graph.parse(test_file, format="json-ld")
+ graph.serialize(no_cntxt_out, format="json-ld")
+ graph2 = rdflib.Graph()
+ graph2.parse(test_file, format="json-ld", context_data=tmp_c)
+ graph.serialize(cntxt_out, context_data=tmp_c,
+ format="json-ld", auto_compact=True)
+
+ # Clean up
+ if not args.skip_clean:
+ os.remove(output_file)
+ os.remove(no_cntxt_out)
+ os.remove(cntxt_out)
+ return
+
+
+if __name__ == '__main__':
+ main()
From 735e0899f9dbead5c0d3e1266b192e8472cb1ceb Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Wed, 10 Aug 2022 16:25:11 -0400
Subject: [PATCH 14/47] Moving json string building from context building class
to object type classes
---
src/uco_jsonld_context_builder.py | 133 ++++++++++++------------------
1 file changed, 51 insertions(+), 82 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 299464ac..c3367419 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -46,6 +46,27 @@ def __init__(self):
self.root_class_name = None
self.shacl_count_lte_1 = None
self.shacl_property_bnode = None
+
+ def __get_json(self, hdr) -> str:
+ json_str = hdr
+ json_str += "\t\"@type\":\"@id\""
+ if self.shacl_count_lte_1 is not True:
+ json_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ json_str += "\n"
+
+ json_str += "},\n"
+ return json_str
+
+ def get_minimal_json(self) -> str:
+ hdr_str = f"\"{self.ns_prefix}:{self.root_class_name}\":{{\n"
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+ def get_concise_json(self) -> str:
+ hdr_str = f"\"{self.root_class_name}\":{{\n"
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
class DatatypePropertyInfo:
@@ -58,6 +79,30 @@ def __init__(self):
self.shacl_count_lte_1 = None
self.shacl_property_bnode = None
+ def __get_json(self, hdr) -> str:
+ json_str = hdr
+ json_str += \
+ f"\t\"@id\":\"{self.ns_prefix}:{self.root_property_name}\""
+ if (self.prefixed_datatype_name is not None):
+ json_str += ",\n"
+ json_str += f"\t\"@type\":\"{self.prefixed_datatype_name}\""
+ if self.shacl_count_lte_1 is not True:
+ json_str += ",\n\t\"@container\":\"@set\"\n"
+ else:
+ json_str += "\n"
+ json_str += "},\n"
+ return json_str
+
+ def get_minimal_json(self) -> str:
+ hdr_str = f"\"{self.ns_prefix}:{self.root_property_name}\":{{\n"
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+ def get_concise_json(self) -> str:
+ hdr_str = f"\"{self.root_property_name}\":{{\n"
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
class ContextBuilder:
def __init__(self):
@@ -199,8 +244,6 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
_logger.debug(f"\ttriple2: f{triple2}\n")
rdf_rang_str = str(triple2[-1].n3(graph.namespace_manager))
dtp_obj.prefixed_datatype_name = rdf_rang_str
- # if str(rdf_rang_str) not in test_list:
- # test_list.append(rdf_rang_str)
for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
_logger.debug(f"\t\t**sh_triple:{sh_triple}")
@@ -288,32 +331,6 @@ def process_prefixes(self):
_logger.debug(f"Prefix: {ttl_file}\t{line.strip()}")
self.__add_to_iri_dict(in_prefix=line.strip())
- def print_minimal_datatype_properties(self) -> str:
- """Prints DataType Properties in a format suitable for the contect"""
- dtp_str_sect = ""
- dt_list = list(self.datatype_properties_dict.keys())
- dt_list.sort()
- last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
- for key in dt_list:
- # if len(cb.datatype_properties_dict[key]) > 1:
- for dtp_obj in self.datatype_properties_dict[key]:
- con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str += f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
- if (dtp_obj.prefixed_datatype_name is not None):
- con_str += ",\n"
- con_str += f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
- else:
- con_str += "\n"
- if dtp_obj != last_dtp_obj:
- con_str += "},\n"
- else:
- con_str += "}\n"
- # print(dtp_obj.root_property_name)
- # print(con_str)
- dtp_str_sect += con_str
- # print(dtp_str_sect)
- return dtp_str_sect
-
def add_minimal_datatype_props_to_cntxt(self) -> None:
"""Adds Datatype Properties to context string"""
dtp_str_sect = ""
@@ -322,17 +339,7 @@ def add_minimal_datatype_props_to_cntxt(self) -> None:
# last_dtp_obj = self.datatype_properties_dict[dt_list[-1]][-1]
for key in dt_list:
for dtp_obj in self.datatype_properties_dict[key]:
- con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str += f"\t\"@id\":\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\""
- if (dtp_obj.prefixed_datatype_name is not None):
- con_str += ",\n"
- con_str += f"\t\"@type\":\"{dtp_obj.prefixed_datatype_name}\"\n"
- else:
- con_str += "\n"
- con_str += "},\n"
-
- dtp_str_sect += con_str
-
+ dtp_str_sect += dtp_obj.get_minimal_json()
self.context_str += dtp_str_sect
def add_concise_datatype_props_to_cntxt(self) -> None:
@@ -343,25 +350,10 @@ def add_concise_datatype_props_to_cntxt(self) -> None:
for key in dtp_list:
if len(self.datatype_properties_dict[key]) > 1:
for dtp_obj in self.datatype_properties_dict[key]:
- # print(dtp_obj.ns_prefix, key)
- con_str = f"\"{dtp_obj.ns_prefix}:{dtp_obj.root_property_name}\":{{\n"
- con_str += "\t\"@type\":\"@id\""
- if dtp_obj.shacl_count_lte_1 is not True:
- con_str += ",\n\t\"@container\":\"@set\"\n"
- else:
- con_str += "\n"
- con_str += "},\n"
- dtp_str_sect += con_str
+ dtp_str_sect += dtp_obj.get_minimal_json()
else:
for dtp_obj in self.datatype_properties_dict[key]:
- con_str = f"\"{dtp_obj.root_property_name}\":{{\n"
- con_str += "\t\"@type\":\"@id\""
- if dtp_obj.shacl_count_lte_1 is not True:
- con_str += ",\n\t\"@container\":\"@set\"\n"
- else:
- con_str += "\n"
- con_str += "},\n"
- dtp_str_sect += con_str
+ dtp_str_sect += dtp_obj.get_concise_json()
self.context_str += dtp_str_sect
def add_minimal_object_props_to_cntxt(self) -> None:
@@ -371,16 +363,7 @@ def add_minimal_object_props_to_cntxt(self) -> None:
op_list.sort()
for key in op_list:
for op_obj in self.object_properties_dict[key]:
- con_str = f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
- con_str += "\t\"@type\":\"@id\""
- if op_obj.shacl_count_lte_1 is not True:
- con_str += ",\n\t\"@container\":\"@set\"\n"
- else:
- con_str += "\n"
-
- con_str += "},\n"
-
- op_str_sect += con_str
+ op_str_sect += op_obj.get_minimal_json()
self.context_str += op_str_sect
def add_concise_object_props_to_cntxt(self) -> None:
@@ -392,24 +375,10 @@ def add_concise_object_props_to_cntxt(self) -> None:
if len(self.object_properties_dict[key]) > 1:
for op_obj in self.object_properties_dict[key]:
# print(op_obj.ns_prefix, op_obj.root_class_name)
- con_str = f"\"{op_obj.ns_prefix}:{op_obj.root_class_name}\":{{\n"
- con_str += "\t\"@type\":\"@id\""
- if op_obj.shacl_count_lte_1 is not True:
- con_str += ",\n\t\"@container\":\"@set\"\n"
- else:
- con_str += "\n"
- con_str += "},\n"
- op_str_sect += con_str
+ op_str_sect += op_obj.get_minimal_json()
else:
for op_obj in self.object_properties_dict[key]:
- con_str = f"\"{op_obj.root_class_name}\":{{\n"
- con_str += "\t\"@type\":\"@id\""
- if op_obj.shacl_count_lte_1 is not True:
- con_str += ",\n\t\"@container\":\"@set\"\n"
- else:
- con_str += "\n"
- con_str += "},\n"
- op_str_sect += con_str
+ op_str_sect += op_obj.get_concise_json()
self.context_str += op_str_sect
def add_key_strings_to_cntxt(self) -> None:
From 8b8905e746a42203af7ba91ef021ba9e17503753 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 11:43:42 -0400
Subject: [PATCH 15/47] Incorporate context builder tests into CI Makefile
workflow
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
tests/Makefile | 3 +++
tests/context_builder/Makefile | 39 ++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
create mode 100644 tests/context_builder/Makefile
diff --git a/tests/Makefile b/tests/Makefile
index ec5684ca..4aa0cc7a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -54,6 +54,9 @@ check: \
$(MAKE) \
--directory examples \
check
+ $(MAKE) \
+ --directory context_builder \
+ check
clean:
@$(MAKE) \
diff --git a/tests/context_builder/Makefile b/tests/context_builder/Makefile
new file mode 100644
index 00000000..4dbb5d3c
--- /dev/null
+++ b/tests/context_builder/Makefile
@@ -0,0 +1,39 @@
+#!/usr/bin/make -f
+
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to title 17 Section 105 of the
+# United States Code this software is not subject to copyright
+# protection and is in the public domain. NIST assumes no
+# responsibility whatsoever for its use by other parties, and makes
+# no guarantees, expressed or implied, about its quality,
+# reliability, or any other characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+SHELL := /bin/bash
+
+top_srcdir := $(shell cd ../.. ; pwd)
+
+tests_srcdir := $(top_srcdir)/tests
+
+all:
+
+.PHONY: \
+ check-minimal \
+ check-concise
+
+check: \
+ check-minimal \
+ check-concise
+
+check-concise: \
+ $(tests_srcdir)/.venv.done.log
+ source $(tests_srcdir)/venv/bin/activate \
+ && python3 context_tester.py \
+ --concise
+
+check-minimal: \
+ $(tests_srcdir)/.venv.done.log
+ source $(tests_srcdir)/venv/bin/activate \
+ && python3 context_tester.py
From 15d3669662cbc17f798b8f3f85b86f4531ac5509 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:03:26 -0400
Subject: [PATCH 16/47] Change prefixes to match 'uco-' pattern
Signed-off-by: Alex Nelson
---
.../action_result_NO_CONTEXT.json | 44 +++++++++----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/tests/context_builder/action_result_NO_CONTEXT.json b/tests/context_builder/action_result_NO_CONTEXT.json
index c6cd83ba..5e5407ec 100644
--- a/tests/context_builder/action_result_NO_CONTEXT.json
+++ b/tests/context_builder/action_result_NO_CONTEXT.json
@@ -2,9 +2,9 @@
"@graph": [
{
"@id": "kb:action-1",
- "@type": "action:Action",
+ "@type": "uco-action:Action",
"rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
- "action:result": [
+ "uco-action:result": [
{
"@id": "kb:node-0"
},
@@ -33,60 +33,60 @@
},
{
"@id": "kb:node-0",
- "@type": "observable:ObservableObject"
+ "@type": "uco-observable:ObservableObject"
},
{
"@id": "kb:node-1",
"@type": [
- "core:UcoObject",
- "observable:ObservableObject"
+ "uco-core:UcoObject",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-2",
"@type": [
- "core:Item",
- "observable:ObservableObject"
+ "uco-core:Item",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-3",
"@type": [
- "core:UcoObject",
- "core:Item",
- "observable:ObservableObject"
+ "uco-core:UcoObject",
+ "uco-core:Item",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-4",
"@type": [
- "observable:Observable",
- "observable:ObservableObject"
+ "uco-observable:Observable",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-5",
"@type": [
- "core:UcoObject",
- "observable:Observable",
- "observable:ObservableObject"
+ "uco-core:UcoObject",
+ "uco-observable:Observable",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-6",
"@type": [
- "core:Item",
- "observable:Observable",
- "observable:ObservableObject"
+ "uco-core:Item",
+ "uco-observable:Observable",
+ "uco-observable:ObservableObject"
]
},
{
"@id": "kb:node-7",
"@type": [
- "core:UcoObject",
- "core:Item",
- "observable:Observable",
- "observable:ObservableObject"
+ "uco-core:UcoObject",
+ "uco-core:Item",
+ "uco-observable:Observable",
+ "uco-observable:ObservableObject"
]
}
]
From cb78b82ead1187417083a16e19566755b56cb58c Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:03:51 -0400
Subject: [PATCH 17/47] Add pytest
Signed-off-by: Alex Nelson
---
tests/context_builder/Makefile | 34 ++++++++++++++++++--
tests/context_builder/test_context.py | 46 +++++++++++++++++++++++++++
2 files changed, 78 insertions(+), 2 deletions(-)
create mode 100644 tests/context_builder/test_context.py
diff --git a/tests/context_builder/Makefile b/tests/context_builder/Makefile
index 4dbb5d3c..3aec88cb 100644
--- a/tests/context_builder/Makefile
+++ b/tests/context_builder/Makefile
@@ -26,14 +26,44 @@ all:
check: \
check-minimal \
check-concise
+ source $(tests_srcdir)/venv/bin/activate \
+ && pytest \
+ --log-level=DEBUG
check-concise: \
- $(tests_srcdir)/.venv.done.log
+ context-concise.json
source $(tests_srcdir)/venv/bin/activate \
&& python3 context_tester.py \
--concise
check-minimal: \
- $(tests_srcdir)/.venv.done.log
+ context-minimal.json
source $(tests_srcdir)/venv/bin/activate \
&& python3 context_tester.py
+
+context-concise.json: \
+ $(tests_srcdir)/.venv.done.log \
+ $(top_srcdir)/src/uco_jsonld_context_builder.py
+ source $(tests_srcdir)/venv/bin/activate \
+ && python3 $(top_srcdir)/src/uco_jsonld_context_builder.py \
+ --output __$@
+ # Normalize generated file.
+ python3 -m json.tool \
+ __$@ \
+ _$@
+ rm __$@
+ mv _$@ $@
+
+context-minimal.json: \
+ $(tests_srcdir)/.venv.done.log \
+ $(top_srcdir)/src/uco_jsonld_context_builder.py
+ source $(tests_srcdir)/venv/bin/activate \
+ && python3 $(top_srcdir)/src/uco_jsonld_context_builder.py \
+ --concise \
+ --output __$@
+ # Normalize generated file.
+ python3 -m json.tool \
+ __$@ \
+ _$@
+ rm __$@
+ mv _$@ $@
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
new file mode 100644
index 00000000..d63129a4
--- /dev/null
+++ b/tests/context_builder/test_context.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to title 17 Section 105 of the
+# United States Code this software is not subject to copyright
+# protection and is in the public domain. NIST assumes no
+# responsibility whatsoever for its use by other parties, and makes
+# no guarantees, expressed or implied, about its quality,
+# reliability, or any other characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+import json
+from typing import Any, Dict
+
+from rdflib import Graph
+
+
+def _test_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
+ expected = 8
+ computed = 0
+
+ context_object: Dict[str, Any]
+ with open(input_context_file, "r") as context_fh:
+ context_object = json.load(context_fh)
+
+ graph = Graph()
+ graph.parse(input_graph_file, context=context_object)
+ for result in graph.query("""\
+SELECT ?nResult
+WHERE {
+ ?nAction uco-action:result ?nResult .
+}
+"""):
+ computed += 1
+
+ assert expected == computed
+
+
+def test_context_concise() -> None:
+ _test_graph_context_query("action_result_NO_CONTEXT.json", "context-concise.json")
+
+
+def test_context_minimal() -> None:
+ _test_graph_context_query("action_result_NO_CONTEXT.json", "context-minimal.json")
From f2b48ef82b3aed7a7a3fd24d29750fb73a48086a Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:08:19 -0400
Subject: [PATCH 18/47] Do not rely on picking up prefixes from graph
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index d63129a4..ec4e59f6 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -28,6 +28,7 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
graph = Graph()
graph.parse(input_graph_file, context=context_object)
for result in graph.query("""\
+PREFIX uco-action:
SELECT ?nResult
WHERE {
?nAction uco-action:result ?nResult .
From f1b0f3d3a8592e6ab419daa66f20486ec499a0a7 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:16:08 -0400
Subject: [PATCH 19/47] Add necessarily-local kb prefix
Signed-off-by: Alex Nelson
---
tests/context_builder/action_result_NO_CONTEXT.json | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tests/context_builder/action_result_NO_CONTEXT.json b/tests/context_builder/action_result_NO_CONTEXT.json
index 5e5407ec..eb25154b 100644
--- a/tests/context_builder/action_result_NO_CONTEXT.json
+++ b/tests/context_builder/action_result_NO_CONTEXT.json
@@ -1,4 +1,7 @@
{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
"@graph": [
{
"@id": "kb:action-1",
From 458da1ee740ae063562ee14ec26b605fbc31c777 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:16:25 -0400
Subject: [PATCH 20/47] Test non-UCO concepts load from graph parse
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index ec4e59f6..844a9f3f 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -14,7 +14,7 @@
import json
from typing import Any, Dict
-from rdflib import Graph
+from rdflib import Graph, RDF, RDFS
def _test_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
@@ -27,6 +27,24 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
graph = Graph()
graph.parse(input_graph_file, context=context_object)
+
+ # The graph should at least include 8 statements of the form
+ # 'x uco-action:result y .' Actual length includes the rdfs:comment
+ # and type declarations, but is otherwise unimportant.
+ assert 8 < len(graph), "Graph failed to parse into triples."
+
+ # The rdf:types must be supported by the context parse.
+ count_of_types = 0
+ for triple in graph.triples((None, RDF.type, None)):
+ count_of_types += 1
+ assert 0 < count_of_types, "Graph failed to parse non-UCO concept from RDF."
+
+ # The rdfs:comment must be supported by the context parse.
+ count_of_comments = 0
+ for triple in graph.triples((None, RDFS.comment, None)):
+ count_of_comments += 1
+ assert 0 < count_of_comments, "Graph failed to parse non-UCO concept from RDFS."
+
for result in graph.query("""\
PREFIX uco-action:
SELECT ?nResult
From 386551643d815549a8794d8ec4e68ed6f78c694c Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 11 Aug 2022 12:21:38 -0400
Subject: [PATCH 21/47] Add debug dump for review
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 844a9f3f..885183e8 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -12,6 +12,7 @@
# We would appreciate acknowledgement if the software is used.
import json
+import logging
from typing import Any, Dict
from rdflib import Graph, RDF, RDFS
@@ -54,7 +55,13 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
"""):
computed += 1
- assert expected == computed
+ try:
+ assert expected == computed
+ except AssertionError:
+ # Provide a debug dump of the graph before forwarding assertion error.
+ for triple in sorted(graph.triples((None, None, None))):
+ logging.debug(triple)
+ raise
def test_context_concise() -> None:
From ec3b35dcf74558ae2133c03321d748a8545e9710 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Fri, 12 Aug 2022 12:47:14 -0400
Subject: [PATCH 22/47] Adds 'uco-' prefix to specific prefixes during
generation
---
src/uco_jsonld_context_builder.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index c3367419..007d809c 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -184,7 +184,13 @@ def get_iris(self) -> typing.List[str]:
irs_list = []
for k in k_list:
# print(f"\"{k}\":{self.iri_dict[k]}")
- irs_list.append(f"\"{k}\":\"{self.iri_dict[k]}\"")
+ # prepend "uco-" to specific IRIs
+ v = self.iri_dict[k]
+ #_logger.debug(v.split('/'))
+ if ('uco'in v.split('/')) and ('ontology.unifiedcyberontology.org' in v.split('/')):
+ irs_list.append(f"\"uco-{k}\":\"{v}\"")
+ else:
+ irs_list.append(f"\"{k}\":\"{v}\"")
return irs_list
def add_prefixes_to_cntxt(self) -> None:
@@ -192,7 +198,7 @@ def add_prefixes_to_cntxt(self) -> None:
for i in self.get_iris():
self.context_str += f"{i},\n"
- def __add_to_iri_dict(self, in_prefix):
+ def __add_to_iri_dict(self, in_prefix: str):
"""INTERNAL function: Adds unique key value pairs to dict
that will be used to generate context. Dies if inconsistent
key value pair is found.
From 5fa38a0a557e9b7493ae58a474648e64a3d0e341 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Mon, 15 Aug 2022 09:10:45 -0400
Subject: [PATCH 23/47] Alters Makefile to fix concise vs minimal context
generation
---
tests/context_builder/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/context_builder/Makefile b/tests/context_builder/Makefile
index 3aec88cb..e6834780 100644
--- a/tests/context_builder/Makefile
+++ b/tests/context_builder/Makefile
@@ -46,6 +46,7 @@ context-concise.json: \
$(top_srcdir)/src/uco_jsonld_context_builder.py
source $(tests_srcdir)/venv/bin/activate \
&& python3 $(top_srcdir)/src/uco_jsonld_context_builder.py \
+ --concise \
--output __$@
# Normalize generated file.
python3 -m json.tool \
@@ -59,7 +60,6 @@ context-minimal.json: \
$(top_srcdir)/src/uco_jsonld_context_builder.py
source $(tests_srcdir)/venv/bin/activate \
&& python3 $(top_srcdir)/src/uco_jsonld_context_builder.py \
- --concise \
--output __$@
# Normalize generated file.
python3 -m json.tool \
From 7eee88df90d7a98c26cd709f7f16a387179a882d Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 16 Aug 2022 10:41:38 -0400
Subject: [PATCH 24/47] A bit of formatting and clean up
---
src/uco_jsonld_context_builder.py | 118 ++++++++++++++----------
tests/context_builder/context_tester.py | 12 ++-
tests/context_builder/test_context.py | 15 ++-
3 files changed, 89 insertions(+), 56 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 007d809c..c816485e 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -33,45 +33,46 @@
import re
import rdflib
-from rdflib.namespace import Namespace, NamespaceManager
_logger = logging.getLogger(os.path.basename(__file__))
class ObjectPropertyInfo:
"""Class to hold ObjectProperty info which will be used to build
- context"""
+ context"""
+
def __init__(self):
self.ns_prefix = None
self.root_class_name = None
self.shacl_count_lte_1 = None
self.shacl_property_bnode = None
-
+
def __get_json(self, hdr) -> str:
json_str = hdr
- json_str += "\t\"@type\":\"@id\""
+ json_str += '\t"@type":"@id"'
if self.shacl_count_lte_1 is not True:
- json_str += ",\n\t\"@container\":\"@set\"\n"
+ json_str += ',\n\t"@container":"@set"\n'
else:
json_str += "\n"
json_str += "},\n"
- return json_str
+ return json_str
def get_minimal_json(self) -> str:
- hdr_str = f"\"{self.ns_prefix}:{self.root_class_name}\":{{\n"
+ hdr_str = f'"{self.ns_prefix}:{self.root_class_name}":{{\n'
json_str = self.__get_json(hdr=hdr_str)
return json_str
def get_concise_json(self) -> str:
- hdr_str = f"\"{self.root_class_name}\":{{\n"
+ hdr_str = f'"{self.root_class_name}":{{\n'
json_str = self.__get_json(hdr=hdr_str)
return json_str
class DatatypePropertyInfo:
"""Class to hold DatatypeProperty info which will be used to build
- context"""
+ context"""
+
def __init__(self):
self.ns_prefix = None
self.root_property_name = None
@@ -81,25 +82,24 @@ def __init__(self):
def __get_json(self, hdr) -> str:
json_str = hdr
- json_str += \
- f"\t\"@id\":\"{self.ns_prefix}:{self.root_property_name}\""
- if (self.prefixed_datatype_name is not None):
+ json_str += f'\t"@id":"{self.ns_prefix}:{self.root_property_name}"'
+ if self.prefixed_datatype_name is not None:
json_str += ",\n"
- json_str += f"\t\"@type\":\"{self.prefixed_datatype_name}\""
+ json_str += f'\t"@type":"{self.prefixed_datatype_name}"'
if self.shacl_count_lte_1 is not True:
- json_str += ",\n\t\"@container\":\"@set\"\n"
+ json_str += ',\n\t"@container":"@set"\n'
else:
json_str += "\n"
json_str += "},\n"
- return json_str
+ return json_str
def get_minimal_json(self) -> str:
- hdr_str = f"\"{self.ns_prefix}:{self.root_property_name}\":{{\n"
+ hdr_str = f'"{self.ns_prefix}:{self.root_property_name}":{{\n'
json_str = self.__get_json(hdr=hdr_str)
return json_str
def get_concise_json(self) -> str:
- hdr_str = f"\"{self.root_property_name}\":{{\n"
+ hdr_str = f'"{self.root_property_name}":{{\n'
json_str = self.__get_json(hdr=hdr_str)
return json_str
@@ -118,11 +118,11 @@ def __init__(self):
self.context_str = ""
def init_context_str(self) -> None:
- self.context_str = "{\n\t\"@context\":{\n"""
+ self.context_str = '{\n\t"@context":{\n' ""
def close_context_str(self) -> None:
self.context_str = self.context_str.strip()
- if self.context_str[-1] == ',':
+ if self.context_str[-1] == ",":
self.context_str = self.context_str[:-1]
self.context_str += "\n\t}\n}"
@@ -140,8 +140,9 @@ def get_ttl_files(self, subdirs=[]) -> typing.List[pathlib.Path]:
self.top_srcdir = pathlib.Path(os.path.dirname(__file__)) / ".."
top_srcdir = self.top_srcdir
# Sanity check.
- assert (top_srcdir / ".git").exists(), \
- "Hard-coded top_srcdir discovery is no longer correct."
+ assert (
+ top_srcdir / ".git"
+ ).exists(), "Hard-coded top_srcdir discovery is no longer correct."
# 1. Load all ontology files into dictionary of graphs.
@@ -186,11 +187,13 @@ def get_iris(self) -> typing.List[str]:
# print(f"\"{k}\":{self.iri_dict[k]}")
# prepend "uco-" to specific IRIs
v = self.iri_dict[k]
- #_logger.debug(v.split('/'))
- if ('uco'in v.split('/')) and ('ontology.unifiedcyberontology.org' in v.split('/')):
- irs_list.append(f"\"uco-{k}\":\"{v}\"")
+ # _logger.debug(v.split('/'))
+ if ("uco" in v.split("/")) and (
+ "ontology.unifiedcyberontology.org" in v.split("/")
+ ):
+ irs_list.append(f'"uco-{k}":"{v}"')
else:
- irs_list.append(f"\"{k}\":\"{v}\"")
+ irs_list.append(f'"{k}":"{v}"')
return irs_list
def add_prefixes_to_cntxt(self) -> None:
@@ -232,11 +235,13 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
"Make sure to do an itter that looks for rdflib.OWL.class"
# If we cannot find rdf range, skip
# If rdf range is a blank node, skip
- for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)):
+ for triple in graph.triples(
+ (None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
+ ):
dtp_obj = DatatypePropertyInfo()
_logger.debug(triple)
- _logger.debug(triple[0].split('/'))
- s_triple = triple[0].split('/')
+ _logger.debug(triple[0].split("/"))
+ s_triple = triple[0].split("/")
root = s_triple[-1]
ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
# print(ns_prefix, root)
@@ -254,12 +259,16 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
_logger.debug(f"\t\t**sh_triple:{sh_triple}")
dtp_obj.shacl_property_bnode = sh_triple[0]
- for sh_triple2 in graph.triples((dtp_obj.shacl_property_bnode, rdflib.SH.maxCount, None)):
+ for sh_triple2 in graph.triples(
+ (dtp_obj.shacl_property_bnode, rdflib.SH.maxCount, None)
+ ):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
if dtp_obj.shacl_count_lte_1 is not None:
- _logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ _logger.debug(
+ f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}"
+ )
dtp_obj.shacl_count_lte_1 = True
else:
_logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
@@ -289,7 +298,7 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
op_obj = ObjectPropertyInfo()
_logger.debug((triple))
# print(triple[0].split('/'))
- s_triple = triple[0].split('/')
+ s_triple = triple[0].split("/")
root = s_triple[-1]
ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
# print(ns_prefix, root)
@@ -299,12 +308,16 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
_logger.debug(f"\t**obj_sh_triple:{sh_triple}")
op_obj.shacl_property_bnode = sh_triple[0]
- for sh_triple2 in graph.triples((op_obj.shacl_property_bnode, rdflib.SH.maxCount, None)):
+ for sh_triple2 in graph.triples(
+ (op_obj.shacl_property_bnode, rdflib.SH.maxCount, None)
+ ):
_logger.debug(f"\t\t***sh_triple:{sh_triple2}")
_logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
if int(sh_triple2[2]) <= 1:
if op_obj.shacl_count_lte_1 is not None:
- _logger.debug(f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}")
+ _logger.debug(
+ f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}"
+ )
op_obj.shacl_count_lte_1 = True
else:
_logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
@@ -331,7 +344,7 @@ def process_prefixes(self):
sys.exit()
for ttl_file in ttl_file_list:
- with open(ttl_file, 'r') as file:
+ with open(ttl_file, "r") as file:
for line in file:
if re.search("^@prefix", line):
_logger.debug(f"Prefix: {ttl_file}\t{line.strip()}")
@@ -371,7 +384,7 @@ def add_minimal_object_props_to_cntxt(self) -> None:
for op_obj in self.object_properties_dict[key]:
op_str_sect += op_obj.get_minimal_json()
self.context_str += op_str_sect
-
+
def add_concise_object_props_to_cntxt(self) -> None:
"""Adds Object Properties to context string"""
op_str_sect = ""
@@ -390,22 +403,29 @@ def add_concise_object_props_to_cntxt(self) -> None:
def add_key_strings_to_cntxt(self) -> None:
"""Adds id, type, and graph key strings to context string"""
ks_str = ""
- ks_str += "\t\"id\":\"@id\",\n"
- ks_str += "\t\"type\":\"@type\",\n"
- ks_str += "\t\"value\":\"@value\",\n"
- ks_str += "\t\"graph\":\"@graph\",\n"
+ ks_str += '\t"id":"@id",\n'
+ ks_str += '\t"type":"@type",\n'
+ ks_str += '\t"value":"@value",\n'
+ ks_str += '\t"graph":"@graph",\n'
self.context_str += ks_str
def main():
argument_parser = argparse.ArgumentParser()
- argument_parser.add_argument('--debug', action="store_true")
- argument_parser.add_argument('--concise', action="store_true",
- help="Creates a \"concise\" context. This is more compact than the \
- default behavior which creates a \"minimal\" context")
- argument_parser.add_argument('-o', '--output', help="Output file for context.\
- Will print to stdout by default.")
+ argument_parser.add_argument("--debug", action="store_true")
+ argument_parser.add_argument(
+ "--concise",
+ action="store_true",
+ help='Creates a "concise" context. This is more compact than the \
+ default behavior which creates a "minimal" context',
+ )
+ argument_parser.add_argument(
+ "-o",
+ "--output",
+ help="Output file for context.\
+ Will print to stdout by default.",
+ )
args = argument_parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
@@ -414,10 +434,10 @@ def main():
out_f = None
if args.output is not None:
- out_f = open(args.output, 'w')
+ out_f = open(args.output, "w")
cb = ContextBuilder()
- for i in (cb.get_ttl_files(subdirs=['ontology'])):
+ for i in cb.get_ttl_files(subdirs=["ontology"]):
_logger.debug(f" Input ttl: {i}")
cb.process_prefixes()
@@ -443,10 +463,6 @@ def main():
return
- # TODO: context keyword in graph parse and graph serialize
- # TODO: black formater FLAKE8 for isort
- # TODO: check the case-uilities python
-
if __name__ == "__main__":
main()
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index 4eccd4cb..9833e6bf 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -23,19 +23,23 @@ def main():
arg_parser.add_argument("--skip-clean", action="store_true",
help="Keeps intermediate test files instead of \
automatic deletion")
+ arg_parser.add_argument("--input", default="action_result_NO_CONTEXT.json",
+ help="input file for testing")
arg_parser.add_argument('--concise', action="store_true",
help="Perform testing on \"concise\" context instead of \"minimal\"")
args = arg_parser.parse_args()
- print(args)
# Test graph file in JSON format
- test_file = "action_result_NO_CONTEXT.json"
+ # test_file = "action_result_NO_CONTEXT.json"
+ test_file = args.input
# File to which context will be written
output_file = "_temp_cntxt.json"
# Serialization of graph without using context
- no_cntxt_out = "_test_out_no_cntxt.json-ld"
+ # no_cntxt_out = "_test_out_no_cntxt.json-ld"
+ no_cntxt_out = f"_out_no_cntxt_{test_file}"
# Serialization of graph using context
- cntxt_out = "_test_out_cntxt.json-ld"
+ # cntxt_out = "_test_out_cntxt.json-ld"
+ cntxt_out = f"_out_ctxt_{test_file}"
# Execute Context builder
if args.concise:
cmd = "python ../../src/uco_jsonld_context_builder.py\
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 885183e8..60a2aa9b 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -53,8 +53,10 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
?nAction uco-action:result ?nResult .
}
"""):
+# ?nAction uco-action:result ?nResult .
computed += 1
-
+ for triple in sorted(graph.triples((None, None, None))):
+ logging.debug(triple)
try:
assert expected == computed
except AssertionError:
@@ -70,3 +72,14 @@ def test_context_concise() -> None:
def test_context_minimal() -> None:
_test_graph_context_query("action_result_NO_CONTEXT.json", "context-minimal.json")
+
+
+#def test_context_concise2() -> None:
+# _test_graph_context_query("action_result_concise_NO_CONTEXT.json", "context-concise.json")
+
+
+# def test_device_context_concise() -> None:
+# _test_graph_context_query("device_NO_CONTEXT.json", "context-concise.json")
+
+# def test_device_context_minimal() -> None:
+# _test_graph_context_query("device_NO_CONTEXT.json", "context-minimal.json")
From d808aa33036d7af57d1b2a456e4adff432f18c18 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 22:46:02 -0400
Subject: [PATCH 25/47] Perform static type review
This is the result of working through a few rounds of `mypy --strict`,
which now passes when rdflib >= 6.2.0 is installed in the virtual
environment.
Some minor logic errors were caught. At least one significant error was
found and flagged.
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
src/uco_jsonld_context_builder.py | 68 ++++++++++++++-----------
tests/context_builder/context_tester.py | 2 +-
2 files changed, 38 insertions(+), 32 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index c816485e..10a4645d 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -41,13 +41,13 @@ class ObjectPropertyInfo:
"""Class to hold ObjectProperty info which will be used to build
context"""
- def __init__(self):
- self.ns_prefix = None
- self.root_class_name = None
- self.shacl_count_lte_1 = None
+ def __init__(self) -> None:
+ self.ns_prefix: typing.Optional[str] = None
+ self.root_class_name: typing.Optional[str] = None
+ self.shacl_count_lte_1: typing.Optional[bool] = None
self.shacl_property_bnode = None
- def __get_json(self, hdr) -> str:
+ def __get_json(self, hdr: str) -> str:
json_str = hdr
json_str += '\t"@type":"@id"'
if self.shacl_count_lte_1 is not True:
@@ -73,14 +73,14 @@ class DatatypePropertyInfo:
"""Class to hold DatatypeProperty info which will be used to build
context"""
- def __init__(self):
- self.ns_prefix = None
- self.root_property_name = None
- self.prefixed_datatype_name = None
- self.shacl_count_lte_1 = None
+ def __init__(self) -> None:
+ self.ns_prefix: typing.Optional[str] = None
+ self.root_property_name: typing.Optional[str] = None
+ self.prefixed_datatype_name: typing.Optional[str] = None
+ self.shacl_count_lte_1: typing.Optional[bool] = None
self.shacl_property_bnode = None
- def __get_json(self, hdr) -> str:
+ def __get_json(self, hdr: str) -> str:
json_str = hdr
json_str += f'\t"@id":"{self.ns_prefix}:{self.root_property_name}"'
if self.prefixed_datatype_name is not None:
@@ -105,15 +105,14 @@ def get_concise_json(self) -> str:
class ContextBuilder:
- def __init__(self):
- self.ttl_file_list = None
+ def __init__(self) -> None:
+ self.ttl_file_list: typing.Optional[typing.List[pathlib.Path]] = None
self.prefix_dict = None
- self.top_srcdir = None
- self.iri_dict = None
- # A dict of DataTypePropertyInfo Objects
- self.datatype_properties_dict = {}
- # A dict of ObjectPropertyInfo Objects
- self.object_properties_dict = {}
+ self.top_srcdir: typing.Optional[pathlib.Path] = None
+ self.iri_dict: typing.Optional[typing.Dict[str, str]] = None
+ # TODO ERROR MITIGATION: These two dicts should be keyed by IRI (str() cast) rather than IRI fragment.
+ self.datatype_properties_dict: typing.Dict[str, typing.List[DatatypePropertyInfo]] = dict()
+ self.object_properties_dict: typing.Dict[str, typing.List[ObjectPropertyInfo]] = dict()
# The string that will hold the processed context
self.context_str = ""
@@ -126,12 +125,14 @@ def close_context_str(self) -> None:
self.context_str = self.context_str[:-1]
self.context_str += "\n\t}\n}"
- def get_ttl_files(self, subdirs=[]) -> typing.List[pathlib.Path]:
+ def get_ttl_files(self, subdirs: typing.List[str]=[]) -> typing.List[pathlib.Path]:
"""
Finds all turtle (.ttl) files in directory structure
@subdirs - Optional list used to restrict search to particular
directories.
"""
+ # TODO - It seems some of the purpose of get_ttl_files() may be mooted by using tests/uco_monolithic.ttl, a temporary build artifact.
+
if self.ttl_file_list is not None:
return self.ttl_file_list
@@ -171,7 +172,7 @@ def get_ttl_files(self, subdirs=[]) -> typing.List[pathlib.Path]:
continue
# _logger.debug(x)
file_list.append(x)
- self.ttl_file_list = file_list
+ self.ttl_file_list = file_list
return self.ttl_file_list
@@ -179,6 +180,7 @@ def get_iris(self) -> typing.List[str]:
"""
Returns sorted list of IRIs as prefix:value strings
"""
+ assert self.iri_dict is not None
k_list = list(self.iri_dict.keys())
# print(k_list)
k_list.sort()
@@ -201,7 +203,7 @@ def add_prefixes_to_cntxt(self) -> None:
for i in self.get_iris():
self.context_str += f"{i},\n"
- def __add_to_iri_dict(self, in_prefix: str):
+ def __add_to_iri_dict(self, in_prefix: str) -> None:
"""INTERNAL function: Adds unique key value pairs to dict
that will be used to generate context. Dies if inconsistent
key value pair is found.
@@ -215,7 +217,7 @@ def __add_to_iri_dict(self, in_prefix: str):
# Taking the ':' off the end of the key
k = t_split[1][:-1]
v = t_split[2]
- # Taking the angle brackets of the IRIs
+ # Taking the angle brackets off the IRIs
v = v.strip()[1:-1]
if k in iri_dict.keys():
# _logger.debug(f"'{k}' already exists")
@@ -225,7 +227,7 @@ def __add_to_iri_dict(self, in_prefix: str):
else:
iri_dict[k] = v
- def __process_DatatypePropertiesHelper(self, in_file=None):
+ def __process_DatatypePropertiesHelper(self, in_file: str) -> None:
"""
Does the actual work using rdflib
@in_file - ttl file to get object properties from
@@ -242,6 +244,8 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
_logger.debug(triple)
_logger.debug(triple[0].split("/"))
s_triple = triple[0].split("/")
+ # (rdflib calls this "fragment" rather than root)
+ # TODO LIKELY ERROR: This assumes fragments are unique within UCO, which is not true in UCO 0.9.0.
root = s_triple[-1]
ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
# print(ns_prefix, root)
@@ -280,11 +284,12 @@ def __process_DatatypePropertiesHelper(self, in_file=None):
self.datatype_properties_dict[root] = [dtp_obj]
return
- def process_DatatypeProperties(self):
+ def process_DatatypeProperties(self) -> None:
+ assert self.ttl_file_list is not None
for ttl_file in self.ttl_file_list:
- self.__process_DatatypePropertiesHelper(in_file=ttl_file)
+ self.__process_DatatypePropertiesHelper(in_file=str(ttl_file))
- def __process_ObjectPropertiesHelper(self, in_file=None):
+ def __process_ObjectPropertiesHelper(self, in_file: str) -> None:
"""
Does the actual work using rdflib
@in_file - ttl file to get object properties from
@@ -329,11 +334,12 @@ def __process_ObjectPropertiesHelper(self, in_file=None):
self.object_properties_dict[root] = [op_obj]
return
- def process_ObjectProperties(self):
+ def process_ObjectProperties(self) -> None:
+ assert self.ttl_file_list is not None
for ttl_file in self.ttl_file_list:
- self.__process_ObjectPropertiesHelper(in_file=ttl_file)
+ self.__process_ObjectPropertiesHelper(in_file=str(ttl_file))
- def process_prefixes(self):
+ def process_prefixes(self) -> None:
"""
Finds all prefix lines in list of ttl files. Adds them to an
an internal dict
@@ -411,7 +417,7 @@ def add_key_strings_to_cntxt(self) -> None:
self.context_str += ks_str
-def main():
+def main() -> None:
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument("--debug", action="store_true")
argument_parser.add_argument(
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index 9833e6bf..c422123a 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -17,7 +17,7 @@
import os
-def main():
+def main() -> None:
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("--skip-clean", action="store_true",
From 9e44d417642d88b3b5441b76aef5add00bf15aaf Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 22:49:36 -0400
Subject: [PATCH 26/47] Prevent premature descent into new test directory
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
tests/Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/Makefile b/tests/Makefile
index 4aa0cc7a..e2e42009 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -49,6 +49,7 @@ check: \
uco_monolithic.ttl
source venv/bin/activate \
&& pytest \
+ --ignore context_builder \
--ignore examples \
--log-level=DEBUG
$(MAKE) \
From 9bd9106e2d5543d3ef8f7f646772d1011c6f8a06 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 22:56:42 -0400
Subject: [PATCH 27/47] Scope hard-coded sample to 'minimal' context dictionary
test
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
..._NO_CONTEXT.json => action_result_NO_CONTEXT_minimal.json} | 0
tests/context_builder/context_tester.py | 4 ++--
tests/context_builder/test_context.py | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
rename tests/context_builder/{action_result_NO_CONTEXT.json => action_result_NO_CONTEXT_minimal.json} (100%)
diff --git a/tests/context_builder/action_result_NO_CONTEXT.json b/tests/context_builder/action_result_NO_CONTEXT_minimal.json
similarity index 100%
rename from tests/context_builder/action_result_NO_CONTEXT.json
rename to tests/context_builder/action_result_NO_CONTEXT_minimal.json
diff --git a/tests/context_builder/context_tester.py b/tests/context_builder/context_tester.py
index c422123a..cb2c83c1 100644
--- a/tests/context_builder/context_tester.py
+++ b/tests/context_builder/context_tester.py
@@ -23,14 +23,14 @@ def main() -> None:
arg_parser.add_argument("--skip-clean", action="store_true",
help="Keeps intermediate test files instead of \
automatic deletion")
- arg_parser.add_argument("--input", default="action_result_NO_CONTEXT.json",
+ arg_parser.add_argument("--input", default="action_result_NO_CONTEXT_minimal.json",
help="input file for testing")
arg_parser.add_argument('--concise', action="store_true",
help="Perform testing on \"concise\" context instead of \"minimal\"")
args = arg_parser.parse_args()
# Test graph file in JSON format
- # test_file = "action_result_NO_CONTEXT.json"
+ # test_file = "action_result_NO_CONTEXT_minimal.json"
test_file = args.input
# File to which context will be written
output_file = "_temp_cntxt.json"
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 60a2aa9b..162f3c8a 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -67,11 +67,11 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
def test_context_concise() -> None:
- _test_graph_context_query("action_result_NO_CONTEXT.json", "context-concise.json")
+ _test_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-concise.json")
def test_context_minimal() -> None:
- _test_graph_context_query("action_result_NO_CONTEXT.json", "context-minimal.json")
+ _test_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-minimal.json")
#def test_context_concise2() -> None:
From 74574649dad7717baea7ad0ee6092f84494a06f3 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 22:59:47 -0400
Subject: [PATCH 28/47] Start "concise" test
Test known to fail currently.
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
.../action_result_NO_CONTEXT_concise.json | 96 +++++++++++++++++++
tests/context_builder/test_context.py | 2 +-
2 files changed, 97 insertions(+), 1 deletion(-)
create mode 100644 tests/context_builder/action_result_NO_CONTEXT_concise.json
diff --git a/tests/context_builder/action_result_NO_CONTEXT_concise.json b/tests/context_builder/action_result_NO_CONTEXT_concise.json
new file mode 100644
index 00000000..1aa1928d
--- /dev/null
+++ b/tests/context_builder/action_result_NO_CONTEXT_concise.json
@@ -0,0 +1,96 @@
+{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
+ "@graph": [
+ {
+ "@id": "kb:action-1",
+ "@type": "Action",
+ "rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
+ "result": [
+ {
+ "@id": "kb:node-0"
+ },
+ {
+ "@id": "kb:node-1"
+ },
+ {
+ "@id": "kb:node-2"
+ },
+ {
+ "@id": "kb:node-3"
+ },
+ {
+ "@id": "kb:node-4"
+ },
+ {
+ "@id": "kb:node-5"
+ },
+ {
+ "@id": "kb:node-6"
+ },
+ {
+ "@id": "kb:node-7"
+ }
+ ]
+ },
+ {
+ "@id": "kb:node-0",
+ "@type": "ObservableObject"
+ },
+ {
+ "@id": "kb:node-1",
+ "@type": [
+ "UcoObject",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-2",
+ "@type": [
+ "Item",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-3",
+ "@type": [
+ "UcoObject",
+ "Item",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-4",
+ "@type": [
+ "Observable",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-5",
+ "@type": [
+ "UcoObject",
+ "Observable",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-6",
+ "@type": [
+ "Item",
+ "Observable",
+ "ObservableObject"
+ ]
+ },
+ {
+ "@id": "kb:node-7",
+ "@type": [
+ "UcoObject",
+ "Item",
+ "Observable",
+ "ObservableObject"
+ ]
+ }
+ ]
+}
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 162f3c8a..2f4e0c6d 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -67,7 +67,7 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
def test_context_concise() -> None:
- _test_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-concise.json")
+ _test_graph_context_query("action_result_NO_CONTEXT_concise.json", "context-concise.json")
def test_context_minimal() -> None:
From a75489449132326544cd71fdd01eaf514bd21074 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 23:08:45 -0400
Subject: [PATCH 29/47] Swap out @ symbols
This is another necessary proof of functionality for the testing.
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
.../action_result_NO_CONTEXT_concise.json | 52 +++++++++----------
.../action_result_NO_CONTEXT_minimal.json | 52 +++++++++----------
2 files changed, 52 insertions(+), 52 deletions(-)
diff --git a/tests/context_builder/action_result_NO_CONTEXT_concise.json b/tests/context_builder/action_result_NO_CONTEXT_concise.json
index 1aa1928d..7ba696b2 100644
--- a/tests/context_builder/action_result_NO_CONTEXT_concise.json
+++ b/tests/context_builder/action_result_NO_CONTEXT_concise.json
@@ -4,88 +4,88 @@
},
"@graph": [
{
- "@id": "kb:action-1",
- "@type": "Action",
+ "id": "kb:action-1",
+ "type": "Action",
"rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
"result": [
{
- "@id": "kb:node-0"
+ "id": "kb:node-0"
},
{
- "@id": "kb:node-1"
+ "id": "kb:node-1"
},
{
- "@id": "kb:node-2"
+ "id": "kb:node-2"
},
{
- "@id": "kb:node-3"
+ "id": "kb:node-3"
},
{
- "@id": "kb:node-4"
+ "id": "kb:node-4"
},
{
- "@id": "kb:node-5"
+ "id": "kb:node-5"
},
{
- "@id": "kb:node-6"
+ "id": "kb:node-6"
},
{
- "@id": "kb:node-7"
+ "id": "kb:node-7"
}
]
},
{
- "@id": "kb:node-0",
- "@type": "ObservableObject"
+ "id": "kb:node-0",
+ "type": "ObservableObject"
},
{
- "@id": "kb:node-1",
- "@type": [
+ "id": "kb:node-1",
+ "type": [
"UcoObject",
"ObservableObject"
]
},
{
- "@id": "kb:node-2",
- "@type": [
+ "id": "kb:node-2",
+ "type": [
"Item",
"ObservableObject"
]
},
{
- "@id": "kb:node-3",
- "@type": [
+ "id": "kb:node-3",
+ "type": [
"UcoObject",
"Item",
"ObservableObject"
]
},
{
- "@id": "kb:node-4",
- "@type": [
+ "id": "kb:node-4",
+ "type": [
"Observable",
"ObservableObject"
]
},
{
- "@id": "kb:node-5",
- "@type": [
+ "id": "kb:node-5",
+ "type": [
"UcoObject",
"Observable",
"ObservableObject"
]
},
{
- "@id": "kb:node-6",
- "@type": [
+ "id": "kb:node-6",
+ "type": [
"Item",
"Observable",
"ObservableObject"
]
},
{
- "@id": "kb:node-7",
- "@type": [
+ "id": "kb:node-7",
+ "type": [
"UcoObject",
"Item",
"Observable",
diff --git a/tests/context_builder/action_result_NO_CONTEXT_minimal.json b/tests/context_builder/action_result_NO_CONTEXT_minimal.json
index eb25154b..ef6ad0ba 100644
--- a/tests/context_builder/action_result_NO_CONTEXT_minimal.json
+++ b/tests/context_builder/action_result_NO_CONTEXT_minimal.json
@@ -4,88 +4,88 @@
},
"@graph": [
{
- "@id": "kb:action-1",
- "@type": "uco-action:Action",
+ "id": "kb:action-1",
+ "type": "uco-action:Action",
"rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
"uco-action:result": [
{
- "@id": "kb:node-0"
+ "id": "kb:node-0"
},
{
- "@id": "kb:node-1"
+ "id": "kb:node-1"
},
{
- "@id": "kb:node-2"
+ "id": "kb:node-2"
},
{
- "@id": "kb:node-3"
+ "id": "kb:node-3"
},
{
- "@id": "kb:node-4"
+ "id": "kb:node-4"
},
{
- "@id": "kb:node-5"
+ "id": "kb:node-5"
},
{
- "@id": "kb:node-6"
+ "id": "kb:node-6"
},
{
- "@id": "kb:node-7"
+ "id": "kb:node-7"
}
]
},
{
- "@id": "kb:node-0",
- "@type": "uco-observable:ObservableObject"
+ "id": "kb:node-0",
+ "type": "uco-observable:ObservableObject"
},
{
- "@id": "kb:node-1",
- "@type": [
+ "id": "kb:node-1",
+ "type": [
"uco-core:UcoObject",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-2",
- "@type": [
+ "id": "kb:node-2",
+ "type": [
"uco-core:Item",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-3",
- "@type": [
+ "id": "kb:node-3",
+ "type": [
"uco-core:UcoObject",
"uco-core:Item",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-4",
- "@type": [
+ "id": "kb:node-4",
+ "type": [
"uco-observable:Observable",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-5",
- "@type": [
+ "id": "kb:node-5",
+ "type": [
"uco-core:UcoObject",
"uco-observable:Observable",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-6",
- "@type": [
+ "id": "kb:node-6",
+ "type": [
"uco-core:Item",
"uco-observable:Observable",
"uco-observable:ObservableObject"
]
},
{
- "@id": "kb:node-7",
- "@type": [
+ "id": "kb:node-7",
+ "type": [
"uco-core:UcoObject",
"uco-core:Item",
"uco-observable:Observable",
From f783c67c7a7be3a9a067444571cdffd2f5ba571d Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 23:12:12 -0400
Subject: [PATCH 30/47] Scope test
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 2f4e0c6d..cce1d9e3 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -18,7 +18,7 @@
from rdflib import Graph, RDF, RDFS
-def _test_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
+def _test_action_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
expected = 8
computed = 0
@@ -53,7 +53,6 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
?nAction uco-action:result ?nResult .
}
"""):
-# ?nAction uco-action:result ?nResult .
computed += 1
for triple in sorted(graph.triples((None, None, None))):
logging.debug(triple)
@@ -66,12 +65,12 @@ def _test_graph_context_query(input_graph_file: str, input_context_file: str) ->
raise
-def test_context_concise() -> None:
- _test_graph_context_query("action_result_NO_CONTEXT_concise.json", "context-concise.json")
+def test_action_context_concise() -> None:
+ _test_action_graph_context_query("action_result_NO_CONTEXT_concise.json", "context-concise.json")
-def test_context_minimal() -> None:
- _test_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-minimal.json")
+def test_action_context_minimal() -> None:
+ _test_action_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-minimal.json")
#def test_context_concise2() -> None:
From 5826eab968544d97a1872daef742e401b7a517bd Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 23:38:28 -0400
Subject: [PATCH 31/47] Enable very-verbose output
Signed-off-by: Alex Nelson
---
tests/context_builder/Makefile | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tests/context_builder/Makefile b/tests/context_builder/Makefile
index e6834780..131a9b24 100644
--- a/tests/context_builder/Makefile
+++ b/tests/context_builder/Makefile
@@ -28,7 +28,9 @@ check: \
check-concise
source $(tests_srcdir)/venv/bin/activate \
&& pytest \
- --log-level=DEBUG
+ --log-level=DEBUG \
+ --verbose \
+ --verbose
check-concise: \
context-concise.json
From 64fa17b20a9d4d16d6622564b5c48e12bfaa5680 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Wed, 17 Aug 2022 23:39:14 -0400
Subject: [PATCH 32/47] Add hash sample test with exact-parse comparison
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
.../hash_NO_CONTEXT_concise.json | 13 +++++++
.../hash_NO_CONTEXT_minimal.json | 13 +++++++
tests/context_builder/hash_expanded.json | 16 ++++++++
tests/context_builder/test_context.py | 39 ++++++++++++++++++-
4 files changed, 80 insertions(+), 1 deletion(-)
create mode 100644 tests/context_builder/hash_NO_CONTEXT_concise.json
create mode 100644 tests/context_builder/hash_NO_CONTEXT_minimal.json
create mode 100644 tests/context_builder/hash_expanded.json
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
new file mode 100644
index 00000000..b28d308e
--- /dev/null
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -0,0 +1,13 @@
+{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
+ "@graph": [
+ {
+ "id": "kb:hash-1",
+ "type": "Hash",
+ "hashMethod": "SHA1",
+ "hashValue": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
+ }
+ ]
+}
diff --git a/tests/context_builder/hash_NO_CONTEXT_minimal.json b/tests/context_builder/hash_NO_CONTEXT_minimal.json
new file mode 100644
index 00000000..27e96ccb
--- /dev/null
+++ b/tests/context_builder/hash_NO_CONTEXT_minimal.json
@@ -0,0 +1,13 @@
+{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
+ "@graph": [
+ {
+ "id": "kb:hash-1",
+ "type": "uco-types:Hash",
+ "uco-types:hashMethod": "SHA1",
+ "uco-types:hashValue": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
+ }
+ ]
+}
diff --git a/tests/context_builder/hash_expanded.json b/tests/context_builder/hash_expanded.json
new file mode 100644
index 00000000..9f0a1450
--- /dev/null
+++ b/tests/context_builder/hash_expanded.json
@@ -0,0 +1,16 @@
+{
+ "@graph": [
+ {
+ "@id": "http://example.org/kb/hash-1",
+ "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash",
+ "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": {
+ "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab",
+ "@value": "SHA1"
+ },
+ "https://ontology.unifiedcyberontology.org/uco/types/hashValue": {
+ "@type": "http://www.w3.org/2001/XMLSchema#hexBinary",
+ "@value": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
+ }
+ }
+ ]
+}
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index cce1d9e3..e60678c1 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -13,9 +13,10 @@
import json
import logging
-from typing import Any, Dict
+from typing import Any, Dict, Set, Tuple
from rdflib import Graph, RDF, RDFS
+from rdflib.term import Node
def _test_action_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
@@ -73,6 +74,42 @@ def test_action_context_minimal() -> None:
_test_action_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-minimal.json")
+def _test_hash_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
+ """
+ Run an exact-parse-match test.
+ """
+
+    expected: Set[Tuple[Node, Node, Node]] = set()
+    computed: Set[Tuple[Node, Node, Node]] = set()
+
+ expected_graph = Graph()
+ computed_graph = Graph()
+
+ expected_graph.parse("hash_expanded.json")
+
+ context_object: Dict[str, Any]
+ with open(input_context_file, "r") as context_fh:
+ context_object = json.load(context_fh)
+
+ computed_graph.parse(input_graph_file, context=context_object)
+
+ for expected_triple in expected_graph:
+ expected.add(expected_triple)
+
+ for computed_triple in computed_graph:
+ computed.add(computed_triple)
+
+ assert expected == computed
+
+
+def test_hash_context_concise() -> None:
+ _test_hash_graph_context_query("hash_NO_CONTEXT_concise.json", "context-concise.json")
+
+
+def test_hash_context_minimal() -> None:
+ _test_hash_graph_context_query("hash_NO_CONTEXT_minimal.json", "context-minimal.json")
+
+
#def test_context_concise2() -> None:
# _test_graph_context_query("action_result_concise_NO_CONTEXT.json", "context-concise.json")
From e5a71046b5d62b9caf1d1003b8109403b9f73fbf Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 18 Aug 2022 08:21:27 -0400
Subject: [PATCH 33/47] Remove '@' from graph
Signed-off-by: Alex Nelson
---
tests/context_builder/action_result_NO_CONTEXT_concise.json | 2 +-
tests/context_builder/action_result_NO_CONTEXT_minimal.json | 2 +-
tests/context_builder/hash_NO_CONTEXT_concise.json | 2 +-
tests/context_builder/hash_NO_CONTEXT_minimal.json | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tests/context_builder/action_result_NO_CONTEXT_concise.json b/tests/context_builder/action_result_NO_CONTEXT_concise.json
index 7ba696b2..a04fbc6d 100644
--- a/tests/context_builder/action_result_NO_CONTEXT_concise.json
+++ b/tests/context_builder/action_result_NO_CONTEXT_concise.json
@@ -2,7 +2,7 @@
"@context": {
"kb": "http://example.org/kb/"
},
- "@graph": [
+ "graph": [
{
"id": "kb:action-1",
"type": "Action",
diff --git a/tests/context_builder/action_result_NO_CONTEXT_minimal.json b/tests/context_builder/action_result_NO_CONTEXT_minimal.json
index ef6ad0ba..5c7e1a61 100644
--- a/tests/context_builder/action_result_NO_CONTEXT_minimal.json
+++ b/tests/context_builder/action_result_NO_CONTEXT_minimal.json
@@ -2,7 +2,7 @@
"@context": {
"kb": "http://example.org/kb/"
},
- "@graph": [
+ "graph": [
{
"id": "kb:action-1",
"type": "uco-action:Action",
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
index b28d308e..2187dd72 100644
--- a/tests/context_builder/hash_NO_CONTEXT_concise.json
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -2,7 +2,7 @@
"@context": {
"kb": "http://example.org/kb/"
},
- "@graph": [
+ "graph": [
{
"id": "kb:hash-1",
"type": "Hash",
diff --git a/tests/context_builder/hash_NO_CONTEXT_minimal.json b/tests/context_builder/hash_NO_CONTEXT_minimal.json
index 27e96ccb..9fab936d 100644
--- a/tests/context_builder/hash_NO_CONTEXT_minimal.json
+++ b/tests/context_builder/hash_NO_CONTEXT_minimal.json
@@ -2,7 +2,7 @@
"@context": {
"kb": "http://example.org/kb/"
},
- "@graph": [
+ "graph": [
{
"id": "kb:hash-1",
"type": "uco-types:Hash",
From 4cdae7aaec8dbe7c50d5cdddbeda5106eb7e50e5 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Thu, 18 Aug 2022 08:25:05 -0400
Subject: [PATCH 34/47] Use concise object reference
Signed-off-by: Alex Nelson
---
.../action_result_NO_CONTEXT_concise.json | 32 +++++--------------
1 file changed, 8 insertions(+), 24 deletions(-)
diff --git a/tests/context_builder/action_result_NO_CONTEXT_concise.json b/tests/context_builder/action_result_NO_CONTEXT_concise.json
index a04fbc6d..865774a5 100644
--- a/tests/context_builder/action_result_NO_CONTEXT_concise.json
+++ b/tests/context_builder/action_result_NO_CONTEXT_concise.json
@@ -8,30 +8,14 @@
"type": "Action",
"rdfs:comment": "This node is some action that has some ObservableObjects as results. By the ontology, the results need to be some UcoObject or subclass of UcoObject. They are serialized here as ObservableObjects, and are redundantly assigned types of some of their superclasses. For completeness-tracking, let the id slug's number be a binary number tracking which superclasses are present, 2^0=core:UcoObject, 2^1=core:Item, 2^2=observable:Observable.",
"result": [
- {
- "id": "kb:node-0"
- },
- {
- "id": "kb:node-1"
- },
- {
- "id": "kb:node-2"
- },
- {
- "id": "kb:node-3"
- },
- {
- "id": "kb:node-4"
- },
- {
- "id": "kb:node-5"
- },
- {
- "id": "kb:node-6"
- },
- {
- "id": "kb:node-7"
- }
+ "kb:node-0",
+ "kb:node-1",
+ "kb:node-2",
+ "kb:node-3",
+ "kb:node-4",
+ "kb:node-5",
+ "kb:node-6",
+ "kb:node-7"
]
},
{
From 67a836d93d64a4a971070a647f48da0997ad47a4 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 11:21:10 -0400
Subject: [PATCH 35/47] Remove constraint on semi-open vocabulary functionality
with context dictionary
I had previously realized, and forgotten, that datatyped literals do not
appear to be supported as a feature within context dictionaries.
References:
* https://github.com/ucoProject/UCO/issues/423
Signed-off-by: Alex Nelson
---
tests/context_builder/hash_NO_CONTEXT_concise.json | 5 ++++-
tests/context_builder/hash_NO_CONTEXT_minimal.json | 5 ++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
index 2187dd72..9a7187f3 100644
--- a/tests/context_builder/hash_NO_CONTEXT_concise.json
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -6,7 +6,10 @@
{
"id": "kb:hash-1",
"type": "Hash",
- "hashMethod": "SHA1",
+ "hashMethod": {
+ "type": "HashNameVocab",
+ "value": "SHA1"
+ },
"hashValue": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
}
]
diff --git a/tests/context_builder/hash_NO_CONTEXT_minimal.json b/tests/context_builder/hash_NO_CONTEXT_minimal.json
index 9fab936d..bdec40d1 100644
--- a/tests/context_builder/hash_NO_CONTEXT_minimal.json
+++ b/tests/context_builder/hash_NO_CONTEXT_minimal.json
@@ -6,7 +6,10 @@
{
"id": "kb:hash-1",
"type": "uco-types:Hash",
- "uco-types:hashMethod": "SHA1",
+ "uco-types:hashMethod": {
+ "type": "vocabulary:HashNameVocab",
+ "value": "SHA1"
+ },
"uco-types:hashValue": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
}
]
From be1317e4c409fed3baf262385e1b8edd1f5bde10 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 11:34:04 -0400
Subject: [PATCH 36/47] Troubleshooting hash pytest by hardcoding values in the
context
---
.../hash_NO_CONTEXT_concise.json | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
index 9a7187f3..807e3d95 100644
--- a/tests/context_builder/hash_NO_CONTEXT_concise.json
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -1,5 +1,24 @@
{
"@context": {
+ "uco-types": "https://ontology.unifiedcyberontology.org/uco/types/",
+ "uco-co": "https://ontology.unifiedcyberontology.org/co/",
+ "uco-victim": "https://ontology.unifiedcyberontology.org/uco/victim/",
+ "uco-vocabulary": "https://ontology.unifiedcyberontology.org/uco/vocabulary/",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+ "HashNameVocab" : {
+ "@id": "uco-vocabulary:HashNameVocab"
+ },
+ "hashValue": {
+ "@id": "uco-types:hashValue",
+ "@type": "xsd:hexBinary"
+ },
+ "hashMethod": {
+ "@id": "uco-types:hashMethod"
+ },
+ "value": "@value",
+ "graph": "@graph",
+ "type": "@type",
+ "id": "@id",
"kb": "http://example.org/kb/"
},
"graph": [
From b7ec029519521a809fb1a1f75686ab7a2847f8d8 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 12:55:37 -0400
Subject: [PATCH 37/47] Adding classes for concise contexts
---
src/uco_jsonld_context_builder.py | 128 +++++++++++++++++++++++++++++-
1 file changed, 124 insertions(+), 4 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 10a4645d..589c2b2f 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -104,6 +104,32 @@ def get_concise_json(self) -> str:
return json_str
+class UCO_Class:
+ def __init__(self) -> None:
+ self.ns_prefix: typing.Optional[str] = None
+ self.root_class_name: typing.Optional[str] = None
+ self.prefixed_datatype_name: typing.Optional[str] = None
+ self.shacl_count_lte_1: typing.Optional[bool] = None
+ self.shacl_property_bnode = None
+
+ def __get_json(self, hdr: str) -> str:
+ json_str = hdr
+ json_str += f'\t"@id":"{self.ns_prefix}:{self.root_class_name}"'
+ json_str += "\n"
+ json_str += "},\n"
+ return json_str
+
+ def get_minimal_json(self) -> str:
+ hdr_str = f'"{self.ns_prefix}:{self.root_class_name}":{{\n'
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+ def get_concise_json(self) -> str:
+ hdr_str = f'"{self.root_class_name}":{{\n'
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+
class ContextBuilder:
def __init__(self) -> None:
self.ttl_file_list: typing.Optional[typing.List[pathlib.Path]] = None
@@ -111,8 +137,13 @@ def __init__(self) -> None:
self.top_srcdir: typing.Optional[pathlib.Path] = None
self.iri_dict: typing.Optional[typing.Dict[str, str]] = None
# TODO ERROR MITIGATION: These two dicts should be keyed by IRI (str() cast) rather than IRI fragment.
- self.datatype_properties_dict: typing.Dict[str, typing.List[DatatypePropertyInfo]] = dict()
- self.object_properties_dict: typing.Dict[str, typing.List[ObjectPropertyInfo]] = dict()
+ self.datatype_properties_dict: typing.Dict[
+ str, typing.List[DatatypePropertyInfo]
+ ] = dict()
+ self.object_properties_dict: typing.Dict[
+ str, typing.List[ObjectPropertyInfo]
+ ] = dict()
+ self.classes_dict: typing.Dict[str, typing.List[ObjectPropertyInfo]] = dict()
# The string that will hold the processed context
self.context_str = ""
@@ -125,7 +156,9 @@ def close_context_str(self) -> None:
self.context_str = self.context_str[:-1]
self.context_str += "\n\t}\n}"
- def get_ttl_files(self, subdirs: typing.List[str]=[]) -> typing.List[pathlib.Path]:
+ def get_ttl_files(
+ self, subdirs: typing.List[str] = []
+ ) -> typing.List[pathlib.Path]:
"""
Finds all turtle (.ttl) files in directory structure
@subdirs - Optional list used to restrict search to particular
@@ -237,6 +270,18 @@ def __process_DatatypePropertiesHelper(self, in_file: str) -> None:
"Make sure to do an itter that looks for rdflib.OWL.class"
# If we cannot find rdf range, skip
# If rdf range is a blank node, skip
+
+ # Troubleshooting loop
+ for triple in graph.triples(
+ # (None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
+ (None, rdflib.RDF.type, None)
+ ):
+ _logger.debug(f"Any: {triple}")
+
+ # Troubleshooting loop
+ for triple in graph.triples((None, None, rdflib.OWL.DatatypeProperty)):
+ _logger.debug(f"Any Owl DatatypeProperty: {triple}")
+
for triple in graph.triples(
(None, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
):
@@ -287,6 +332,7 @@ def __process_DatatypePropertiesHelper(self, in_file: str) -> None:
def process_DatatypeProperties(self) -> None:
assert self.ttl_file_list is not None
for ttl_file in self.ttl_file_list:
+ _logger.debug(f"Datatype Processing for {str(ttl_file)}")
self.__process_DatatypePropertiesHelper(in_file=str(ttl_file))
def __process_ObjectPropertiesHelper(self, in_file: str) -> None:
@@ -296,7 +342,6 @@ def __process_ObjectPropertiesHelper(self, in_file: str) -> None:
"""
graph = rdflib.Graph()
graph.parse(in_file, format="turtle")
- # Make sure to do an iter that looks for rdflib.OWL.class"
# If we cannot find rdf range, skip
# If rdf range is a blank node, skip
for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.ObjectProperty)):
@@ -334,11 +379,66 @@ def __process_ObjectPropertiesHelper(self, in_file: str) -> None:
self.object_properties_dict[root] = [op_obj]
return
+ def __process_ClassesHelper(self, in_file: str) -> None:
+ graph = rdflib.Graph()
+ graph.parse(in_file, format="turtle")
+ # Make sure to do an iter that looks for rdflib.OWL.class"
+ # If we cannot find rdf range, skip
+ # If rdf range is a blank node, skip
+ for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.Class)):
+ # Skip Blank Nodes
+ if isinstance(triple[0], rdflib.term.BNode):
+ _logger.debug(f"\tBlank: {triple}\n")
+ continue
+ c_obj = UCO_Class()
+ # print(triple)
+ _logger.debug((triple))
+ # print(triple[0].split("/"))
+ s_triple = triple[0].split("/")
+ root = s_triple[-1]
+ ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
+ # print(ns_prefix, root)
+ # print(root)
+ c_obj.ns_prefix = ns_prefix
+ c_obj.root_class_name = root
+
+ # for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
+ # _logger.debug(f"\t**obj_sh_triple:{sh_triple}")
+ # op_obj.shacl_property_bnode = sh_triple[0]
+ # for sh_triple2 in graph.triples(
+ # (op_obj.shacl_property_bnode, rdflib.SH.maxCount, None)
+ # ):
+ # _logger.debug(f"\t\t***sh_triple:{sh_triple2}")
+ # _logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
+ # if int(sh_triple2[2]) <= 1:
+ # if op_obj.shacl_count_lte_1 is not None:
+ # _logger.debug(
+ # f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}"
+ # )
+ # op_obj.shacl_count_lte_1 = True
+ # else:
+ # _logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
+
+ if root in self.classes_dict.keys():
+ _logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.classes_dict[root].append(c_obj)
+ else:
+ self.classes_dict[root] = [c_obj]
+ return
+
def process_ObjectProperties(self) -> None:
assert self.ttl_file_list is not None
for ttl_file in self.ttl_file_list:
+ _logger.debug(f"ObjectProperty Processing for {str(ttl_file)}")
self.__process_ObjectPropertiesHelper(in_file=str(ttl_file))
+ def process_Classes(self) -> None:
+ assert self.ttl_file_list is not None
+ for ttl_file in self.ttl_file_list:
+ _logger.debug(f"Class Processing for {str(ttl_file)}")
+ self.__process_ClassesHelper(in_file=str(ttl_file))
+
def process_prefixes(self) -> None:
"""
Finds all prefix lines in list of ttl files. Adds them to an
@@ -416,6 +516,23 @@ def add_key_strings_to_cntxt(self) -> None:
self.context_str += ks_str
+ def add_concise_classes_to_cntxt(self) -> None:
+ """Adds classes to context string"""
+ c_sect_str = ""
+ c_list = list(self.classes_dict.keys())
+ c_list.sort()
+
+ for key in c_list:
+ if len(self.classes_dict[key]) > 1:
+ # print(f"M:{self.classes_dict[key]}")
+ for c_obj in self.classes_dict[key]:
+ c_sect_str += c_obj.get_minimal_json()
+ else:
+ # print(f"S:{self.classes_dict[key]}")
+ for c_obj in self.classes_dict[key]:
+ c_sect_str += c_obj.get_concise_json()
+ self.context_str += c_sect_str
+
def main() -> None:
argument_parser = argparse.ArgumentParser()
@@ -452,6 +569,9 @@ def main() -> None:
cb.init_context_str()
cb.add_prefixes_to_cntxt()
if args.concise:
+        # Note that classes are not included in the minimal context
+ cb.process_Classes()
+ cb.add_concise_classes_to_cntxt()
cb.add_concise_object_props_to_cntxt()
cb.add_concise_datatype_props_to_cntxt()
else:
From 17bd26640f6c658adf949bc1c0d949a6860b8d3b Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 13:05:44 -0400
Subject: [PATCH 38/47] Add needed class designation
Signed-off-by: Alex Nelson
---
tests/context_builder/hash_NO_CONTEXT_concise.json | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
index 807e3d95..781a3af1 100644
--- a/tests/context_builder/hash_NO_CONTEXT_concise.json
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -5,6 +5,9 @@
"uco-victim": "https://ontology.unifiedcyberontology.org/uco/victim/",
"uco-vocabulary": "https://ontology.unifiedcyberontology.org/uco/vocabulary/",
"xsd": "http://www.w3.org/2001/XMLSchema#",
+ "Hash" : {
+ "@id": "uco-types:Hash"
+ },
"HashNameVocab" : {
"@id": "uco-vocabulary:HashNameVocab"
},
From e43cfea283796efc34e45ccdcbc4cd62c1ffdf7b Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 13:31:45 -0400
Subject: [PATCH 39/47] Fix prefix
Signed-off-by: Alex Nelson
---
tests/context_builder/hash_NO_CONTEXT_minimal.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/context_builder/hash_NO_CONTEXT_minimal.json b/tests/context_builder/hash_NO_CONTEXT_minimal.json
index bdec40d1..b64beabb 100644
--- a/tests/context_builder/hash_NO_CONTEXT_minimal.json
+++ b/tests/context_builder/hash_NO_CONTEXT_minimal.json
@@ -7,7 +7,7 @@
"id": "kb:hash-1",
"type": "uco-types:Hash",
"uco-types:hashMethod": {
- "type": "vocabulary:HashNameVocab",
+ "type": "uco-vocabulary:HashNameVocab",
"value": "SHA1"
},
"uco-types:hashValue": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
From 474d91ed4206f250a8a20a25de53bcf8b5101adf Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 13:33:25 -0400
Subject: [PATCH 40/47] Adds UCO Class processing to context builder
---
src/uco_jsonld_context_builder.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 589c2b2f..3aa19639 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -49,6 +49,7 @@ def __init__(self) -> None:
def __get_json(self, hdr: str) -> str:
json_str = hdr
+ json_str += f'\t"@id":"{self.ns_prefix}:{self.root_class_name}",\n'
json_str += '\t"@type":"@id"'
if self.shacl_count_lte_1 is not True:
json_str += ',\n\t"@container":"@set"\n'
From df5620feb431c0f697c4a4657ef75224bb003e88 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 14:45:16 -0400
Subject: [PATCH 41/47] Adds datatype processing to context builder
---
src/uco_jsonld_context_builder.py | 107 ++++++++++++++++++++++++------
1 file changed, 86 insertions(+), 21 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 3aa19639..a66923ae 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -109,9 +109,35 @@ class UCO_Class:
def __init__(self) -> None:
self.ns_prefix: typing.Optional[str] = None
self.root_class_name: typing.Optional[str] = None
- self.prefixed_datatype_name: typing.Optional[str] = None
- self.shacl_count_lte_1: typing.Optional[bool] = None
- self.shacl_property_bnode = None
+ # self.prefixed_datatype_name: typing.Optional[str] = None
+ # self.shacl_count_lte_1: typing.Optional[bool] = None
+ # self.shacl_property_bnode = None
+
+ def __get_json(self, hdr: str) -> str:
+ json_str = hdr
+ json_str += f'\t"@id":"{self.ns_prefix}:{self.root_class_name}"'
+ json_str += "\n"
+ json_str += "},\n"
+ return json_str
+
+ def get_minimal_json(self) -> str:
+ hdr_str = f'"{self.ns_prefix}:{self.root_class_name}":{{\n'
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+ def get_concise_json(self) -> str:
+ hdr_str = f'"{self.root_class_name}":{{\n'
+ json_str = self.__get_json(hdr=hdr_str)
+ return json_str
+
+
+class DataType:
+ def __init__(self) -> None:
+ self.ns_prefix: typing.Optional[str] = None
+ self.root_class_name: typing.Optional[str] = None
+ # self.prefixed_datatype_name: typing.Optional[str] = None
+ # self.shacl_count_lte_1: typing.Optional[bool] = None
+ # self.shacl_property_bnode = None
def __get_json(self, hdr: str) -> str:
json_str = hdr
@@ -144,7 +170,8 @@ def __init__(self) -> None:
self.object_properties_dict: typing.Dict[
str, typing.List[ObjectPropertyInfo]
] = dict()
- self.classes_dict: typing.Dict[str, typing.List[ObjectPropertyInfo]] = dict()
+ self.classes_dict: typing.Dict[str, typing.List[UCO_Class]] = dict()
+ self.datatypes_dict: typing.Dict[str, typing.List[DataType]] = dict()
# The string that will hold the processed context
self.context_str = ""
@@ -403,23 +430,6 @@ def __process_ClassesHelper(self, in_file: str) -> None:
c_obj.ns_prefix = ns_prefix
c_obj.root_class_name = root
- # for sh_triple in graph.triples((None, rdflib.SH.path, triple[0])):
- # _logger.debug(f"\t**obj_sh_triple:{sh_triple}")
- # op_obj.shacl_property_bnode = sh_triple[0]
- # for sh_triple2 in graph.triples(
- # (op_obj.shacl_property_bnode, rdflib.SH.maxCount, None)
- # ):
- # _logger.debug(f"\t\t***sh_triple:{sh_triple2}")
- # _logger.debug(f"\t\t***sh_triple:{sh_triple2[2]}")
- # if int(sh_triple2[2]) <= 1:
- # if op_obj.shacl_count_lte_1 is not None:
- # _logger.debug(
- # f"\t\t\t**MaxCount Double Definition? {triple[0].n3(graph.namespace_manager)}"
- # )
- # op_obj.shacl_count_lte_1 = True
- # else:
- # _logger.debug(f"\t\t\t***Large max_count: {sh_triple2[2]}")
-
if root in self.classes_dict.keys():
_logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
@@ -440,6 +450,42 @@ def process_Classes(self) -> None:
_logger.debug(f"Class Processing for {str(ttl_file)}")
self.__process_ClassesHelper(in_file=str(ttl_file))
+ def __process_DataTypesHelper(self, in_file: str) -> None:
+ graph = rdflib.Graph()
+ graph.parse(in_file, format="turtle")
+ # Make sure to do an iter that looks for rdflib.OWL.class"
+ # If we cannot find rdf range, skip
+ # If rdf range is a blank node, skip
+ for triple in graph.triples((None, rdflib.RDF.type, rdflib.RDFS.Datatype)):
+ # Skip Blank Nodes
+ if isinstance(triple[0], rdflib.term.BNode):
+ _logger.debug(f"\tBlank: {triple}\n")
+ continue
+ dt_obj = DataType()
+ # print(triple)
+ _logger.debug((triple))
+ # print(triple[0].split("/"))
+ s_triple = triple[0].split("/")
+ root = s_triple[-1]
+ ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
+ # print(ns_prefix, root)
+ # print(root)
+ dt_obj.ns_prefix = ns_prefix
+ dt_obj.root_class_name = root
+
+ if root in self.datatypes_dict.keys():
+ _logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.datatypes_dict[root].append(dt_obj)
+ else:
+ self.datatypes_dict[root] = [dt_obj]
+ return
+
+ def process_DataTypes(self) -> None:
+ assert self.ttl_file_list is not None
+ for ttl_file in self.ttl_file_list:
+ _logger.debug(f"DataType Processing for {str(ttl_file)}")
+ self.__process_DataTypesHelper(in_file=str(ttl_file))
+
def process_prefixes(self) -> None:
"""
Finds all prefix lines in list of ttl files. Adds them to an
@@ -534,6 +580,23 @@ def add_concise_classes_to_cntxt(self) -> None:
c_sect_str += c_obj.get_concise_json()
self.context_str += c_sect_str
+ def add_concise_datatypes_to_cntxt(self) -> None:
+ """Adds classes to context string"""
+ dt_sect_str = ""
+ dt_list = list(self.datatypes_dict.keys())
+ dt_list.sort()
+
+ for key in dt_list:
+ if len(self.datatypes_dict[key]) > 1:
+ # print(f"M:{self.classes_dict[key]}")
+ for dt_obj in self.datatypes_dict[key]:
+ dt_sect_str += dt_obj.get_minimal_json()
+ else:
+ # print(f"S:{self.classes_dict[key]}")
+ for dt_obj in self.datatypes_dict[key]:
+ dt_sect_str += dt_obj.get_concise_json()
+ self.context_str += dt_sect_str
+
def main() -> None:
argument_parser = argparse.ArgumentParser()
@@ -572,7 +635,9 @@ def main() -> None:
if args.concise:
# Note there is classes are not in minimal context
cb.process_Classes()
+ cb.process_DataTypes()
cb.add_concise_classes_to_cntxt()
+ cb.add_concise_datatypes_to_cntxt()
cb.add_concise_object_props_to_cntxt()
cb.add_concise_datatype_props_to_cntxt()
else:
From a594c1ec4575999a8e0692208b26bc6dbebb0c63 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 14:46:11 -0400
Subject: [PATCH 42/47] Takes out cheats in context
---
.../hash_NO_CONTEXT_concise.json | 22 -------------------
1 file changed, 22 deletions(-)
diff --git a/tests/context_builder/hash_NO_CONTEXT_concise.json b/tests/context_builder/hash_NO_CONTEXT_concise.json
index 781a3af1..9a7187f3 100644
--- a/tests/context_builder/hash_NO_CONTEXT_concise.json
+++ b/tests/context_builder/hash_NO_CONTEXT_concise.json
@@ -1,27 +1,5 @@
{
"@context": {
- "uco-types": "https://ontology.unifiedcyberontology.org/uco/types/",
- "uco-co": "https://ontology.unifiedcyberontology.org/co/",
- "uco-victim": "https://ontology.unifiedcyberontology.org/uco/victim/",
- "uco-vocabulary": "https://ontology.unifiedcyberontology.org/uco/vocabulary/",
- "xsd": "http://www.w3.org/2001/XMLSchema#",
- "Hash" : {
- "@id": "uco-types:Hash"
- },
- "HashNameVocab" : {
- "@id": "uco-vocabulary:HashNameVocab"
- },
- "hashValue": {
- "@id": "uco-types:hashValue",
- "@type": "xsd:hexBinary"
- },
- "hashMethod": {
- "@id": "uco-types:hashMethod"
- },
- "value": "@value",
- "graph": "@graph",
- "type": "@type",
- "id": "@id",
"kb": "http://example.org/kb/"
},
"graph": [
From cb0ca0365498466420bead928b3e4180a6532b13 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 15:12:32 -0400
Subject: [PATCH 43/47] Adjust test framework to parameterize for
context-independent file
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index e60678c1..332857e5 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -74,9 +74,13 @@ def test_action_context_minimal() -> None:
_test_action_graph_context_query("action_result_NO_CONTEXT_minimal.json", "context-minimal.json")
-def _test_hash_graph_context_query(input_graph_file: str, input_context_file: str) -> None:
+def _test_graph_context_independent_match(input_dependent_graph_file: str, input_context_file: str, input_independent_graph_file: str) -> None:
"""
- Run an exact-parse-match test.
+ Run an exact-parse-match test, confirming that the triples found in a file that does not depend on a context dictionary matches a JSON-LD file that does depend on a context dictionary.
+
+ :param input_dependent_graph_file: File that depends on externally-supplied context dictionary to function.
+ :param input_context_file: Context dictionary file.
+ :param input_independent_graph_file: File that does not depend on externally-supplied context dictionary to function.
"""
expected: Set[Tuple[Node, Node, Nonde]] = set()
@@ -85,13 +89,13 @@ def _test_hash_graph_context_query(input_graph_file: str, input_context_file: st
expected_graph = Graph()
computed_graph = Graph()
- expected_graph.parse("hash_expanded.json")
+ expected_graph.parse(input_independent_graph_file)
context_object: Dict[str, Any]
with open(input_context_file, "r") as context_fh:
context_object = json.load(context_fh)
- computed_graph.parse(input_graph_file, context=context_object)
+ computed_graph.parse(input_dependent_graph_file, context=context_object)
for expected_triple in expected_graph:
expected.add(expected_triple)
@@ -103,11 +107,11 @@ def _test_hash_graph_context_query(input_graph_file: str, input_context_file: st
def test_hash_context_concise() -> None:
- _test_hash_graph_context_query("hash_NO_CONTEXT_concise.json", "context-concise.json")
+ _test_graph_context_independent_match("hash_NO_CONTEXT_concise.json", "context-concise.json", "hash_expanded.json")
def test_hash_context_minimal() -> None:
- _test_hash_graph_context_query("hash_NO_CONTEXT_minimal.json", "context-minimal.json")
+ _test_graph_context_independent_match("hash_NO_CONTEXT_minimal.json", "context-minimal.json", "hash_expanded.json")
#def test_context_concise2() -> None:
From c8ce67fb35ee79028a55449fcf2bf35af49fda7d Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 15:13:59 -0400
Subject: [PATCH 44/47] Exercise against sample that uses imported, non-UCO
classes
Signed-off-by: Alex Nelson
---
tests/context_builder/test_context.py | 8 +++++
.../thread_NO_CONTEXT_concise.json | 34 +++++++++++++++++++
.../thread_NO_CONTEXT_minimal.json | 34 +++++++++++++++++++
3 files changed, 76 insertions(+)
create mode 100644 tests/context_builder/thread_NO_CONTEXT_concise.json
create mode 100644 tests/context_builder/thread_NO_CONTEXT_minimal.json
diff --git a/tests/context_builder/test_context.py b/tests/context_builder/test_context.py
index 332857e5..4ba86b9d 100644
--- a/tests/context_builder/test_context.py
+++ b/tests/context_builder/test_context.py
@@ -114,6 +114,14 @@ def test_hash_context_minimal() -> None:
_test_graph_context_independent_match("hash_NO_CONTEXT_minimal.json", "context-minimal.json", "hash_expanded.json")
+def test_thread_context_concise() -> None:
+ _test_graph_context_independent_match("thread_NO_CONTEXT_concise.json", "context-concise.json", "../examples/thread_PASS.json")
+
+
+def test_thread_context_minimal() -> None:
+ _test_graph_context_independent_match("thread_NO_CONTEXT_minimal.json", "context-minimal.json", "../examples/thread_PASS.json")
+
+
#def test_context_concise2() -> None:
# _test_graph_context_query("action_result_concise_NO_CONTEXT.json", "context-concise.json")
diff --git a/tests/context_builder/thread_NO_CONTEXT_concise.json b/tests/context_builder/thread_NO_CONTEXT_concise.json
new file mode 100644
index 00000000..251875fe
--- /dev/null
+++ b/tests/context_builder/thread_NO_CONTEXT_concise.json
@@ -0,0 +1,34 @@
+{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
+ "graph": [
+ {
+ "id": "kb:thread-1",
+ "type": "Thread",
+ "item": [
+ "kb:thread-1-item-1",
+ "kb:thread-1-item-2",
+ "kb:thread-1-item-3"
+ ]
+ },
+ {
+ "id": "kb:thread-1-item-1",
+ "type": "ThreadItem",
+ "threadNextItem": [
+ "kb:thread-1-item-2",
+ "kb:thread-1-item-3"
+ ]
+ },
+ {
+ "id": "kb:thread-1-item-2",
+ "type": "ThreadItem",
+ "threadPreviousItem": "kb:thread-1-item-1"
+ },
+ {
+ "id": "kb:thread-1-item-3",
+ "type": "ThreadItem",
+ "threadPreviousItem": "kb:thread-1-item-1"
+ }
+ ]
+}
diff --git a/tests/context_builder/thread_NO_CONTEXT_minimal.json b/tests/context_builder/thread_NO_CONTEXT_minimal.json
new file mode 100644
index 00000000..0014e0d2
--- /dev/null
+++ b/tests/context_builder/thread_NO_CONTEXT_minimal.json
@@ -0,0 +1,34 @@
+{
+ "@context": {
+ "kb": "http://example.org/kb/"
+ },
+ "graph": [
+ {
+ "id": "kb:thread-1",
+ "type": "types:Thread",
+ "co:item": [
+ "kb:thread-1-item-1",
+ "kb:thread-1-item-2",
+ "kb:thread-1-item-3"
+ ]
+ },
+ {
+ "id": "kb:thread-1-item-1",
+ "type": "types:ThreadItem",
+ "types:threadNextItem": [
+ "kb:thread-1-item-2",
+ "kb:thread-1-item-3"
+ ]
+ },
+ {
+ "id": "kb:thread-1-item-2",
+ "type": "types:ThreadItem",
+ "types:threadPreviousItem": "kb:thread-1-item-1"
+ },
+ {
+ "id": "kb:thread-1-item-3",
+ "type": "types:ThreadItem",
+ "types:threadPreviousItem": "kb:thread-1-item-1"
+ }
+ ]
+}
From 937bad0983dff61eb823fe97ba134ad9067fd271 Mon Sep 17 00:00:00 2001
From: Alex Nelson
Date: Tue, 23 Aug 2022 15:31:30 -0400
Subject: [PATCH 45/47] Expand search for class IRIs
This is to account for classes that UCO imports from external
ontologies.
This patch is necessary, but insufficient, to fix the thread test.
Signed-off-by: Alex Nelson
---
src/uco_jsonld_context_builder.py | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index a66923ae..4196a110 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -410,14 +410,19 @@ def __process_ObjectPropertiesHelper(self, in_file: str) -> None:
def __process_ClassesHelper(self, in_file: str) -> None:
graph = rdflib.Graph()
graph.parse(in_file, format="turtle")
- # Make sure to do an iter that looks for rdflib.OWL.class"
- # If we cannot find rdf range, skip
- # If rdf range is a blank node, skip
+ # Populate with an iter that looks for rdflib.OWL.class, and then for participation in subclassing.
+ all_class_iris: typing.Set[rdflib.URIRef] = set()
for triple in graph.triples((None, rdflib.RDF.type, rdflib.OWL.Class)):
# Skip Blank Nodes
- if isinstance(triple[0], rdflib.term.BNode):
- _logger.debug(f"\tBlank: {triple}\n")
- continue
+ if isinstance(triple[0], rdflib.URIRef):
+ all_class_iris.add(triple[0])
+ for triple in graph.triples((None, rdflib.RDFS.subClassOf, None)):
+ # Skip Blank Nodes
+ if isinstance(triple[0], rdflib.URIRef):
+ all_class_iris.add(triple[0])
+ if isinstance(triple[2], rdflib.URIRef):
+ all_class_iris.add(triple[2])
+ for class_iri in all_class_iris:
c_obj = UCO_Class()
# print(triple)
_logger.debug((triple))
From 6f03b0dba64ecff844fbf1be298dc95e407a4fe7 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Tue, 23 Aug 2022 21:27:49 -0400
Subject: [PATCH 46/47] Fixes a bug in class_iri processing to pass concise
hash test
---
src/uco_jsonld_context_builder.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/src/uco_jsonld_context_builder.py b/src/uco_jsonld_context_builder.py
index 4196a110..bcaeb620 100644
--- a/src/uco_jsonld_context_builder.py
+++ b/src/uco_jsonld_context_builder.py
@@ -424,10 +424,9 @@ def __process_ClassesHelper(self, in_file: str) -> None:
all_class_iris.add(triple[2])
for class_iri in all_class_iris:
c_obj = UCO_Class()
- # print(triple)
- _logger.debug((triple))
- # print(triple[0].split("/"))
- s_triple = triple[0].split("/")
+ _logger.debug((class_iri))
+ # print(class_iri.split("/"))
+ s_triple = class_iri.split("/")
root = s_triple[-1]
ns_prefix = f"{s_triple[-3]}-{s_triple[-2]}"
# print(ns_prefix, root)
@@ -437,7 +436,7 @@ def __process_ClassesHelper(self, in_file: str) -> None:
if root in self.classes_dict.keys():
_logger.debug(f"None Unique Entry Found:\t {ns_prefix}:{root}")
- print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ # print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
self.classes_dict[root].append(c_obj)
else:
self.classes_dict[root] = [c_obj]
From f7be4e521655e3999d0bec3f76c017d53398a592 Mon Sep 17 00:00:00 2001
From: Kevin Fairbanks <1482470+kfairbanks@users.noreply.github.com>
Date: Wed, 24 Aug 2022 09:30:43 -0400
Subject: [PATCH 47/47] Adds datatypes to minimal context
---
src/uco-json-ld-compaction.py | 253 ++++++++++++++++++++++++++++++++++
1 file changed, 253 insertions(+)
create mode 100644 src/uco-json-ld-compaction.py
diff --git a/src/uco-json-ld-compaction.py b/src/uco-json-ld-compaction.py
new file mode 100644
index 00000000..b3525be4
--- /dev/null
+++ b/src/uco-json-ld-compaction.py
@@ -0,0 +1,253 @@
+#
+# Release Statement?
+#
+
+"""
+Purpose statement
+
+1) json-ld context to support compaction of all IRI base paths through defined
+ prefixes
+2) json-ld context to support compaction of all property type assertions
+3) json-ld context to support assertion of properties with potential
+   cardinalities >1 as set arrays
+4) json-ld context to support compaction of json-ld specific key strings @id,
+ @type, @value and @graph to simple json key strings id, type, value, and graph such that the body of content can be viewed as simple json and the context can be utilized to expand it into fully codified json-ld
+
+"""
+
+__version__ = "0.0.1"
+
+import argparse
+import logging
+from multiprocessing import context
+import os
+import typing
+import pathlib
+import sys
+import re
+import rdflib
+
+_logger = logging.getLogger(os.path.basename(__file__))
+
+"""
+ 27 def main():
+ 28 g = rdflib.Graph()
+ 29 for in_graph in args.in_graph:
+ 30 g.parse(in_graph, format="turtle")
+ 31 g.serialize(args.out_graph, format="turtle")
+"""
+
+class context_builder:
+ def __init__(self):
+ self.ttl_file_list=None
+ self.prefix_dict=None
+ self.top_srcdir=None
+ self.iri_dict=None
+ self.datatype_properties_dict={}
+
+ def get_ttl_files(self, subdirs=[]) -> list:
+ """
+ Finds all turtle (.ttl) files in directory structure
+ @subdirs - Optional list used to restrict search to particular directories.
+ """
+ if self.ttl_file_list is not None:
+ return self.ttl_file_list
+
+ #Shamelessly stolen from populate_node_kind.py
+ # 0. Self-orient.
+ self.top_srcdir = pathlib.Path(os.path.dirname(__file__)) / ".."
+ top_srcdir=self.top_srcdir
+ # Sanity check.
+ assert (top_srcdir / ".git").exists(), "Hard-coded top_srcdir discovery is no longer correct."
+
+ # 1. Load all ontology files into dictionary of graphs.
+
+        # The extra filtering loop keeps us from picking up CI files. Path.glob returns dot files, unlike shell's glob.
+ # The uco.ttl file is also skipped because the Python output removes supplementary prefix statements.
+ ontology_filepaths : typing.List[pathlib.Path] = []
+
+ file_list=[]
+ _logger.debug(top_srcdir)
+
+ if len(subdirs) < 1:
+ for x in (top_srcdir).rglob("*.ttl"):
+ if ".check-" in str(x):
+ continue
+ if "uco.ttl" in str(x):
+ continue
+ #_logger.debug(x)
+ file_list.append(x)
+ self.ttl_file_list=file_list
+ else:
+ for dir in subdirs:
+ for x in (top_srcdir / dir).rglob("*.ttl"):
+ if ".check-" in str(x):
+ continue
+ if "uco.ttl" in str(x):
+ continue
+ #_logger.debug(x)
+ file_list.append(x)
+ self.ttl_file_list=file_list
+
+ return self.ttl_file_list
+
+ def get_iris(self)->list:
+ """
+ Returns sorted list of IRIs
+ """
+ k_list=list(self.iri_dict.keys())
+ #print(k_list)
+ k_list.sort()
+ irs_list=[]
+ for k in k_list:
+ #print(f"\"{k}\":{self.iri_dict[k]}")
+ irs_list.append(f"\"{k}\":{self.iri_dict[k]}")
+ return irs_list
+
+ def __add_to_iri_dict(self, in_prefix):
+ """INTERNAL function: Adds unique key value pairs to dict
+ that will be used to generate context. Dies if inconsistent
+ key value pair is found.
+ @in_prefix - an input prefix triple
+ """
+ if self.iri_dict is None:
+ self.iri_dict={}
+
+ iri_dict = self.iri_dict
+ t_split=in_prefix.split()
+ #Taking the ':' off the end of the key
+ k=t_split[1][:-1]
+ v=t_split[2]
+ if k in iri_dict.keys():
+ #_logger.debug(f"'{k}' already exists")
+ if iri_dict[k]!=v:
+ _logger.error(f"Mismatched values:\t{iri_dict[k]}!={v}")
+ sys.exit()
+ else:
+ iri_dict[k]=v
+
+ def __process_DatatypePropertiesHelper(self, in_file=None):
+ """
+ Does the actual work using rdflib
+ @in_file - ttl file to get object properties from
+ """
+ graph = rdflib.Graph()
+ graph.parse(in_file, format="turtle")
+        "Make sure to do an iter that looks for rdflib.OWL.class"
+ #limit = 4
+ #count = 0
+ for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ print(triple)
+ print(triple[0].split('/'))
+ s_triple=triple[0].split('/')
+ root=s_triple[-1]
+ ns_prefix=f"{s_triple[-3]}-{s_triple[-2]}"
+ print(ns_prefix, root)
+
+ if root in self.datatype_properties_dict.keys():
+ print(f"None Unique Entry Found:\t {ns_prefix}:{root}")
+ self.datatype_properties_dict[root].append(ns_prefix)
+ else:
+ self.datatype_properties_dict[root]=[ns_prefix]
+
+ return
+ #count += 1
+ #if count >= limit:
+ # return
+
+ def process_DatatypeProperties(self):
+ for ttl_file in self.ttl_file_list:
+ self.__process_DatatypePropertiesHelper(in_file=ttl_file)
+
+ def get_prefixes(self):
+ """
+ Finds all prefix lines in list of ttl files. Adds them to an
+ an internal dict
+ """
+ ttl_file_list = self.get_ttl_files()
+ if len(ttl_file_list) < 1:
+ _logger.error("No ttls files to process")
+ sys.exit()
+
+ for ttl_file in ttl_file_list:
+ with open(ttl_file,'r') as file:
+ for line in file:
+ if re.search("^\@prefix",line):
+ #_logger.debug(line.strip())
+ self.__add_to_iri_dict(in_prefix=line.strip())
+
+
+
+def main():
+ argument_parser = argparse.ArgumentParser()
+ argument_parser.add_argument('--debug', action="store_true")
+ #argument_parser.add_argument('-i', '--in_graph', help="Input graph to be simplified")
+ args = argument_parser.parse_args()
+
+ logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+
+ _logger.debug("Debug Mode enabled")
+
+ cb = context_builder()
+ for i in (cb.get_ttl_files(subdirs=['ontology'])):
+ _logger.debug(f" Input ttl: {i}")
+
+ cb.get_prefixes()
+ #for i in cb.get_iris():
+ # print(i)
+
+ cb.process_DatatypeProperties()
+
+    """
+    If we cannot find rdf range, skip
+    if rdf range is a blank node, skip
+    """
+ dt_list = list(cb.datatype_properties_dict.keys())
+ dt_list.sort()
+ for key in dt_list:
+ #Non-unique roots
+ if len(cb.datatype_properties_dict[key]) > 1:
+ print(f"{key}:{cb.datatype_properties_dict[key]}")
+ for ns in cb.datatype_properties_dict[key]:
+ con_str=f"\"{ns}:{key}\":{{"
+ con_str+="\n\t\"@id\":\"%s:%s\"," % (ns,key)
+ con_str+="\n\t\"@type\":\"@id\""
+ con_str+="\n\t},"
+ print(con_str)
+ #Unique roots
+ else:
+ pass
+
+ #from pprint import pprint
+ #pprint(cb.datatype_properties_dict)
+ graph = rdflib.Graph()
+ graph.parse("../tests/uco_monolithic.ttl", format="turtle")
+ graph.serialize("_uco_monolithic.json-ld", format="json-ld")
+ graph.serialize("_uco_monolithic.json-ld", format="json-ld")
+ sys.exit()
+ #context keyword in graph parse and graph serialize
+ #black formater FLAKE8 for isort
+ #check the case-uilities python
+
+
+
+ graph = rdflib.Graph()
+ graph.parse("../tests/uco_monolithic.ttl", format="turtle")
+    "Make sure to do an iter that looks for rdflib.OWL.class"
+ limit = 4
+ count = 0
+ for triple in graph.triples((None,rdflib.RDF.type,rdflib.OWL.DatatypeProperty)):
+ print(triple[0].fragment)
+ print(triple)
+ count += 1
+ if count >= limit:
+ sys.exit()
+
+ #print(f"{args.in_graph}")
+ #g = rdflib.Graph()
+ #g.parse(args.in_graph, format="turtle")
+ #g.serialize("temp.json-ld", format="json-ld")
+
+
+if __name__ == "__main__":
+ main()