changeset 1659:15e84c71beee

parse lists from graph into the Chunks
author drewp@bigasterisk.com
date Sun, 19 Sep 2021 14:42:39 -0700
parents 7ec2483d61b5
children 31f7dab6a60b
files service/mqtt_to_rdf/inference_test.py service/mqtt_to_rdf/stmt_chunk.py service/mqtt_to_rdf/stmt_chunk_test.py
diffstat 3 files changed, 71 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/service/mqtt_to_rdf/inference_test.py	Sun Sep 19 13:33:10 2021 -0700
+++ b/service/mqtt_to_rdf/inference_test.py	Sun Sep 19 14:42:39 2021 -0700
@@ -27,6 +27,7 @@
 @prefix ex: <http://example.com/> .
 @prefix room: <http://projects.bigasterisk.com/room/> .
 @prefix math: <http://www.w3.org/2000/10/swap/math#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
 """
     g.parse(StringInputSource((prefix + txt).encode('utf8')), format='n3')
     return g
--- a/service/mqtt_to_rdf/stmt_chunk.py	Sun Sep 19 13:33:10 2021 -0700
+++ b/service/mqtt_to_rdf/stmt_chunk.py	Sun Sep 19 14:42:39 2021 -0700
@@ -1,9 +1,10 @@
 import itertools
 import logging
 from dataclasses import dataclass
-from typing import Iterable, Iterator, List, Optional, Set, cast
+from typing import Iterable, Iterator, List, Optional, Set, Tuple, cast
 
 from rdflib.graph import Graph
+from rdflib.namespace import RDF
 from rdflib.term import BNode, Literal, Node, URIRef, Variable
 
 from candidate_binding import CandidateBinding
@@ -26,11 +27,14 @@
     Also a function call in a rule is always contained in exactly one chunk.
     """
     # all immutable
-    primary: Triple
+    primary: Tuple[Optional[Node], Node, Optional[Node]]
     subjList: Optional[List[Node]] = None
     objList: Optional[List[Node]] = None
 
     def __post_init__(self):
+        if not (((self.primary[0] is not None) ^ (self.subjList is not None)) and
+                ((self.primary[2] is not None) ^ (self.objList is not None))):
+            raise TypeError("invalid chunk init")
         self.predicate = self.primary[1]
         self.sortKey = (self.primary, tuple(self.subjList or []), tuple(self.objList or []))
 
@@ -40,10 +44,6 @@
     def __gt__(self, other):
         return self.sortKey > other.sortKey
 
-    @classmethod
-    def splitGraphIntoChunks(cls, graph: Graph) -> Iterator['Chunk']:
-        for stmt in graph:
-            yield cls(primary=stmt, subjList=None, objList=None)
 
     def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding:
         outBinding = prevBindings.copy()
@@ -74,7 +74,9 @@
         return True
 
     def __repr__(self):
-        return graphDump([self.primary]) + (''.join('+%s' % obj for obj in self.objList) if self.objList else '')
+        pre = ('+'.join('%s' % elem for elem in self.subjList) + '+' if self.subjList else '')
+        post = ('+' + '+'.join('%s' % elem for elem in self.objList) if self.objList else '')
+        return pre + repr(self.primary) + post
 
     def isFunctionCall(self, functionsFor) -> bool:
         return bool(list(functionsFor(cast(URIRef, self.predicate))))
@@ -89,7 +91,7 @@
 
 
 def _termIsStatic(term: Node) -> bool:
-    return isinstance(term, (URIRef, Literal))
+    return isinstance(term, (URIRef, Literal)) or term is None
 
 
 def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]:
@@ -124,7 +126,36 @@
         self.chunksUsedByFuncs: Set[Chunk] = set()
         self.staticChunks: Set[Chunk] = set()
         self.patternChunks: Set[Chunk] = set()
-        for c in Chunk.splitGraphIntoChunks(graph):
+
+        firstNodes = {}
+        restNodes = {}
+        graphStmts = set()
+        for s, p, o in graph:
+            if p == RDF['first']:
+                firstNodes[s] = o
+            elif p == RDF['rest']:
+                restNodes[s] = o
+            else:
+                graphStmts.add((s, p, o))
+
+        def gatherList(start):
+            lst = []
+            cur = start
+            while cur != RDF['nil']:
+                lst.append(firstNodes[cur])
+                cur = restNodes[cur]
+            return lst
+
+        for s, p, o in graphStmts:
+            subjList = objList = None
+            if s in firstNodes:
+                subjList = gatherList(s)
+                s = None
+            if o in firstNodes:
+                objList = gatherList(o)
+                o = None
+            c = Chunk((s, p, o), subjList=subjList, objList=objList)
+
             if c.isFunctionCall(functionsFor):
                 self.chunksUsedByFuncs.add(c)
             elif c.isStatic():
--- a/service/mqtt_to_rdf/stmt_chunk_test.py	Sun Sep 19 13:33:10 2021 -0700
+++ b/service/mqtt_to_rdf/stmt_chunk_test.py	Sun Sep 19 14:42:39 2021 -0700
@@ -1,3 +1,4 @@
+from time import clock_gettime
 import unittest
 
 from rdflib.term import Variable
@@ -41,6 +42,35 @@
         self.assertFalse(cg.noPredicatesAppear([ROOM.b, ROOM.d]))
 
 
+class TestListCollection(unittest.TestCase):
+
+    def testSubjList(self):
+        cg = ChunkedGraph(N3('(:u :v) :b :c .'), functionsFor)
+        expected = Chunk((None, ROOM.b, ROOM.c), subjList=[ROOM.u, ROOM.v])
+        self.assertEqual(cg.staticChunks, set([expected]))
+
+    def testObjList(self):
+        cg = ChunkedGraph(N3(':a :b (:u :v) .'), functionsFor)
+        expected = Chunk((ROOM.a, ROOM.b, None), objList=[ROOM.u, ROOM.v])
+        self.assertSetEqual(cg.staticChunks, set([expected]))
+
+    def testVariableInListMakesAPatternChunk(self):
+        cg = ChunkedGraph(N3(':a :b (?x :v) .'), functionsFor)
+        expected = Chunk((ROOM.a, ROOM.b, None), objList=[Variable('x'), ROOM.v])
+        self.assertSetEqual(cg.patternChunks, set([expected]))
+
+    def testListUsedTwice(self):
+        cg = ChunkedGraph(N3('(:u :v) :b :c, :d .'), functionsFor)
+
+        self.assertSetEqual(cg.staticChunks, set([
+            Chunk((None, ROOM.b, ROOM.c), subjList=[ROOM.u, ROOM.v]),
+            Chunk((None, ROOM.b, ROOM.d), subjList=[ROOM.u, ROOM.v])
+        ]))
+
+    def testUnusedListFragment(self):
+        cg = ChunkedGraph(N3(':a rdf:first :b .'), functionsFor)
+        self.assertFalse(cg)
+
 class TestApplyChunky(unittest.TestCase):
     binding = CandidateBinding({Variable('x'): ROOM.xval})