changeset 1661:00a5624d1d14

cleanups and optimizations
author drewp@bigasterisk.com
date Sun, 19 Sep 2021 16:51:51 -0700
parents 31f7dab6a60b
children 7a61113fd17d
files service/mqtt_to_rdf/infer_perf_test.py service/mqtt_to_rdf/inference.py service/mqtt_to_rdf/lhs_evaluation.py service/mqtt_to_rdf/stmt_chunk.py service/mqtt_to_rdf/stmt_chunk_test.py
diffstat 5 files changed, 29 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/service/mqtt_to_rdf/infer_perf_test.py	Sun Sep 19 15:39:37 2021 -0700
+++ b/service/mqtt_to_rdf/infer_perf_test.py	Sun Sep 19 16:51:51 2021 -0700
@@ -22,10 +22,10 @@
         expandedConfig = inference.infer(config)
         expandedConfig += inference.nonRuleStatements()
 
-        for loop in range(10):
+        for loop in range(50):
             # g = N3('''
             # <urn:uuid:2f5bbe1e-177f-11ec-9f97-8a12f6515350> a :MqttMessage ;
-            #     :body "online" ;   
+            #     :body "online" ;
             #     :onlineTerm :Online ;
             #     :topic ( "frontdoorlock" "status") .
             # ''')
@@ -33,7 +33,7 @@
 
             # g = N3('''
             # <urn:uuid:2f5bbe1e-177f-11ec-9f97-8a12f6515350> a :MqttMessage ;
-            #     :body "zz" ;   
+            #     :body "zz" ;
             #     :bodyFloat 12.2;
             #     :onlineTerm :Online ;
             #     :topic ( "air_quality_outdoor" "sensor" "bme280_temperature" "state") .
--- a/service/mqtt_to_rdf/inference.py	Sun Sep 19 15:39:37 2021 -0700
+++ b/service/mqtt_to_rdf/inference.py	Sun Sep 19 16:51:51 2021 -0700
@@ -48,7 +48,6 @@
     lhsChunk: Chunk
     prev: Optional['ChunkLooper']
     workingSet: 'ChunkedGraph'
-    parent: 'Lhs'  # just for lhs.graph, really
 
     def __repr__(self):
         return f'{self.__class__.__name__}{self._shortId}{"<pastEnd>" if self.pastEnd() else ""}'
@@ -61,7 +60,8 @@
         self._pastEnd = False
         self._seenBindings: List[CandidateBinding] = []
 
-        log.debug(f'{INDENT*6} introducing {self!r}({self.lhsChunk}, {self._myWorkingSetMatches=})')
+        if log.isEnabledFor(logging.DEBUG):
+            log.debug(f'{INDENT*6} introducing {self!r}({self.lhsChunk}, {self._myWorkingSetMatches=})')
 
         self.restart()
 
@@ -124,7 +124,7 @@
         log.debug(f'{INDENT*6} advanceWithFunctions {pred}')
 
         for functionType in functionsFor(pred):
-            fn = functionType(self.lhsChunk, self.parent.graph)
+            fn = functionType(self.lhsChunk)
             log.debug(f'{INDENT*7} ChunkLooper{self._shortId} advanceWithFunctions, {functionType=}')
 
             try:
@@ -263,7 +263,10 @@
 
             for s in perm:
                 try:
-                    elem = ChunkLooper(s, prev, knownTrue, parent=self)
+                    # These are getting rebuilt a lot which takes time. It would
+                    # be nice if they could accept a changing `prev` order
+                    # (which might already be ok).
+                    elem = ChunkLooper(s, prev, knownTrue)
                 except NoOptions:
                     log.debug(f'{INDENT*6} permutation didnt work, try another')
                     break
--- a/service/mqtt_to_rdf/lhs_evaluation.py	Sun Sep 19 15:39:37 2021 -0700
+++ b/service/mqtt_to_rdf/lhs_evaluation.py	Sun Sep 19 16:51:51 2021 -0700
@@ -32,11 +32,10 @@
     """any rule stmt that runs a function (not just a statement match)"""
     pred: URIRef
 
-    def __init__(self, chunk: Chunk, ruleGraph: ChunkedGraph):
+    def __init__(self, chunk: Chunk):
         self.chunk = chunk
         if chunk.predicate != self.pred:
             raise TypeError
-        self.ruleGraph = ruleGraph
 
     def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
         raise NotImplementedError
@@ -80,13 +79,6 @@
 class ListFunction(Function):
     """function that takes an rdf list as input"""
 
-    def usedStatements(self) -> Set[Triple]:
-        raise NotImplementedError
-        if self.chunk.subjist is None:
-            raise ValueError(f'expected subject list on {self.chunk}')
-        _, used = _parseList(self.ruleGraph, self.chunk.primary[0])
-        return used
-
     def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
         if self.chunk.subjList is None:
             raise ValueError(f'expected subject list on {self.chunk}')
@@ -111,15 +103,3 @@
         yield _byPred[pred]
     except KeyError:
         return
-
-
-# def lhsStmtsUsedByFuncs(graph: ChunkedGraph) -> Set[Chunk]:
-#     usedByFuncs: Set[Triple] = set()  # don't worry about matching these
-#     for s in graph:
-#         for cls in functionsFor(pred=s[1]):
-#             usedByFuncs.update(cls(s, graph).usedStatements())
-#     return usedByFuncs
-
-
-def rulePredicates() -> Set[URIRef]:
-    return set(c.pred for c in _registeredFunctionTypes)
--- a/service/mqtt_to_rdf/stmt_chunk.py	Sun Sep 19 15:39:37 2021 -0700
+++ b/service/mqtt_to_rdf/stmt_chunk.py	Sun Sep 19 16:51:51 2021 -0700
@@ -8,8 +8,7 @@
 from rdflib.term import BNode, Literal, Node, URIRef, Variable
 
 from candidate_binding import CandidateBinding
-from inference_types import BindingUnknown, Inconsistent, Triple
-from rdf_debug import graphDump
+from inference_types import BindingUnknown, Inconsistent
 
 log = logging.getLogger('infer')
 
@@ -20,13 +19,16 @@
 
 @dataclass
 class Chunk:  # rename this
-    """a statement, maybe with variables in it, except *the object can be an rdf list*.
-    This is done to optimize list comparisons (a lot) at the very minor expense of not
-    handling certain exotic cases, such as a branching list.
+    """A statement, maybe with variables in it, except *the subject or object
+    can be rdf lists*. This is done to optimize list comparisons (a lot) at the
+    very minor expense of not handling certain exotic cases, such as a branching
+    list.
 
-    Also the subject could be a list, e.g. for (?x ?y) math:sum ?z .
+    Example: (?x ?y) math:sum ?z . <-- this becomes one Chunk.
 
-    Also a function call in a rule is always contained in exactly one chunk.
+    A function call in a rule is always contained in exactly one chunk.
+
+    https://www.w3.org/TeamSubmission/n3/#:~:text=Implementations%20may%20treat%20list%20as%20a%20data%20type
     """
     # all immutable
     primary: ChunkPrimaryTriple
@@ -71,7 +73,8 @@
     def myMatches(self, g: 'ChunkedGraph') -> List['Chunk']:
         """Chunks from g where self, which may have BindableTerm wildcards, could match that chunk in g."""
         out: List['Chunk'] = []
-        log.debug(f'{INDENT*6} {self}.myMatches({g}')
+        if log.isEnabledFor(logging.DEBUG):
+            log.debug(f'{INDENT*6} {self}.myMatches({g}')
         for ch in g.allChunks():
             if self.matches(ch):
                 out.append(ch)
--- a/service/mqtt_to_rdf/stmt_chunk_test.py	Sun Sep 19 15:39:37 2021 -0700
+++ b/service/mqtt_to_rdf/stmt_chunk_test.py	Sun Sep 19 16:51:51 2021 -0700
@@ -62,15 +62,18 @@
     def testListUsedTwice(self):
         cg = ChunkedGraph(N3('(:u :v) :b :c, :d .'), functionsFor)
 
-        self.assertSetEqual(cg.staticChunks, set([
-            Chunk((None, ROOM.b, ROOM.c), subjList=[ROOM.u, ROOM.v]),
-            Chunk((None, ROOM.b, ROOM.d), subjList=[ROOM.u, ROOM.v])
-        ]))
+        self.assertSetEqual(
+            cg.staticChunks,
+            set([
+                Chunk((None, ROOM.b, ROOM.c), subjList=[ROOM.u, ROOM.v]),
+                Chunk((None, ROOM.b, ROOM.d), subjList=[ROOM.u, ROOM.v])
+            ]))
 
     def testUnusedListFragment(self):
         cg = ChunkedGraph(N3(':a rdf:first :b .'), functionsFor)
         self.assertFalse(cg)
 
+
 class TestApplyChunky(unittest.TestCase):
     binding = CandidateBinding({Variable('x'): ROOM.xval})