Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/inference.py @ 1608:f928eb06a4f6
cleaning up inner loop
author | drewp@bigasterisk.com |
---|---|
date | Mon, 06 Sep 2021 17:03:19 -0700 |
parents | b21885181e35 |
children | 34f2817320cc |
rev | line source |
---|---|
1587 | 1 """ |
2 copied from reasoning 2021-08-29. probably same api. should | |
3 be able to lib/ this out | |
4 """ | |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
5 import itertools |
1587 | 6 import logging |
1601 | 7 import time |
1594 | 8 from collections import defaultdict |
9 from dataclasses import dataclass, field | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
10 from typing import Dict, Iterator, List, Set, Tuple, Union, cast |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
11 |
1587 | 12 from prometheus_client import Summary |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
13 from rdflib import BNode, Graph, Namespace, URIRef |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate |
1587 | 15 from rdflib.term import Node, Variable |
16 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
17 from candidate_binding import CandidateBinding |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
18 from inference_types import (BindableTerm, EvaluationFailed, ReadOnlyWorkingSet, Triple) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
19 from lhs_evaluation import Evaluation |
1605 | 20 |
1587 | 21 log = logging.getLogger('infer') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
22 INDENT = ' ' |
1587 | 23 |
1601 | 24 INFER_CALLS = Summary('read_rules_calls', 'calls') |
1587 | 25 |
26 ROOM = Namespace("http://projects.bigasterisk.com/room/") | |
27 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') | |
28 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') | |
29 | |
1601 | 30 # Graph() makes a BNode if you don't pass |
31 # identifier, which can be a bottleneck. | |
32 GRAPH_ID = URIRef('dont/care') | |
1587 | 33 |
34 | |
1594 | 35 @dataclass |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
36 class Lhs: |
1594 | 37 graph: Graph |
38 | |
39 def __post_init__(self): | |
1608 | 40 # do precomputation in here that's not specific to the workingSet |
41 self.staticRuleStmts = Graph() | |
42 self.lhsBindables: Set[BindableTerm] = set() | |
43 self.lhsBnodes: Set[BNode] = set() | |
1594 | 44 for ruleStmt in self.graph: |
45 varsAndBnodesInStmt = [term for term in ruleStmt if isinstance(term, (Variable, BNode))] | |
46 self.lhsBindables.update(varsAndBnodesInStmt) | |
47 self.lhsBnodes.update(x for x in varsAndBnodesInStmt if isinstance(x, BNode)) | |
48 if not varsAndBnodesInStmt: | |
49 self.staticRuleStmts.add(ruleStmt) | |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
50 |
1602 | 51 self.evaluations = list(Evaluation.findEvals(self.graph)) |
52 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
53 def findCandidateBindings(self, workingSet: ReadOnlyWorkingSet, stats) -> Iterator['BoundLhs']: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
54 """bindings that fit the LHS of a rule, using statements from workingSet and functions |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
55 from LHS""" |
1597 | 56 log.debug(f'{INDENT*3} nodesToBind: {self.lhsBindables}') |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
57 stats['findCandidateBindingsCalls'] += 1 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
58 |
1600 | 59 if not self._allStaticStatementsMatch(workingSet): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
60 stats['findCandidateBindingEarlyExits'] += 1 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
61 return |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
62 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
63 for binding in self._possibleBindings(workingSet, stats): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
64 log.debug('') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
65 log.debug(f'{INDENT*4}*trying {binding.binding}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
66 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
67 if not binding.verify(workingSet): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
68 log.debug(f'{INDENT*4} this binding did not verify') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
69 stats['permCountFailingVerify'] += 1 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
70 continue |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
71 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
72 stats['permCountSucceeding'] += 1 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
73 yield binding |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
74 |
1608 | 75 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool: |
76 for ruleStmt in self.staticRuleStmts: | |
77 if ruleStmt not in workingSet: | |
78 log.debug(f'{INDENT*3} {ruleStmt} not in working set- skip rule') | |
79 return False | |
80 return True | |
81 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
82 def _possibleBindings(self, workingSet, stats) -> Iterator['BoundLhs']: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
83 """this yields at least the working bindings, and possibly others""" |
1600 | 84 candidateTermMatches: Dict[BindableTerm, Set[Node]] = self._allCandidateTermMatches(workingSet) |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
85 |
1600 | 86 orderedVars, orderedValueSets = _organize(candidateTermMatches) |
87 self._logCandidates(orderedVars, orderedValueSets) | |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
88 |
1597 | 89 log.debug(f'{INDENT*3} trying all permutations:') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
90 for perm in itertools.product(*orderedValueSets): |
1594 | 91 try: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
92 yield BoundLhs(self, CandidateBinding(dict(zip(orderedVars, perm)))) |
1594 | 93 except EvaluationFailed: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
94 stats['permCountFailingEval'] += 1 |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
95 |
1600 | 96 def _allCandidateTermMatches(self, workingSet: ReadOnlyWorkingSet) -> Dict[BindableTerm, Set[Node]]: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
97 """the total set of terms each variable could possibly match""" |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
98 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
99 candidateTermMatches: Dict[BindableTerm, Set[Node]] = defaultdict(set) |
1594 | 100 for lhsStmt in self.graph: |
1597 | 101 log.debug(f'{INDENT*4} possibles for this lhs stmt: {lhsStmt}') |
1608 | 102 for i, trueStmt in enumerate(workingSet): |
1597 | 103 # log.debug(f'{INDENT*5} consider this true stmt ({i}): {trueStmt}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
104 |
1594 | 105 for v, vals in self._bindingsFromStatement(lhsStmt, trueStmt): |
106 candidateTermMatches[v].update(vals) | |
107 | |
108 for trueStmt in itertools.chain(workingSet, self.graph): | |
109 for b in self.lhsBnodes: | |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
110 for t in [trueStmt[0], trueStmt[2]]: |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
111 if isinstance(t, (URIRef, BNode)): |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
112 candidateTermMatches[b].add(t) |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
113 return candidateTermMatches |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
114 |
1594 | 115 def _bindingsFromStatement(self, stmt1: Triple, stmt2: Triple) -> Iterator[Tuple[Variable, Set[Node]]]: |
116 """if these stmts match otherwise, what BNode or Variable mappings do we learn? | |
117 | |
118 e.g. stmt1=(?x B ?y) and stmt2=(A B C), then we yield (?x, {A}) and (?y, {C}) | |
119 or stmt1=(_:x B C) and stmt2=(A B C), then we yield (_:x, {A}) | |
120 or stmt1=(?x B C) and stmt2=(A B D), then we yield nothing | |
121 """ | |
122 bindingsFromStatement = {} | |
123 for term1, term2 in zip(stmt1, stmt2): | |
124 if isinstance(term1, (BNode, Variable)): | |
125 bindingsFromStatement.setdefault(term1, set()).add(term2) | |
126 elif term1 != term2: | |
127 break | |
128 else: | |
129 for v, vals in bindingsFromStatement.items(): | |
1597 | 130 log.debug(f'{INDENT*5} {v=} {vals=}') |
1594 | 131 yield v, vals |
132 | |
1600 | 133 def _logCandidates(self, orderedVars, orderedValueSets): |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
134 if not log.isEnabledFor(logging.DEBUG): |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
135 return |
1597 | 136 log.debug(f'{INDENT*3} resulting candidate terms:') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
137 for v, vals in zip(orderedVars, orderedValueSets): |
1597 | 138 log.debug(f'{INDENT*4} {v!r} could be:') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
139 for val in vals: |
1597 | 140 log.debug(f'{INDENT*5}{val!r}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
141 |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
142 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
143 @dataclass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
144 class BoundLhs: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
145 lhs: Lhs |
1608 | 146 binding: CandidateBinding # mutable |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
147 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
148 def __post_init__(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
149 self.usedByFuncs = Graph(identifier=GRAPH_ID) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
150 self._applyFunctions() |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
151 |
1608 | 152 self.graphWithoutEvals = self.lhs.graph - self.usedByFuncs |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
153 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
154 def _applyFunctions(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
155 """may grow the binding with some results""" |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
156 while True: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
157 delta = self._applyFunctionsIteration() |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
158 if delta == 0: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
159 break |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
160 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
161 def _applyFunctionsIteration(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
162 before = len(self.binding.binding) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
163 delta = 0 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
164 for ev in self.lhs.evaluations: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
165 newBindings, usedGraph = ev.resultBindings(self.binding) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
166 self.usedByFuncs += usedGraph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
167 self.binding.addNewBindings(newBindings) |
1608 | 168 delta = len(self.binding.binding) - before |
169 log.debug(f'{INDENT*4} eval rules made {delta} new bindings') | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
170 return delta |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
171 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
172 def verify(self, workingSet: ReadOnlyWorkingSet) -> bool: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
173 """Can this bound lhs be true all at once in workingSet?""" |
1608 | 174 boundLhs = list(self.binding.apply(self.lhs.graph)) # leave out statics and evals! |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
175 boundUsedByFuncs = list(self.binding.apply(self.usedByFuncs)) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
176 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
177 self._logVerifyBanner(boundLhs, workingSet, boundUsedByFuncs) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
178 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
179 for stmt in boundLhs: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
180 log.debug(f'{INDENT*4} check for {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
181 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
182 if stmt in boundUsedByFuncs: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
183 pass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
184 elif stmt in workingSet: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
185 pass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
186 else: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
187 log.debug(f'{INDENT*5} stmt not known to be true') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
188 return False |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
189 return True |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
190 |
1608 | 191 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
192 def _logVerifyBanner(self, boundLhs, workingSet: ReadOnlyWorkingSet, boundUsedByFuncs): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
193 if not log.isEnabledFor(logging.DEBUG): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
194 return |
1608 | 195 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
196 log.debug(f'{INDENT*4}/ verify all bindings against this boundLhs:') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
197 for stmt in sorted(boundLhs): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
198 log.debug(f'{INDENT*4}|{INDENT} {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
199 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
200 # log.debug(f'{INDENT*4}| and against this workingSet:') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
201 # for stmt in sorted(workingSet): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
202 # log.debug(f'{INDENT*4}|{INDENT} {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
203 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
204 stmts = sorted(boundUsedByFuncs) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
205 if stmts: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
206 log.debug(f'{INDENT*4}| while ignoring these usedByFuncs:') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
207 for stmt in stmts: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
208 log.debug(f'{INDENT*4}|{INDENT} {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
209 log.debug(f'{INDENT*4}\\') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
210 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
211 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
212 @dataclass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
213 class Rule: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
214 lhsGraph: Graph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
215 rhsGraph: Graph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
216 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
217 def __post_init__(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
218 self.lhs = Lhs(self.lhsGraph) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
219 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
220 |
1587 | 221 class Inference: |
222 | |
223 def __init__(self) -> None: | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
224 self.rules = [] |
1587 | 225 |
226 def setRules(self, g: ConjunctiveGraph): | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
227 self.rules: List[Rule] = [] |
1599
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
228 for stmt in g: |
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
229 if stmt[1] == LOG['implies']: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
230 self.rules.append(Rule(stmt[0], stmt[2])) |
1599
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
231 # others should go to a default working set? |
1587 | 232 |
1601 | 233 @INFER_CALLS.time() |
1587 | 234 def infer(self, graph: Graph): |
235 """ | |
236 returns new graph of inferred statements. | |
237 """ | |
1597 | 238 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:') |
1601 | 239 startTime = time.time() |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
240 stats: Dict[str, Union[int, float]] = defaultdict(lambda: 0) |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
241 # everything that is true: the input graph, plus every rule conclusion we can make |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
242 workingSet = Graph() |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
243 workingSet += graph |
1587 | 244 |
1594 | 245 # just the statements that came from RHS's of rules that fired. |
1587 | 246 implied = ConjunctiveGraph() |
247 | |
248 bailout_iterations = 100 | |
249 delta = 1 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
250 stats['initWorkingSet'] = cast(int, workingSet.__len__()) |
1587 | 251 while delta > 0 and bailout_iterations > 0: |
1597 | 252 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)') |
1587 | 253 bailout_iterations -= 1 |
254 delta = -len(implied) | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
255 self._iterateAllRules(workingSet, implied, stats) |
1587 | 256 delta += len(implied) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
257 stats['iterations'] += 1 |
1597 | 258 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
259 stats['timeSpent'] = round(time.time() - startTime, 3) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
260 stats['impliedStmts'] = len(implied) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
261 log.info(f'{INDENT*0} Inference done {dict(stats)}. Implied:') |
1587 | 262 for st in implied: |
1597 | 263 log.info(f'{INDENT*1} {st}') |
1587 | 264 return implied |
265 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
266 def _iterateAllRules(self, workingSet: Graph, implied: Graph, stats): |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
267 for i, r in enumerate(self.rules): |
1600 | 268 self._logRuleApplicationHeader(workingSet, i, r) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
269 _applyRule(r.lhs, r.rhsGraph, workingSet, implied, stats) |
1587 | 270 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
271 def _logRuleApplicationHeader(self, workingSet, i, r: Rule): |
1594 | 272 if not log.isEnabledFor(logging.DEBUG): |
273 return | |
274 | |
275 log.debug('') | |
276 log.debug(f'{INDENT*2} workingSet:') | |
1597 | 277 for j, stmt in enumerate(sorted(workingSet)): |
278 log.debug(f'{INDENT*3} ({j}) {stmt}') | |
1594 | 279 |
280 log.debug('') | |
281 log.debug(f'{INDENT*2}-applying rule {i}') | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
282 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r.lhsGraph)}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
283 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r.rhsGraph)}') |
1594 | 284 |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
285 |
1601 | 286 def _applyRule(lhs: Lhs, rhs: Graph, workingSet: Graph, implied: Graph, stats: Dict): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
287 for bound in lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet]), stats): |
1597 | 288 log.debug(f'{INDENT*3} rule has a working binding:') |
289 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
290 for lhsBoundStmt in bound.binding.apply(bound.graphWithoutEvals): |
1597 | 291 log.debug(f'{INDENT*5} adding {lhsBoundStmt=}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
292 workingSet.add(lhsBoundStmt) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
293 for newStmt in bound.binding.apply(rhs): |
1597 | 294 log.debug(f'{INDENT*5} adding {newStmt=}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
295 workingSet.add(newStmt) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
296 implied.add(newStmt) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
297 |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
298 |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
299 def graphDump(g: Union[Graph, List[Triple]]): |
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
300 if not isinstance(g, Graph): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
301 log.warning(f"it's a {type(g)}") |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
302 g2 = Graph() |
1594 | 303 g2 += g |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
304 g = g2 |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
305 g.bind('', ROOM) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
306 g.bind('ex', Namespace('http://example.com/')) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
307 lines = cast(bytes, g.serialize(format='n3')).decode('utf8').splitlines() |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
308 lines = [line for line in lines if not line.startswith('@prefix')] |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
309 return ' '.join(lines) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
310 |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
311 |
1600 | 312 def _organize(candidateTermMatches: Dict[BindableTerm, Set[Node]]) -> Tuple[List[BindableTerm], List[List[Node]]]: |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
313 items = list(candidateTermMatches.items()) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
314 items.sort() |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
315 orderedVars: List[BindableTerm] = [] |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
316 orderedValueSets: List[List[Node]] = [] |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
317 for v, vals in items: |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
318 orderedVars.append(v) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
319 orderedValues: List[Node] = list(vals) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
320 orderedValues.sort(key=str) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
321 orderedValueSets.append(orderedValues) |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
322 |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
323 return orderedVars, orderedValueSets |