comparison service/mqtt_to_rdf/inference.py @ 1608:f928eb06a4f6

cleaning up inner loop
author drewp@bigasterisk.com
date Mon, 06 Sep 2021 17:03:19 -0700
parents b21885181e35
children 34f2817320cc
comparison
equal deleted inserted replaced
1607:b21885181e35 1608:f928eb06a4f6
34 34
35 @dataclass 35 @dataclass
36 class Lhs: 36 class Lhs:
37 graph: Graph 37 graph: Graph
38 38
39 staticRuleStmts: Graph = field(default_factory=Graph)
40 lhsBindables: Set[BindableTerm] = field(default_factory=set)
41 lhsBnodes: Set[BNode] = field(default_factory=set)
42
43 def __post_init__(self): 39 def __post_init__(self):
40 # do precomputation in here that's not specific to the workingSet
41 self.staticRuleStmts = Graph()
42 self.lhsBindables: Set[BindableTerm] = set()
43 self.lhsBnodes: Set[BNode] = set()
44 for ruleStmt in self.graph: 44 for ruleStmt in self.graph:
45 varsAndBnodesInStmt = [term for term in ruleStmt if isinstance(term, (Variable, BNode))] 45 varsAndBnodesInStmt = [term for term in ruleStmt if isinstance(term, (Variable, BNode))]
46 self.lhsBindables.update(varsAndBnodesInStmt) 46 self.lhsBindables.update(varsAndBnodesInStmt)
47 self.lhsBnodes.update(x for x in varsAndBnodesInStmt if isinstance(x, BNode)) 47 self.lhsBnodes.update(x for x in varsAndBnodesInStmt if isinstance(x, BNode))
48 if not varsAndBnodesInStmt: 48 if not varsAndBnodesInStmt:
70 continue 70 continue
71 71
72 stats['permCountSucceeding'] += 1 72 stats['permCountSucceeding'] += 1
73 yield binding 73 yield binding
74 74
75 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool:
76 for ruleStmt in self.staticRuleStmts:
77 if ruleStmt not in workingSet:
78 log.debug(f'{INDENT*3} {ruleStmt} not in working set- skip rule')
79 return False
80 return True
81
75 def _possibleBindings(self, workingSet, stats) -> Iterator['BoundLhs']: 82 def _possibleBindings(self, workingSet, stats) -> Iterator['BoundLhs']:
76 """this yields at least the working bindings, and possibly others""" 83 """this yields at least the working bindings, and possibly others"""
77 candidateTermMatches: Dict[BindableTerm, Set[Node]] = self._allCandidateTermMatches(workingSet) 84 candidateTermMatches: Dict[BindableTerm, Set[Node]] = self._allCandidateTermMatches(workingSet)
78 85
79 orderedVars, orderedValueSets = _organize(candidateTermMatches) 86 orderedVars, orderedValueSets = _organize(candidateTermMatches)
84 try: 91 try:
85 yield BoundLhs(self, CandidateBinding(dict(zip(orderedVars, perm)))) 92 yield BoundLhs(self, CandidateBinding(dict(zip(orderedVars, perm))))
86 except EvaluationFailed: 93 except EvaluationFailed:
87 stats['permCountFailingEval'] += 1 94 stats['permCountFailingEval'] += 1
88 95
89 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool:
90 for ruleStmt in self.staticRuleStmts:
91 if ruleStmt not in workingSet:
92 log.debug(f'{INDENT*3} {ruleStmt} not in working set- skip rule')
93 return False
94 return True
95
96 def _allCandidateTermMatches(self, workingSet: ReadOnlyWorkingSet) -> Dict[BindableTerm, Set[Node]]: 96 def _allCandidateTermMatches(self, workingSet: ReadOnlyWorkingSet) -> Dict[BindableTerm, Set[Node]]:
97 """the total set of terms each variable could possibly match""" 97 """the total set of terms each variable could possibly match"""
98 98
99 candidateTermMatches: Dict[BindableTerm, Set[Node]] = defaultdict(set) 99 candidateTermMatches: Dict[BindableTerm, Set[Node]] = defaultdict(set)
100 for lhsStmt in self.graph: 100 for lhsStmt in self.graph:
101 log.debug(f'{INDENT*4} possibles for this lhs stmt: {lhsStmt}') 101 log.debug(f'{INDENT*4} possibles for this lhs stmt: {lhsStmt}')
102 for i, trueStmt in enumerate(sorted(workingSet)): 102 for i, trueStmt in enumerate(workingSet):
103 # log.debug(f'{INDENT*5} consider this true stmt ({i}): {trueStmt}') 103 # log.debug(f'{INDENT*5} consider this true stmt ({i}): {trueStmt}')
104 104
105 for v, vals in self._bindingsFromStatement(lhsStmt, trueStmt): 105 for v, vals in self._bindingsFromStatement(lhsStmt, trueStmt):
106 candidateTermMatches[v].update(vals) 106 candidateTermMatches[v].update(vals)
107 107
141 141
142 142
143 @dataclass 143 @dataclass
144 class BoundLhs: 144 class BoundLhs:
145 lhs: Lhs 145 lhs: Lhs
146 binding: CandidateBinding 146 binding: CandidateBinding # mutable
147 147
148 def __post_init__(self): 148 def __post_init__(self):
149 self.usedByFuncs = Graph(identifier=GRAPH_ID) 149 self.usedByFuncs = Graph(identifier=GRAPH_ID)
150 self.graphWithoutEvals = self._graphWithoutEvals()
151
152 def _graphWithoutEvals(self) -> Graph:
153 g = Graph(identifier=GRAPH_ID)
154 self._applyFunctions() 150 self._applyFunctions()
155 151
156 for stmt in self.lhs.graph: 152 self.graphWithoutEvals = self.lhs.graph - self.usedByFuncs
157 if stmt not in self.usedByFuncs:
158 g.add(stmt)
159 return g
160 153
161 def _applyFunctions(self): 154 def _applyFunctions(self):
162 """may grow the binding with some results""" 155 """may grow the binding with some results"""
163 while True: 156 while True:
164 delta = self._applyFunctionsIteration() 157 delta = self._applyFunctionsIteration()
167 160
168 def _applyFunctionsIteration(self): 161 def _applyFunctionsIteration(self):
169 before = len(self.binding.binding) 162 before = len(self.binding.binding)
170 delta = 0 163 delta = 0
171 for ev in self.lhs.evaluations: 164 for ev in self.lhs.evaluations:
172 log.debug(f'{INDENT*3} found Evaluation')
173
174 newBindings, usedGraph = ev.resultBindings(self.binding) 165 newBindings, usedGraph = ev.resultBindings(self.binding)
175 self.usedByFuncs += usedGraph 166 self.usedByFuncs += usedGraph
176 self.binding.addNewBindings(newBindings) 167 self.binding.addNewBindings(newBindings)
177 delta = len(self.binding.binding) - before 168 delta = len(self.binding.binding) - before
178 if log.isEnabledFor(logging.DEBUG): 169 log.debug(f'{INDENT*4} eval rules made {delta} new bindings')
179 dump = "(...)"
180 if cast(int, usedGraph.__len__()) < 20:
181 dump = graphDump(usedGraph)
182 log.debug(f'{INDENT*4} rule {dump} made {delta} new bindings')
183 return delta 170 return delta
184
185 171
186 def verify(self, workingSet: ReadOnlyWorkingSet) -> bool: 172 def verify(self, workingSet: ReadOnlyWorkingSet) -> bool:
187 """Can this bound lhs be true all at once in workingSet?""" 173 """Can this bound lhs be true all at once in workingSet?"""
188 boundLhs = list(self.binding.apply(self.lhs.graph)) 174 boundLhs = list(self.binding.apply(self.lhs.graph)) # leave out statics and evals!
189 boundUsedByFuncs = list(self.binding.apply(self.usedByFuncs)) 175 boundUsedByFuncs = list(self.binding.apply(self.usedByFuncs))
190 176
191 self._logVerifyBanner(boundLhs, workingSet, boundUsedByFuncs) 177 self._logVerifyBanner(boundLhs, workingSet, boundUsedByFuncs)
192 178
193 for stmt in boundLhs: 179 for stmt in boundLhs:
200 else: 186 else:
201 log.debug(f'{INDENT*5} stmt not known to be true') 187 log.debug(f'{INDENT*5} stmt not known to be true')
202 return False 188 return False
203 return True 189 return True
204 190
191
205 def _logVerifyBanner(self, boundLhs, workingSet: ReadOnlyWorkingSet, boundUsedByFuncs): 192 def _logVerifyBanner(self, boundLhs, workingSet: ReadOnlyWorkingSet, boundUsedByFuncs):
206 if not log.isEnabledFor(logging.DEBUG): 193 if not log.isEnabledFor(logging.DEBUG):
207 return 194 return
195
208 log.debug(f'{INDENT*4}/ verify all bindings against this boundLhs:') 196 log.debug(f'{INDENT*4}/ verify all bindings against this boundLhs:')
209 for stmt in sorted(boundLhs): 197 for stmt in sorted(boundLhs):
210 log.debug(f'{INDENT*4}|{INDENT} {stmt}') 198 log.debug(f'{INDENT*4}|{INDENT} {stmt}')
211 199
212 # log.debug(f'{INDENT*4}| and against this workingSet:') 200 # log.debug(f'{INDENT*4}| and against this workingSet:')