comparison service/mqtt_to_rdf/inference.py @ 1601:30463df12d89

infer() dumps stats
author drewp@bigasterisk.com
date Sun, 05 Sep 2021 23:27:49 -0700
parents 89a50242cb5e
children e3c44ac6d3c5
comparison
equal deleted inserted replaced
1600:89a50242cb5e 1601:30463df12d89
2 copied from reasoning 2021-08-29. probably same api. should 2 copied from reasoning 2021-08-29. probably same api. should
3 be able to lib/ this out 3 be able to lib/ this out
4 """ 4 """
5 import itertools 5 import itertools
6 import logging 6 import logging
7 import time
7 from collections import defaultdict 8 from collections import defaultdict
8 from dataclasses import dataclass, field 9 from dataclasses import dataclass, field
9 from decimal import Decimal 10 from decimal import Decimal
10 from typing import Dict, Iterable, Iterator, List, Set, Tuple, Union, cast 11 from typing import Dict, Iterable, Iterator, List, Set, Tuple, Union, cast
11 12
20 Triple = Tuple[Node, Node, Node] 21 Triple = Tuple[Node, Node, Node]
21 Rule = Tuple[Graph, Node, Graph] 22 Rule = Tuple[Graph, Node, Graph]
22 BindableTerm = Union[Variable, BNode] 23 BindableTerm = Union[Variable, BNode]
23 ReadOnlyWorkingSet = ReadOnlyGraphAggregate 24 ReadOnlyWorkingSet = ReadOnlyGraphAggregate
24 25
25 READ_RULES_CALLS = Summary('read_rules_calls', 'calls') 26 INFER_CALLS = Summary('read_rules_calls', 'calls')
26 27
27 ROOM = Namespace("http://projects.bigasterisk.com/room/") 28 ROOM = Namespace("http://projects.bigasterisk.com/room/")
28 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') 29 LOG = Namespace('http://www.w3.org/2000/10/swap/log#')
29 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') 30 MATH = Namespace('http://www.w3.org/2000/10/swap/math#')
30 31
32 # Graph() makes a BNode if you don't pass
33 # identifier, which can be a bottleneck.
34 GRAPH_ID = URIRef('dont/care')
31 35
32 class EvaluationFailed(ValueError): 36 class EvaluationFailed(ValueError):
33 """e.g. we were given (5 math:greaterThan 6)""" 37 """e.g. we were given (5 math:greaterThan 6)"""
34 38
35 39
63 raise BindingUnknown() 67 raise BindingUnknown()
64 return term 68 return term
65 69
66 def applyFunctions(self, lhs) -> Graph: 70 def applyFunctions(self, lhs) -> Graph:
67 """may grow the binding with some results""" 71 """may grow the binding with some results"""
68 usedByFuncs = Graph() 72 usedByFuncs = Graph(identifier=GRAPH_ID)
69 while True: 73 while True:
70 delta = self._applyFunctionsIteration(lhs, usedByFuncs) 74 delta = self._applyFunctionsIteration(lhs, usedByFuncs)
71 if delta == 0: 75 if delta == 0:
72 break 76 break
73 return usedByFuncs 77 return usedByFuncs
133 137
134 138
135 @dataclass 139 @dataclass
136 class Lhs: 140 class Lhs:
137 graph: Graph 141 graph: Graph
142 stats: Dict
138 143
139 staticRuleStmts: Graph = field(default_factory=Graph) 144 staticRuleStmts: Graph = field(default_factory=Graph)
140 lhsBindables: Set[BindableTerm] = field(default_factory=set) 145 lhsBindables: Set[BindableTerm] = field(default_factory=set)
141 lhsBnodes: Set[BNode] = field(default_factory=set) 146 lhsBnodes: Set[BNode] = field(default_factory=set)
142 147
162 167
163 self._logCandidates(orderedVars, orderedValueSets) 168 self._logCandidates(orderedVars, orderedValueSets)
164 169
165 log.debug(f'{INDENT*3} trying all permutations:') 170 log.debug(f'{INDENT*3} trying all permutations:')
166 171
172
167 for perm in itertools.product(*orderedValueSets): 173 for perm in itertools.product(*orderedValueSets):
168 binding = CandidateBinding(dict(zip(orderedVars, perm))) 174 binding = CandidateBinding(dict(zip(orderedVars, perm)))
169 log.debug('') 175 log.debug('')
170 log.debug(f'{INDENT*4}*trying {binding}') 176 log.debug(f'{INDENT*4}*trying {binding}')
171 177
172 try: 178 try:
173 usedByFuncs = binding.applyFunctions(self) 179 usedByFuncs = binding.applyFunctions(self)
174 except EvaluationFailed: 180 except EvaluationFailed:
181 self.stats['permCountFailingEval'] += 1
175 continue 182 continue
176 183
177 if not binding.verify(self, workingSet, usedByFuncs): 184 if not binding.verify(self, workingSet, usedByFuncs):
178 log.debug(f'{INDENT*4} this binding did not verify') 185 log.debug(f'{INDENT*4} this binding did not verify')
186 self.stats['permCountFailingVerify'] += 1
179 continue 187 continue
188
189 self.stats['permCountSucceeding'] += 1
180 yield binding 190 yield binding
181 191
182 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool: 192 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool:
183 for ruleStmt in self.staticRuleStmts: 193 for ruleStmt in self.staticRuleStmts:
184 if ruleStmt not in workingSet: 194 if ruleStmt not in workingSet:
222 for v, vals in bindingsFromStatement.items(): 232 for v, vals in bindingsFromStatement.items():
223 log.debug(f'{INDENT*5} {v=} {vals=}') 233 log.debug(f'{INDENT*5} {v=} {vals=}')
224 yield v, vals 234 yield v, vals
225 235
226 def graphWithoutEvals(self, binding: CandidateBinding) -> Graph: 236 def graphWithoutEvals(self, binding: CandidateBinding) -> Graph:
227 g = Graph() 237 g = Graph(identifier=GRAPH_ID)
228 usedByFuncs = binding.applyFunctions(self) 238 usedByFuncs = binding.applyFunctions(self)
229 239
230 for stmt in self.graph: 240 for stmt in self.graph:
231 if stmt not in usedByFuncs: 241 if stmt not in usedByFuncs:
232 g.add(stmt) 242 g.add(stmt)
264 yield Evaluation([stmt[0]], stmt, []) 274 yield Evaluation([stmt[0]], stmt, [])
265 275
266 # internal, use findEvals 276 # internal, use findEvals
267 def __init__(self, operands: List[Node], mainStmt: Triple, otherStmts: Iterable[Triple]) -> None: 277 def __init__(self, operands: List[Node], mainStmt: Triple, otherStmts: Iterable[Triple]) -> None:
268 self.operands = operands 278 self.operands = operands
269 self.operandsStmts = Graph() 279 self.operandsStmts = Graph(identifier=GRAPH_ID)
270 self.operandsStmts += otherStmts # may grow 280 self.operandsStmts += otherStmts # may grow
271 self.operandsStmts.add(mainStmt) 281 self.operandsStmts.add(mainStmt)
272 self.stmt = mainStmt 282 self.stmt = mainStmt
273 283
274 def resultBindings(self, inputBindings) -> Tuple[Dict[BindableTerm, Node], Graph]: 284 def resultBindings(self, inputBindings) -> Tuple[Dict[BindableTerm, Node], Graph]:
326 for stmt in g: 336 for stmt in g:
327 if stmt[1] == LOG['implies']: 337 if stmt[1] == LOG['implies']:
328 self.rules.add(stmt) 338 self.rules.add(stmt)
329 # others should go to a default working set? 339 # others should go to a default working set?
330 340
341 @INFER_CALLS.time()
331 def infer(self, graph: Graph): 342 def infer(self, graph: Graph):
332 """ 343 """
333 returns new graph of inferred statements. 344 returns new graph of inferred statements.
334 """ 345 """
335 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:') 346 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:')
336 347 startTime = time.time()
348 self.stats: Dict[str, Union[int,float]] = defaultdict(lambda: 0)
337 # everything that is true: the input graph, plus every rule conclusion we can make 349 # everything that is true: the input graph, plus every rule conclusion we can make
338 workingSet = Graph() 350 workingSet = Graph()
339 workingSet += graph 351 workingSet += graph
340 352
341 # just the statements that came from RHS's of rules that fired. 353 # just the statements that came from RHS's of rules that fired.
342 implied = ConjunctiveGraph() 354 implied = ConjunctiveGraph()
343 355
344 bailout_iterations = 100 356 bailout_iterations = 100
345 delta = 1 357 delta = 1
358 self.stats['initWorkingSet'] = cast(int, workingSet.__len__())
346 while delta > 0 and bailout_iterations > 0: 359 while delta > 0 and bailout_iterations > 0:
347 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)') 360 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)')
348 bailout_iterations -= 1 361 bailout_iterations -= 1
349 delta = -len(implied) 362 delta = -len(implied)
350 self._iterateAllRules(workingSet, implied) 363 self._iterateAllRules(workingSet, implied)
351 delta += len(implied) 364 delta += len(implied)
365 self.stats['iterations'] += 1
352 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') 366 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts')
353 log.info(f'{INDENT*0} Inference done; {len(implied)} stmts implied:') 367 self.stats['timeSpent'] = round(time.time() - startTime, 3)
368 self.stats['impliedStmts'] = len(implied)
369 log.info(f'{INDENT*0} Inference done {dict(self.stats)}. Implied:')
354 for st in implied: 370 for st in implied:
355 log.info(f'{INDENT*1} {st}') 371 log.info(f'{INDENT*1} {st}')
356 return implied 372 return implied
357 373
358 def _iterateAllRules(self, workingSet: Graph, implied: Graph): 374 def _iterateAllRules(self, workingSet: Graph, implied: Graph):
359 for i, r in enumerate(self.rules): 375 for i, r in enumerate(self.rules):
360 self._logRuleApplicationHeader(workingSet, i, r) 376 self._logRuleApplicationHeader(workingSet, i, r)
361 _applyRule(Lhs(r[0]), r[2], workingSet, implied) 377 _applyRule(Lhs(r[0], self.stats), r[2], workingSet, implied, self.stats)
362 378
363 def _logRuleApplicationHeader(self, workingSet, i, r): 379 def _logRuleApplicationHeader(self, workingSet, i, r):
364 if not log.isEnabledFor(logging.DEBUG): 380 if not log.isEnabledFor(logging.DEBUG):
365 return 381 return
366 382
373 log.debug(f'{INDENT*2}-applying rule {i}') 389 log.debug(f'{INDENT*2}-applying rule {i}')
374 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r[0])}') 390 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r[0])}')
375 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r[2])}') 391 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r[2])}')
376 392
377 393
378 def _applyRule(lhs: Lhs, rhs: Graph, workingSet: Graph, implied: Graph): 394 def _applyRule(lhs: Lhs, rhs: Graph, workingSet: Graph, implied: Graph, stats: Dict):
379 for binding in lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet])): 395 for binding in lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet])):
380 log.debug(f'{INDENT*3} rule has a working binding:') 396 log.debug(f'{INDENT*3} rule has a working binding:')
381 397
382 for lhsBoundStmt in binding.apply(lhs.graphWithoutEvals(binding)): 398 for lhsBoundStmt in binding.apply(lhs.graphWithoutEvals(binding)):
383 log.debug(f'{INDENT*5} adding {lhsBoundStmt=}') 399 log.debug(f'{INDENT*5} adding {lhsBoundStmt=}')