Mercurial > code > home > repos > homeauto
comparison service/mqtt_to_rdf/inference.py @ 1601:30463df12d89
infer() dumps stats
author | drewp@bigasterisk.com |
---|---|
date | Sun, 05 Sep 2021 23:27:49 -0700 |
parents | 89a50242cb5e |
children | e3c44ac6d3c5 |
comparison
equal
deleted
inserted
replaced
1600:89a50242cb5e | 1601:30463df12d89 |
---|---|
2 copied from reasoning 2021-08-29. probably same api. should | 2 copied from reasoning 2021-08-29. probably same api. should |
3 be able to lib/ this out | 3 be able to lib/ this out |
4 """ | 4 """ |
5 import itertools | 5 import itertools |
6 import logging | 6 import logging |
| 7 import time |
7 from collections import defaultdict | 8 from collections import defaultdict |
8 from dataclasses import dataclass, field | 9 from dataclasses import dataclass, field |
9 from decimal import Decimal | 10 from decimal import Decimal |
10 from typing import Dict, Iterable, Iterator, List, Set, Tuple, Union, cast | 11 from typing import Dict, Iterable, Iterator, List, Set, Tuple, Union, cast |
11 | 12 |
20 Triple = Tuple[Node, Node, Node] | 21 Triple = Tuple[Node, Node, Node] |
21 Rule = Tuple[Graph, Node, Graph] | 22 Rule = Tuple[Graph, Node, Graph] |
22 BindableTerm = Union[Variable, BNode] | 23 BindableTerm = Union[Variable, BNode] |
23 ReadOnlyWorkingSet = ReadOnlyGraphAggregate | 24 ReadOnlyWorkingSet = ReadOnlyGraphAggregate |
24 | 25 |
25 READ_RULES_CALLS = Summary('read_rules_calls', 'calls') | 26 INFER_CALLS = Summary('read_rules_calls', 'calls') |
26 | 27 |
27 ROOM = Namespace("http://projects.bigasterisk.com/room/") | 28 ROOM = Namespace("http://projects.bigasterisk.com/room/") |
28 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') | 29 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') |
29 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') | 30 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') |
30 | 31 |
| 32 # Graph() makes a BNode if you don't pass |
| 33 # identifier, which can be a bottleneck. |
| 34 GRAPH_ID = URIRef('dont/care') |
31 | 35 |
32 class EvaluationFailed(ValueError): | 36 class EvaluationFailed(ValueError): |
33 """e.g. we were given (5 math:greaterThan 6)""" | 37 """e.g. we were given (5 math:greaterThan 6)""" |
34 | 38 |
35 | 39 |
63 raise BindingUnknown() | 67 raise BindingUnknown() |
64 return term | 68 return term |
65 | 69 |
66 def applyFunctions(self, lhs) -> Graph: | 70 def applyFunctions(self, lhs) -> Graph: |
67 """may grow the binding with some results""" | 71 """may grow the binding with some results""" |
68 usedByFuncs = Graph() | 72 usedByFuncs = Graph(identifier=GRAPH_ID) |
69 while True: | 73 while True: |
70 delta = self._applyFunctionsIteration(lhs, usedByFuncs) | 74 delta = self._applyFunctionsIteration(lhs, usedByFuncs) |
71 if delta == 0: | 75 if delta == 0: |
72 break | 76 break |
73 return usedByFuncs | 77 return usedByFuncs |
133 | 137 |
134 | 138 |
135 @dataclass | 139 @dataclass |
136 class Lhs: | 140 class Lhs: |
137 graph: Graph | 141 graph: Graph |
| 142 stats: Dict |
138 | 143 |
139 staticRuleStmts: Graph = field(default_factory=Graph) | 144 staticRuleStmts: Graph = field(default_factory=Graph) |
140 lhsBindables: Set[BindableTerm] = field(default_factory=set) | 145 lhsBindables: Set[BindableTerm] = field(default_factory=set) |
141 lhsBnodes: Set[BNode] = field(default_factory=set) | 146 lhsBnodes: Set[BNode] = field(default_factory=set) |
142 | 147 |
162 | 167 |
163 self._logCandidates(orderedVars, orderedValueSets) | 168 self._logCandidates(orderedVars, orderedValueSets) |
164 | 169 |
165 log.debug(f'{INDENT*3} trying all permutations:') | 170 log.debug(f'{INDENT*3} trying all permutations:') |
166 | 171 |
| 172 |
167 for perm in itertools.product(*orderedValueSets): | 173 for perm in itertools.product(*orderedValueSets): |
168 binding = CandidateBinding(dict(zip(orderedVars, perm))) | 174 binding = CandidateBinding(dict(zip(orderedVars, perm))) |
169 log.debug('') | 175 log.debug('') |
170 log.debug(f'{INDENT*4}*trying {binding}') | 176 log.debug(f'{INDENT*4}*trying {binding}') |
171 | 177 |
172 try: | 178 try: |
173 usedByFuncs = binding.applyFunctions(self) | 179 usedByFuncs = binding.applyFunctions(self) |
174 except EvaluationFailed: | 180 except EvaluationFailed: |
| 181 self.stats['permCountFailingEval'] += 1 |
175 continue | 182 continue |
176 | 183 |
177 if not binding.verify(self, workingSet, usedByFuncs): | 184 if not binding.verify(self, workingSet, usedByFuncs): |
178 log.debug(f'{INDENT*4} this binding did not verify') | 185 log.debug(f'{INDENT*4} this binding did not verify') |
| 186 self.stats['permCountFailingVerify'] += 1 |
179 continue | 187 continue |
| 188 |
| 189 self.stats['permCountSucceeding'] += 1 |
180 yield binding | 190 yield binding |
181 | 191 |
182 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool: | 192 def _allStaticStatementsMatch(self, workingSet: ReadOnlyWorkingSet) -> bool: |
183 for ruleStmt in self.staticRuleStmts: | 193 for ruleStmt in self.staticRuleStmts: |
184 if ruleStmt not in workingSet: | 194 if ruleStmt not in workingSet: |
222 for v, vals in bindingsFromStatement.items(): | 232 for v, vals in bindingsFromStatement.items(): |
223 log.debug(f'{INDENT*5} {v=} {vals=}') | 233 log.debug(f'{INDENT*5} {v=} {vals=}') |
224 yield v, vals | 234 yield v, vals |
225 | 235 |
226 def graphWithoutEvals(self, binding: CandidateBinding) -> Graph: | 236 def graphWithoutEvals(self, binding: CandidateBinding) -> Graph: |
227 g = Graph() | 237 g = Graph(identifier=GRAPH_ID) |
228 usedByFuncs = binding.applyFunctions(self) | 238 usedByFuncs = binding.applyFunctions(self) |
229 | 239 |
230 for stmt in self.graph: | 240 for stmt in self.graph: |
231 if stmt not in usedByFuncs: | 241 if stmt not in usedByFuncs: |
232 g.add(stmt) | 242 g.add(stmt) |
264 yield Evaluation([stmt[0]], stmt, []) | 274 yield Evaluation([stmt[0]], stmt, []) |
265 | 275 |
266 # internal, use findEvals | 276 # internal, use findEvals |
267 def __init__(self, operands: List[Node], mainStmt: Triple, otherStmts: Iterable[Triple]) -> None: | 277 def __init__(self, operands: List[Node], mainStmt: Triple, otherStmts: Iterable[Triple]) -> None: |
268 self.operands = operands | 278 self.operands = operands |
269 self.operandsStmts = Graph() | 279 self.operandsStmts = Graph(identifier=GRAPH_ID) |
270 self.operandsStmts += otherStmts # may grow | 280 self.operandsStmts += otherStmts # may grow |
271 self.operandsStmts.add(mainStmt) | 281 self.operandsStmts.add(mainStmt) |
272 self.stmt = mainStmt | 282 self.stmt = mainStmt |
273 | 283 |
274 def resultBindings(self, inputBindings) -> Tuple[Dict[BindableTerm, Node], Graph]: | 284 def resultBindings(self, inputBindings) -> Tuple[Dict[BindableTerm, Node], Graph]: |
326 for stmt in g: | 336 for stmt in g: |
327 if stmt[1] == LOG['implies']: | 337 if stmt[1] == LOG['implies']: |
328 self.rules.add(stmt) | 338 self.rules.add(stmt) |
329 # others should go to a default working set? | 339 # others should go to a default working set? |
330 | 340 |
| 341 @INFER_CALLS.time() |
331 def infer(self, graph: Graph): | 342 def infer(self, graph: Graph): |
332 """ | 343 """ |
333 returns new graph of inferred statements. | 344 returns new graph of inferred statements. |
334 """ | 345 """ |
335 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:') | 346 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:') |
336 | 347 startTime = time.time() |
| 348 self.stats: Dict[str, Union[int,float]] = defaultdict(lambda: 0) |
337 # everything that is true: the input graph, plus every rule conclusion we can make | 349 # everything that is true: the input graph, plus every rule conclusion we can make |
338 workingSet = Graph() | 350 workingSet = Graph() |
339 workingSet += graph | 351 workingSet += graph |
340 | 352 |
341 # just the statements that came from RHS's of rules that fired. | 353 # just the statements that came from RHS's of rules that fired. |
342 implied = ConjunctiveGraph() | 354 implied = ConjunctiveGraph() |
343 | 355 |
344 bailout_iterations = 100 | 356 bailout_iterations = 100 |
345 delta = 1 | 357 delta = 1 |
| 358 self.stats['initWorkingSet'] = cast(int, workingSet.__len__()) |
346 while delta > 0 and bailout_iterations > 0: | 359 while delta > 0 and bailout_iterations > 0: |
347 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)') | 360 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)') |
348 bailout_iterations -= 1 | 361 bailout_iterations -= 1 |
349 delta = -len(implied) | 362 delta = -len(implied) |
350 self._iterateAllRules(workingSet, implied) | 363 self._iterateAllRules(workingSet, implied) |
351 delta += len(implied) | 364 delta += len(implied) |
| 365 self.stats['iterations'] += 1 |
352 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') | 366 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') |
353 log.info(f'{INDENT*0} Inference done; {len(implied)} stmts implied:') | 367 self.stats['timeSpent'] = round(time.time() - startTime, 3) |
| 368 self.stats['impliedStmts'] = len(implied) |
| 369 log.info(f'{INDENT*0} Inference done {dict(self.stats)}. Implied:') |
354 for st in implied: | 370 for st in implied: |
355 log.info(f'{INDENT*1} {st}') | 371 log.info(f'{INDENT*1} {st}') |
356 return implied | 372 return implied |
357 | 373 |
358 def _iterateAllRules(self, workingSet: Graph, implied: Graph): | 374 def _iterateAllRules(self, workingSet: Graph, implied: Graph): |
359 for i, r in enumerate(self.rules): | 375 for i, r in enumerate(self.rules): |
360 self._logRuleApplicationHeader(workingSet, i, r) | 376 self._logRuleApplicationHeader(workingSet, i, r) |
361 _applyRule(Lhs(r[0]), r[2], workingSet, implied) | 377 _applyRule(Lhs(r[0], self.stats), r[2], workingSet, implied, self.stats) |
362 | 378 |
363 def _logRuleApplicationHeader(self, workingSet, i, r): | 379 def _logRuleApplicationHeader(self, workingSet, i, r): |
364 if not log.isEnabledFor(logging.DEBUG): | 380 if not log.isEnabledFor(logging.DEBUG): |
365 return | 381 return |
366 | 382 |
373 log.debug(f'{INDENT*2}-applying rule {i}') | 389 log.debug(f'{INDENT*2}-applying rule {i}') |
374 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r[0])}') | 390 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r[0])}') |
375 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r[2])}') | 391 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r[2])}') |
376 | 392 |
377 | 393 |
378 def _applyRule(lhs: Lhs, rhs: Graph, workingSet: Graph, implied: Graph): | 394 def _applyRule(lhs: Lhs, rhs: Graph, workingSet: Graph, implied: Graph, stats: Dict): |
379 for binding in lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet])): | 395 for binding in lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet])): |
380 log.debug(f'{INDENT*3} rule has a working binding:') | 396 log.debug(f'{INDENT*3} rule has a working binding:') |
381 | 397 |
382 for lhsBoundStmt in binding.apply(lhs.graphWithoutEvals(binding)): | 398 for lhsBoundStmt in binding.apply(lhs.graphWithoutEvals(binding)): |
383 log.debug(f'{INDENT*5} adding {lhsBoundStmt=}') | 399 log.debug(f'{INDENT*5} adding {lhsBoundStmt=}') |