Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/inference.py @ 1650:2061df259224
move graphDump (on its way out, since reprs are getting better)
author | drewp@bigasterisk.com |
---|---|
date | Sat, 18 Sep 2021 23:53:59 -0700 |
parents | bb5d2b5370ac |
children | 20474ad4968e |
rev | line source |
---|---|
1587 | 1 """ |
2 copied from reasoning 2021-08-29. probably same api. should | |
3 be able to lib/ this out | |
4 """ | |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
5 import itertools |
1587 | 6 import logging |
1601 | 7 import time |
1594 | 8 from collections import defaultdict |
1626 | 9 from dataclasses import dataclass |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
10 from typing import (Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union, cast) |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
11 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
12 from prometheus_client import Histogram, Summary |
1649
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
13 from rdflib import RDF, BNode, Graph, Literal, Namespace |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate |
1640 | 15 from rdflib.term import Node, URIRef, Variable |
1587 | 16 |
1638
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
17 from candidate_binding import BindingConflict, CandidateBinding |
1637 | 18 from inference_types import BindingUnknown, ReadOnlyWorkingSet, Triple |
1640 | 19 from lhs_evaluation import functionsFor, lhsStmtsUsedByFuncs, rulePredicates |
1650
2061df259224
move graphDump (on its way out, since reprs are getting better)
drewp@bigasterisk.com
parents:
1649
diff
changeset
|
20 from rdf_debug import graphDump |
1605 | 21 |
# Module-wide logger; every debug trace in this file goes to the 'infer' channel.
log = logging.getLogger('infer')
# One unit of indentation, multiplied (INDENT*4, INDENT*6, ...) to nest debug log lines.
# NOTE(review): the literal's width may have been collapsed in this extract -- confirm against the repository.
INDENT = ' '

# Prometheus metrics: a Summary named 'inference_infer_calls' and a Histogram of
# statement counts with power-of-two buckets 2**2, 2**4, ..., 2**18.
INFER_CALLS = Summary('inference_infer_calls', 'calls')
INFER_GRAPH_SIZE = Histogram('inference_graph_size', 'statements', buckets=[2**x for x in range(2, 20, 2)])

# RDF namespaces referenced by the rules (project vocabulary plus the SWAP log/math vocabularies).
ROOM = Namespace("http://projects.bigasterisk.com/room/")
LOG = Namespace('http://www.w3.org/2000/10/swap/log#')
MATH = Namespace('http://www.w3.org/2000/10/swap/math#')
31 | |
32 | |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
def stmtTemplate(stmt: Triple) -> Tuple[Optional[Node], Optional[Node], Optional[Node]]:
    """Build the lookup pattern for `stmt`: each Variable or BNode term is
    replaced with None and every other term is passed through unchanged."""

    def fixedOrNone(term: Node) -> Optional[Node]:
        # Variables and bnodes are the unbound positions of the pattern.
        if isinstance(term, (Variable, BNode)):
            return None
        return term

    return (fixedOrNone(stmt[0]), fixedOrNone(stmt[1]), fixedOrNone(stmt[2]))
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
39 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
40 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
class NoOptions(ValueError):
    """Raised when a StmtLooper has no possibilities to add to the binding,
    which means the whole rule cannot apply."""
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
43 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
44 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
class Inconsistent(ValueError):
    """Raised when accepting a candidate statement would contradict a binding
    that has already been made."""
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
47 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
48 |
# Source of the small sequential ids that make StmtLooper reprs distinguishable in logs.
_stmtLooperShortId = itertools.count()


@dataclass
class StmtLooper:
    """given one LHS stmt, iterate through the possible matches for it,
    returning what bindings they would imply. Only distinct bindings are
    returned. The bindings build on any `prev` StmtLooper's results.

    This iterator is restartable.

    Constructing a StmtLooper immediately calls restart(), so the constructor
    raises NoOptions when there is not even one usable binding."""
    lhsStmt: Triple
    prev: Optional['StmtLooper']
    workingSet: ReadOnlyWorkingSet
    parent: 'Lhs'  # just for lhs.graph, really

    def __repr__(self) -> str:
        # Short id plus a past-end marker; no trailing paren, so callers can
        # append their own "(...)" detail (as __post_init__ does).
        return f'StmtLooper{self._shortId}{"<pastEnd>" if self.pastEnd() else ""}'

    def __post_init__(self):
        self._shortId = next(_stmtLooperShortId)
        # The working-set statements that fit our pattern are computed once, up front.
        self._myWorkingSetMatches = self._myMatches(self.workingSet)

        # These must exist before the first repr()/advance() call below.
        self._current = CandidateBinding({})
        self._pastEnd = False
        self._seenBindings: List[CandidateBinding] = []

        log.debug(f'introducing {self!r}({graphDump([self.lhsStmt])})')

        self.restart()

    def _myMatches(self, g: Graph) -> List[Triple]:
        """Return, in sorted order, the statements of `g` that match lhsStmt's
        template (variables and bnodes replaced with None)."""
        template = stmtTemplate(self.lhsStmt)

        # sorted() consumes the iterator directly; no intermediate list is needed.
        stmts = sorted(cast(Iterator[Triple], g.triples(template)))
        # plus new lhs possibilties...

        return stmts

    def _prevBindings(self) -> CandidateBinding:
        """Return the binding accumulated by `prev`, or an empty binding when
        there is no prev looper or it is past its end."""
        if not self.prev or self.prev.pastEnd():
            return CandidateBinding({})

        return self.prev.currentBinding()

    def advance(self) -> None:
        """update to a new set of bindings we haven't seen (since last restart), or go into pastEnd mode

        Raises NotImplementedError if this looper is already past its end;
        call restart() before advancing again."""
        if self._pastEnd:
            raise NotImplementedError('need restart')
        log.debug('')
        augmentedWorkingSet: Sequence[Triple]
        if self.prev is None:
            augmentedWorkingSet = self._myWorkingSetMatches
        else:
            # Run our matches through the previous looper's current binding.
            augmentedWorkingSet = list(self.prev.currentBinding().apply(self._myWorkingSetMatches,
                                                                        returnBoundStatementsOnly=False))

        # Formatting the whole working set is costly, so only do it when it will be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f'{INDENT*6} {self}.advance has {augmentedWorkingSet=}')

        if self._advanceWithPlainMatches(augmentedWorkingSet):
            return

        if self._advanceWithFunctions():
            return

        log.debug(f'{INDENT*6} {self} is past end')
        self._pastEnd = True

    def _advanceWithPlainMatches(self, augmentedWorkingSet: Sequence[Triple]) -> bool:
        """Try each candidate statement in order and adopt the first one that
        yields a consistent binding not yet seen since the last restart.

        Returns True if `_current` was updated, False if nothing new was found."""
        # Checked once: the debug lines below format whole collections on every pass.
        debugOn = log.isEnabledFor(logging.DEBUG)
        if debugOn:
            log.debug(f'{INDENT*7} {self} mines {len(augmentedWorkingSet)} matching augmented statements')
            for s in augmentedWorkingSet:
                log.debug(f'{INDENT*7} {s}')

        for stmt in augmentedWorkingSet:
            try:
                outBinding = self._totalBindingIfThisStmtWereTrue(stmt)
            except Inconsistent:
                if debugOn:
                    log.debug(f'{INDENT*7} StmtLooper{self._shortId} - {stmt} would be inconsistent with prev bindings')
                continue

            if debugOn:
                log.debug(f'{INDENT*7} {outBinding=} {self._seenBindings=}')
            if outBinding not in self._seenBindings:
                # Keep a snapshot in the seen-list, separate from the object handed out as _current.
                self._seenBindings.append(outBinding.copy())
                self._current = outBinding
                if debugOn:
                    log.debug(f'{INDENT*7} new binding from {self} -> {outBinding}')
                return True
        return False

    def _advanceWithFunctions(self) -> bool:
        """Try the functions registered for lhsStmt's predicate and adopt the
        first result that yields a binding not yet seen since the last restart.

        Returns True if `_current` was updated, False otherwise.
        Raises NotImplementedError when the predicate is not a URIRef."""
        pred: Node = self.lhsStmt[1]
        if not isinstance(pred, URIRef):
            raise NotImplementedError

        for functionType in functionsFor(pred):
            fn = functionType(self.lhsStmt, self.parent.graph)
            try:
                out = fn.bind(self._prevBindings())
            except BindingUnknown:
                # This function cannot be evaluated with the bindings so far; try the next one.
                continue
            if out is None:
                continue

            binding: CandidateBinding = self._prevBindings().copy()
            binding.addNewBindings(out)
            if binding not in self._seenBindings:
                # Store a snapshot, matching _advanceWithPlainMatches, so the seen-list
                # does not share an object with _current.
                self._seenBindings.append(binding.copy())
                self._current = binding
                log.debug(f'{INDENT*7} new binding from {self} -> {binding}')
                return True

        return False

    def _boundOperands(self, operands) -> List[Node]:
        """Return `operands` with every Variable/BNode replaced by its value
        under the previous loopers' binding; other terms pass through."""
        pb: CandidateBinding = self._prevBindings()

        boundOperands: List[Node] = []
        for op in operands:
            if isinstance(op, (Variable, BNode)):
                boundOperands.append(pb.applyTerm(op))
            else:
                boundOperands.append(op)
        return boundOperands

    def _totalBindingIfThisStmtWereTrue(self, newStmt: Triple) -> CandidateBinding:
        """Return a copy of the previous binding extended with whatever
        matching lhsStmt against `newStmt` implies.

        Raises Inconsistent if a variable/bnode of lhsStmt is already bound to
        a different term than `newStmt` supplies."""
        outBinding = self._prevBindings().copy()
        for rt, ct in zip(self.lhsStmt, newStmt):
            if isinstance(rt, (Variable, BNode)):
                if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct:
                    # Only build the message when it could actually be logged.
                    msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else ''
                    raise Inconsistent(msg)
                outBinding.addNewBindings(CandidateBinding({rt: ct}))
        return outBinding

    def currentBinding(self) -> CandidateBinding:
        """Return the binding chosen by the latest advance().

        Raises NotImplementedError when this looper is past its end."""
        if self.pastEnd():
            raise NotImplementedError()
        return self._current

    def pastEnd(self) -> bool:
        """True once advance() has run out of unseen bindings."""
        return self._pastEnd

    def restart(self) -> None:
        """Forget the bindings seen so far and advance to the first one again.

        Raises NoOptions if there is no binding at all."""
        self._pastEnd = False
        self._seenBindings = []
        self.advance()
        if self.pastEnd():
            raise NoOptions()
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
195 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
196 |
1594 | 197 @dataclass |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
198 class Lhs: |
1648 | 199 graph: Graph # our full LHS graph, as input. See below for the statements partitioned into groups. |
1594 | 200 |
201 def __post_init__(self): | |
1648 | 202 |
203 usedByFuncs: Set[Triple] = lhsStmtsUsedByFuncs(self.graph) | |
204 | |
205 stmtsToMatch = list(self.graph - usedByFuncs) | |
206 self.staticStmts = [] | |
207 self.patternStmts = [] | |
208 for st in stmtsToMatch: | |
209 if all(isinstance(term, (URIRef, Literal)) for term in st): | |
210 self.staticStmts.append(st) | |
211 else: | |
212 self.patternStmts.append(st) | |
213 | |
214 # sort them by variable dependencies; don't just try all perms! | |
215 def lightSortKey(stmt): # Not this. | |
216 (s, p, o) = stmt | |
217 return p in rulePredicates(), p, s, o | |
218 | |
219 self.patternStmts.sort(key=lightSortKey) | |
220 | |
221 self.myPreds = set(p for s, p, o in self.graph if isinstance(p, URIRef)) | |
222 self.myPreds -= rulePredicates() | |
223 self.myPreds -= {RDF.first, RDF.rest} | |
224 self.myPreds = set(self.myPreds) | |
1602 | 225 |
1609
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
226 def __repr__(self): |
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
227 return f"Lhs({graphDump(self.graph)})" |
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
228 |
1648 | 229 def findCandidateBindings(self, knownTrue: ReadOnlyWorkingSet, stats, ruleStatementsIterationLimit) -> Iterator['BoundLhs']: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
230 """bindings that fit the LHS of a rule, using statements from workingSet and functions |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
231 from LHS""" |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
232 if self.graph.__len__() == 0: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
233 # special case- no LHS! |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
234 yield BoundLhs(self, CandidateBinding({})) |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
235 return |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
236 |
1640 | 237 if self._checkPredicateCounts(knownTrue): |
238 stats['_checkPredicateCountsCulls'] += 1 | |
239 return | |
240 | |
1648 | 241 if not all(st in knownTrue for st in self.staticStmts): |
242 stats['staticStmtCulls'] += 1 | |
243 return | |
244 | |
245 if len(self.patternStmts) == 0: | |
246 # static only | |
247 yield BoundLhs(self, CandidateBinding({})) | |
248 return | |
249 | |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
250 log.debug(f'{INDENT*4} build new StmtLooper stack') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
251 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
252 try: |
1648 | 253 stmtStack = self._assembleRings(knownTrue, stats) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
254 except NoOptions: |
1632 | 255 log.debug(f'{INDENT*5} start up with no options; 0 bindings') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
256 return |
1632 | 257 self._debugStmtStack('initial odometer', stmtStack) |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
258 self._assertAllRingsAreValid(stmtStack) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
259 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
260 lastRing = stmtStack[-1] |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
261 iterCount = 0 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
262 while True: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
263 iterCount += 1 |
1648 | 264 if iterCount > ruleStatementsIterationLimit: |
265 raise ValueError('rule too complex') | |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
266 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
267 log.debug(f'{INDENT*4} vv findCandBindings iteration {iterCount}') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
268 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
269 yield BoundLhs(self, lastRing.currentBinding()) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
270 |
1632 | 271 self._debugStmtStack('odometer', stmtStack) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
272 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
273 done = self._advanceAll(stmtStack) |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
274 |
1632 | 275 self._debugStmtStack('odometer after ({done=})', stmtStack) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
276 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
277 log.debug(f'{INDENT*4} ^^ findCandBindings iteration done') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
278 if done: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
279 break |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
280 |
def _debugStmtStack(self, label, stmtStack):
    """Log `label`, then one debug line per ring in `stmtStack`.

    Each ring line shows the ring itself and its current binding, or
    "<end>" when the ring reports pastEnd().
    """
    # This is called from inside the findCandidateBindings loop, so skip the
    # per-ring currentBinding() calls and string formatting entirely when
    # debug logging is disabled.
    if not log.isEnabledFor(logging.DEBUG):
        return
    log.debug(f'{INDENT*5} {label}:')
    for ring in stmtStack:
        curbind = ring.currentBinding() if not ring.pastEnd() else "<end>"
        log.debug(f'{INDENT*6} {ring} curbind={curbind}')
285 | |
1640 | 286 def _checkPredicateCounts(self, knownTrue): |
287 """raise NoOptions quickly in some cases""" | |
1648 | 288 |
289 if any((None, p, None) not in knownTrue for p in self.myPreds): | |
1640 | 290 return True |
1648 | 291 log.info(f'{INDENT*2} checkPredicateCounts does not cull because all {self.myPreds=} are in knownTrue') |
1640 | 292 return False |
293 | |
def _assembleRings(self, knownTrue: ReadOnlyWorkingSet, stats) -> List[StmtLooper]:
    """Build one StmtLooper per pattern statement, chained in an order where
    every looper can be constructed.

    Orderings of self.patternStmts are tried one after another; the first
    ordering in which no StmtLooper constructor raises NoOptions is returned.
    Raises NoOptions when no ordering works, and NotImplementedError when too
    many orderings have been tried.
    """
    log.info(f'{INDENT*2} stats={dict(stats)}')
    log.info(f'{INDENT*2} taking permutations of {len(self.patternStmts)=}')

    for i, perm in enumerate(itertools.permutations(self.patternStmts)):
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f'{INDENT*5} [perm {i}] try stmts in this order: {" -> ".join(graphDump([p]) for p in perm)}')

        rings: List[StmtLooper] = []
        try:
            for stmt in perm:
                # each looper is linked to the one built just before it (None for the first)
                previous = rings[-1] if rings else None
                rings.append(StmtLooper(stmt, previous, knownTrue, parent=self))
        except NoOptions:
            log.debug(f'{INDENT*6} permutation didnt work, try another')
        else:
            return rings

        if i > 5000:
            raise NotImplementedError(f'trying too many permutations {len(self.patternStmts)=}')

    log.debug(f'{INDENT*6} no perms worked- rule cannot match anything')
    raise NoOptions()
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
321 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
def _advanceAll(self, stmtStack: List[StmtLooper]) -> bool:
    """Step the stack of rings like an odometer; return True when finished.

    The first ring is always advanced. Every ring is then checked for
    pastEnd(), whether or not it was advanced itself: a ring found past its
    end is restarted and the next ring is advanced, unless it is the last
    ring, in which case the whole iteration is done.
    """
    mustAdvance = True  # the first ring always advances
    for i, ring in enumerate(stmtStack):
        if mustAdvance:
            log.debug(f'{INDENT*5} advanceAll [{i}] {ring} carry/advance')
            ring.advance()

        # unlike a normal odometer, a ring can be past its end without having
        # been advanced itself, so pastEnd() is checked on every ring
        mustAdvance = ring.pastEnd()
        if not mustAdvance:
            continue

        if ring is stmtStack[-1]:
            log.debug(f'{INDENT*5} advanceAll [{i}] {ring} says we done')
            return True

        log.debug(f'{INDENT*5} advanceAll [{i}] {ring} restart')
        ring.restart()
    return False
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
338 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
339 def _assertAllRingsAreValid(self, stmtStack): |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
340 if any(ring.pastEnd() for ring in stmtStack): # this is an unexpected debug assertion |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
341 log.debug(f'{INDENT*5} some rings started at pastEnd {stmtStack}') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
342 raise NoOptions() |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
343 |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
344 |
1622
38bd8ef9ef67
add CandidateTermMatches, unused so far
drewp@bigasterisk.com
parents:
1621
diff
changeset
|
@dataclass
class BoundLhs:
    """An Lhs paired with one CandidateBinding for it."""

    # the rule left-hand side this binding belongs to
    lhs: Lhs
    # the candidate binding attached to that LHS
    binding: CandidateBinding
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
349 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
350 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
@dataclass
class Rule:
    """One inference rule made of a left-hand-side graph and a right-hand-side graph.

    applyRule() looks for bindings of the LHS in a working set and, for each
    one found, adds the bound RHS statements to the working set and to the
    implied graph.
    """
    lhsGraph: Graph
    rhsGraph: Graph

    def __post_init__(self):
        # matcher object built from the LHS statements
        self.lhs = Lhs(self.lhsGraph)
        # (sorted LHS binding items, rhs bnode) -> replacement BNode, so the same
        # binding always maps a given RHS bnode to the same new node
        self.rhsBnodeMap = {}

    def applyRule(self, workingSet: Graph, implied: Graph, stats: Dict, ruleStatementsIterationLimit):
        """Fire this rule for every LHS binding found in workingSet.

        workingSet: graph to match against; new statements are added to it.
        implied: graph that also receives every new statement.
        stats, ruleStatementsIterationLimit: passed through to
            Lhs.findCandidateBindings.
        """
        # The RHS bnodes do not depend on the binding, so collect them once,
        # and only if at least one binding is found.
        rhsBnodes = None
        for bound in self.lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet]), stats, ruleStatementsIterationLimit):
            log.debug(f'{INDENT*5} +rule has a working binding: {bound}')

            if rhsBnodes is None:
                rhsBnodes = self._rhsBnodes()
            self._bindRhsBnodes(bound, rhsBnodes)

            for newStmt in bound.binding.apply(self.rhsGraph):
                workingSet.add(newStmt)
                implied.add(newStmt)

    def _rhsBnodes(self):
        """Return the set of BNode terms appearing anywhere in rhsGraph."""
        return {t for stmt in self.rhsGraph for t in stmt if isinstance(t, BNode)}

    def _bindRhsBnodes(self, bound: 'BoundLhs', rhsBnodes):
        """Extend bound.binding so each RHS bnode maps to a per-binding BNode.

        rhs could have more bnodes, and they just need to be distinct per
        rule-firing that we do.
        """
        for b in rhsBnodes:
            key = tuple(sorted(bound.binding.binding.items())), b
            # only mint a new BNode when this (binding, bnode) pair is new
            if key not in self.rhsBnodeMap:
                self.rhsBnodeMap[key] = BNode()
            try:
                bound.binding.addNewBindings(CandidateBinding({b: self.rhsBnodeMap[key]}))
            except BindingConflict:
                # NOTE(review): this only skips the current bnode; the RHS is
                # still emitted for this binding. Confirm that is intended.
                continue
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
392 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
393 |
@dataclass
class Inference:
    """Holds a set of rules and applies them repeatedly to an input graph.

    NOTE(review): @dataclass is applied to a class with a hand-written
    __init__ and no annotated fields; confirm the decorator is still wanted.
    """
    # infer() keeps making passes over all rules while its pass counter is
    # <= this value, so up to rulesIterationLimit + 1 passes can run
    rulesIterationLimit = 3
    # handed to Rule.applyRule on every rule application
    ruleStatementsIterationLimit = 3

    def __init__(self) -> None:
        self.rules: List[Rule] = []
        self._nonRuleStmts: List[Triple] = []

    def setRules(self, g: ConjunctiveGraph):
        """Replace the current rules with the contents of g.

        Each statement whose predicate is LOG['implies'] becomes a
        Rule(subject, object); every other statement is kept as a non-rule
        statement.
        """
        self.rules = []
        self._nonRuleStmts = []
        for stmt in g:
            if stmt[1] == LOG['implies']:
                self.rules.append(Rule(stmt[0], stmt[2]))
            else:
                self._nonRuleStmts.append(stmt)

    def nonRuleStatements(self) -> List[Triple]:
        """Return the statements from the last setRules() that were not rules.

        This is the internal list itself, not a copy.
        """
        return self._nonRuleStmts

    @INFER_CALLS.time()
    def infer(self, graph: Graph):
        """
        returns new graph of inferred statements.
        """
        n = graph.__len__()
        INFER_GRAPH_SIZE.observe(n)
        log.info(f'{INDENT*0} Begin inference of graph len={n} with rules len={len(self.rules)}:')
        startTime = time.time()
        stats: Dict[str, Union[int, float]] = defaultdict(lambda: 0)

        # everything that is true: the input graph, plus every rule conclusion we can make
        workingSet = Graph()
        workingSet += self._nonRuleStmts
        workingSet += graph

        # just the statements that came from RHS's of rules that fired.
        implied = ConjunctiveGraph()

        rulesIterations = 0
        delta = 1
        stats['initWorkingSet'] = cast(int, workingSet.__len__())
        # keep making passes until a pass adds nothing new or the limit is hit
        while delta > 0 and rulesIterations <= self.rulesIterationLimit:
            log.debug('')
            log.info(f'{INDENT*1}*iteration {rulesIterations}')

            # delta = number of implied statements added by this pass
            delta = -len(implied)
            self._iterateAllRules(workingSet, implied, stats)
            delta += len(implied)
            rulesIterations += 1
            log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts')
        stats['iterations'] = rulesIterations
        stats['timeSpent'] = round(time.time() - startTime, 3)
        stats['impliedStmts'] = len(implied)
        log.info(f'{INDENT*0} Inference done {dict(stats)}.')
        # graphDump builds a full dump of the implied graph; only pay for it
        # when the result will actually be logged
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Implied:')
            log.debug(graphDump(implied))
        return implied

    def _iterateAllRules(self, workingSet: Graph, implied: Graph, stats):
        """Apply every rule once, in order, against the shared workingSet."""
        for i, rule in enumerate(self.rules):
            self._logRuleApplicationHeader(workingSet, i, rule)
            rule.applyRule(workingSet, implied, stats, self.ruleStatementsIterationLimit)

    def _logRuleApplicationHeader(self, workingSet, i, r: Rule):
        """Debug-log the working set and the definition of rule number i."""
        if not log.isEnabledFor(logging.DEBUG):
            return

        log.debug('')
        log.debug(f'{INDENT*2} workingSet:')
        log.debug(f'{INDENT*3} {graphDump(workingSet, oneLine=False)}')

        log.debug('')
        log.debug(f'{INDENT*2}-applying rule {i}')
        log.debug(f'{INDENT*3} rule def lhs:')
        for stmt in sorted(r.lhsGraph, reverse=True):
            log.debug(f'{INDENT*4} {stmt}')
        log.debug(f'{INDENT*3} rule def rhs: {graphDump(r.rhsGraph)}')