Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/inference.py @ 1618:48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
author | drewp@bigasterisk.com |
---|---|
date | Wed, 08 Sep 2021 18:32:11 -0700 |
parents | 3a6ed545357f |
children |
rev | line source |
---|---|
1587 | 1 """ |
2 copied from reasoning 2021-08-29. probably same api. should | |
3 be able to lib/ this out | |
4 """ | |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
5 import itertools |
1587 | 6 import logging |
1601 | 7 import time |
1594 | 8 from collections import defaultdict |
9 from dataclasses import dataclass, field | |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
10 from typing import Dict, Iterator, List, Literal, Optional, Set, Tuple, Union, cast |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
11 |
1587 | 12 from prometheus_client import Summary |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
13 from rdflib import BNode, Graph, Namespace, URIRef |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate |
1587 | 15 from rdflib.term import Node, Variable |
16 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
17 from candidate_binding import CandidateBinding |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
18 from inference_types import (BindableTerm, EvaluationFailed, ReadOnlyWorkingSet, Triple) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
19 from lhs_evaluation import Evaluation |
1605 | 20 |
1587 | 21 log = logging.getLogger('infer') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
22 INDENT = ' ' |
1587 | 23 |
1601 | 24 INFER_CALLS = Summary('read_rules_calls', 'calls') |
1587 | 25 |
26 ROOM = Namespace("http://projects.bigasterisk.com/room/") | |
27 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') | |
28 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') | |
29 | |
30 | |
1594 | 31 @dataclass |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
32 class Lhs: |
1594 | 33 graph: Graph |
34 | |
35 def __post_init__(self): | |
1608 | 36 # do precomputation in here that's not specific to the workingSet |
37 self.staticRuleStmts = Graph() | |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
38 self.nonStaticRuleStmts = Graph() |
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
39 |
1608 | 40 self.lhsBindables: Set[BindableTerm] = set() |
41 self.lhsBnodes: Set[BNode] = set() | |
1594 | 42 for ruleStmt in self.graph: |
43 varsAndBnodesInStmt = [term for term in ruleStmt if isinstance(term, (Variable, BNode))] | |
44 self.lhsBindables.update(varsAndBnodesInStmt) | |
45 self.lhsBnodes.update(x for x in varsAndBnodesInStmt if isinstance(x, BNode)) | |
46 if not varsAndBnodesInStmt: | |
47 self.staticRuleStmts.add(ruleStmt) | |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
48 else: |
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
49 self.nonStaticRuleStmts.add(ruleStmt) |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
50 |
1615
bcfa368e5498
change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents:
1614
diff
changeset
|
51 self.nonStaticRuleStmtsSet = set(self.nonStaticRuleStmts) |
bcfa368e5498
change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents:
1614
diff
changeset
|
52 |
1602 | 53 self.evaluations = list(Evaluation.findEvals(self.graph)) |
54 | |
1609
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
55 def __repr__(self): |
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
56 return f"Lhs({graphDump(self.graph)})" |
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
57 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
58 def findCandidateBindings(self, knownTrue: ReadOnlyWorkingSet, stats) -> Iterator['BoundLhs']: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
59 """bindings that fit the LHS of a rule, using statements from workingSet and functions |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
60 from LHS""" |
1597 | 61 log.debug(f'{INDENT*3} nodesToBind: {self.lhsBindables}') |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
62 stats['findCandidateBindingsCalls'] += 1 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
63 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
64 if not self._allStaticStatementsMatch(knownTrue): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
65 stats['findCandidateBindingEarlyExits'] += 1 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
66 return |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
67 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
68 boundSoFar = CandidateBinding({}) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
69 for binding in self._possibleBindings(knownTrue, boundSoFar, stats): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
70 log.debug('') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
71 log.debug(f'{INDENT*4}*trying {binding.binding}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
72 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
73 if not binding.verify(knownTrue): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
74 log.debug(f'{INDENT*4} this binding did not verify') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
75 stats['permCountFailingVerify'] += 1 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
76 continue |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
77 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
78 stats['permCountSucceeding'] += 1 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
79 yield binding |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
80 boundSoFar.addNewBindings(binding.binding) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
81 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
82 def _allStaticStatementsMatch(self, knownTrue: ReadOnlyWorkingSet) -> bool: |
1613
03ed8c9abd5b
forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents:
1612
diff
changeset
|
83 # bug: see TestSelfFulfillingRule.test3 for a case where this rule's |
03ed8c9abd5b
forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents:
1612
diff
changeset
|
84 # static stmt is matched by a non-static stmt in the rule itself |
1608 | 85 for ruleStmt in self.staticRuleStmts: |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
86 if ruleStmt not in knownTrue: |
1608 | 87 log.debug(f'{INDENT*3} {ruleStmt} not in working set- skip rule') |
88 return False | |
89 return True | |
90 | |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
91 def _possibleBindings(self, workingSet, boundSoFar, stats) -> Iterator['BoundLhs']: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
92 """this yields at least the working bindings, and possibly others""" |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
93 for bindRow in self._product(workingSet, boundSoFar): |
1614
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
94 try: |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
95 yield BoundLhs(self, bindRow) |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
96 except EvaluationFailed: |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
97 stats['permCountFailingEval'] += 1 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
98 |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
99 def _product(self, workingSet, boundSoFar: CandidateBinding) -> Iterator[CandidateBinding]: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
100 orderedVars = [] |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
101 for stmt in self.graph: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
102 for t in stmt: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
103 if isinstance(t, (Variable, BNode)): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
104 orderedVars.append(t) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
105 orderedVars = sorted(set(orderedVars)) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
106 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
107 orderedValueSets = [] |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
108 for v in orderedVars: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
109 orderedValueSets.append(CandidateTermMatches(v, self, workingSet, boundSoFar).results) |
1600 | 110 self._logCandidates(orderedVars, orderedValueSets) |
1597 | 111 log.debug(f'{INDENT*3} trying all permutations:') |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
112 if not orderedVars: |
1614
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
113 yield CandidateBinding({}) |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
114 return |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
115 |
1614
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
116 if not orderedValueSets or not all(orderedValueSets): |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
117 # some var or bnode has no options at all |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
118 return |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
119 rings: List[Iterator[Node]] = [itertools.cycle(valSet) for valSet in orderedValueSets] |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
120 currentSet: List[Node] = [next(ring) for ring in rings] |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
121 starts = [valSet[-1] for valSet in orderedValueSets] |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
122 while True: |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
123 for col, curr in enumerate(currentSet): |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
124 currentSet[col] = next(rings[col]) |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
125 log.debug(f'{INDENT*4} currentSet: {repr(currentSet)}') |
1614
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
126 yield CandidateBinding(dict(zip(orderedVars, currentSet))) |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
127 if curr is not starts[col]: |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
128 break |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
129 if col == len(orderedValueSets) - 1: |
97b2c3cfdb83
refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents:
1613
diff
changeset
|
130 return |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
131 |
1600 | 132 def _allCandidateTermMatches(self, workingSet: ReadOnlyWorkingSet) -> Dict[BindableTerm, Set[Node]]: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
133 """the total set of terms each variable could possibly match""" |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
134 |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
135 candidateTermMatches: Dict[BindableTerm, Set[Node]] = defaultdict(set) |
1594 | 136 for lhsStmt in self.graph: |
1597 | 137 log.debug(f'{INDENT*4} possibles for this lhs stmt: {lhsStmt}') |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
138 for trueStmt in workingSet: |
1597 | 139 # log.debug(f'{INDENT*5} consider this true stmt ({i}): {trueStmt}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
140 |
1594 | 141 for v, vals in self._bindingsFromStatement(lhsStmt, trueStmt): |
142 candidateTermMatches[v].update(vals) | |
143 | |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
144 # for trueStmt in itertools.chain(workingSet, self.graph): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
145 # for b in self.lhsBnodes: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
146 # for t in [trueStmt[0], trueStmt[2]]: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
147 # if isinstance(t, (URIRef, BNode)): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
148 # candidateTermMatches[b].add(t) |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
149 return candidateTermMatches |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
150 |
1594 | 151 def _bindingsFromStatement(self, stmt1: Triple, stmt2: Triple) -> Iterator[Tuple[Variable, Set[Node]]]: |
152 """if these stmts match otherwise, what BNode or Variable mappings do we learn? | |
153 | |
154 e.g. stmt1=(?x B ?y) and stmt2=(A B C), then we yield (?x, {A}) and (?y, {C}) | |
155 or stmt1=(_:x B C) and stmt2=(A B C), then we yield (_:x, {A}) | |
156 or stmt1=(?x B C) and stmt2=(A B D), then we yield nothing | |
157 """ | |
158 bindingsFromStatement = {} | |
159 for term1, term2 in zip(stmt1, stmt2): | |
160 if isinstance(term1, (BNode, Variable)): | |
161 bindingsFromStatement.setdefault(term1, set()).add(term2) | |
162 elif term1 != term2: | |
163 break | |
164 else: | |
165 for v, vals in bindingsFromStatement.items(): | |
1597 | 166 log.debug(f'{INDENT*5} {v=} {vals=}') |
1594 | 167 yield v, vals |
168 | |
1600 | 169 def _logCandidates(self, orderedVars, orderedValueSets): |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
170 if not log.isEnabledFor(logging.DEBUG): |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
171 return |
1597 | 172 log.debug(f'{INDENT*3} resulting candidate terms:') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
173 for v, vals in zip(orderedVars, orderedValueSets): |
1597 | 174 log.debug(f'{INDENT*4} {v!r} could be:') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
175 for val in vals: |
1597 | 176 log.debug(f'{INDENT*5}{val!r}') |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
177 |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
178 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
179 @dataclass |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
180 class CandidateTermMatches: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
181 """lazily find the possible matches for this term""" |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
182 term: BindableTerm |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
183 lhs: Lhs |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
184 workingSet: Graph |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
185 boundSoFar: CandidateBinding |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
186 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
187 def __post_init__(self): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
188 self.results: List[Node] = [] # we have to be able to repeat the results |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
189 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
190 res: Set[Node] = set() |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
191 for trueStmt in self.workingSet: # all bound |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
192 lStmts = list(self.lhsStmtsContainingTerm()) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
193 log.debug(f'{INDENT*4} {trueStmt=} {len(lStmts)}') |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
194 for pat in self.boundSoFar.apply(lStmts, returnBoundStatementsOnly=False): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
195 log.debug(f'{INDENT*4} {pat=}') |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
196 implied = self._stmtImplies(pat, trueStmt) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
197 if implied is not None: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
198 res.add(implied) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
199 self.results = list(res) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
200 # self.results.sort() |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
201 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
202 log.debug(f'{INDENT*3} CandTermMatches: {self.term} {graphDump(self.lhs.graph)} {self.boundSoFar=} ===> {self.results=}') |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
203 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
204 def _stmtImplies(self, pat: Triple, trueStmt: Triple) -> Optional[Node]: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
205 """what value, if any, do we learn for our term from this LHS pattern statement and this known-true stmt""" |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
206 r = None |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
207 for p, t in zip(pat, trueStmt): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
208 if isinstance(p, (Variable, BNode)): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
209 if p != self.term: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
210 # stmt is unbound in more than just our term |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
211 continue # unsure what to do - err on the side of too many bindings, since they get rechecked later |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
212 if r is None: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
213 r = t |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
214 log.debug(f'{INDENT*4} implied term value {p=} {t=}') |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
215 elif r != t: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
216 # (?x c ?x) matched with (a b c) doesn't work |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
217 return None |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
218 return r |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
219 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
220 def lhsStmtsContainingTerm(self): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
221 # lhs could precompute this |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
222 for lhsStmt in self.lhs.graph: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
223 if self.term in lhsStmt: |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
224 yield lhsStmt |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
225 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
226 def __iter__(self): |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
227 return iter(self.results) |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
228 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
229 |
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
230 @dataclass |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
231 class BoundLhs: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
232 lhs: Lhs |
1610 | 233 binding: CandidateBinding |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
234 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
235 def __post_init__(self): |
1613
03ed8c9abd5b
forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents:
1612
diff
changeset
|
236 self.usedByFuncs = Graph() |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
237 self._applyFunctions() |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
238 |
1616
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
239 def lhsStmtsWithoutEvals(self): |
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
240 for stmt in self.lhs.graph: |
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
241 if stmt in self.usedByFuncs: |
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
242 continue |
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
243 yield stmt |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
244 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
245 def _applyFunctions(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
246 """may grow the binding with some results""" |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
247 while True: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
248 delta = self._applyFunctionsIteration() |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
249 if delta == 0: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
250 break |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
251 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
252 def _applyFunctionsIteration(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
253 before = len(self.binding.binding) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
254 delta = 0 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
255 for ev in self.lhs.evaluations: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
256 newBindings, usedGraph = ev.resultBindings(self.binding) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
257 self.usedByFuncs += usedGraph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
258 self.binding.addNewBindings(newBindings) |
1608 | 259 delta = len(self.binding.binding) - before |
260 log.debug(f'{INDENT*4} eval rules made {delta} new bindings') | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
261 return delta |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
262 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
263 def verify(self, workingSet: ReadOnlyWorkingSet) -> bool: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
264 """Can this bound lhs be true all at once in workingSet?""" |
1615
bcfa368e5498
change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents:
1614
diff
changeset
|
265 rem = cast(Set[Triple], self.lhs.nonStaticRuleStmtsSet.difference(self.usedByFuncs)) |
bcfa368e5498
change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents:
1614
diff
changeset
|
266 boundLhs = self.binding.apply(rem) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
267 |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
268 if log.isEnabledFor(logging.DEBUG): |
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
269 boundLhs = list(boundLhs) |
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
270 self._logVerifyBanner(boundLhs, workingSet) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
271 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
272 for stmt in boundLhs: |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
273 log.debug(f'{INDENT*4} check for %s', stmt) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
274 |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
275 if stmt not in workingSet: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
276 log.debug(f'{INDENT*5} stmt not known to be true') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
277 return False |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
278 return True |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
279 |
1611
a794a150a89b
a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents:
1610
diff
changeset
|
280 def _logVerifyBanner(self, boundLhs, workingSet: ReadOnlyWorkingSet): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
281 log.debug(f'{INDENT*4}/ verify all bindings against this boundLhs:') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
282 for stmt in sorted(boundLhs): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
283 log.debug(f'{INDENT*4}|{INDENT} {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
284 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
285 # log.debug(f'{INDENT*4}| and against this workingSet:') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
286 # for stmt in sorted(workingSet): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
287 # log.debug(f'{INDENT*4}|{INDENT} {stmt}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
288 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
289 log.debug(f'{INDENT*4}\\') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
290 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
291 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
292 @dataclass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
293 class Rule: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
294 lhsGraph: Graph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
295 rhsGraph: Graph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
296 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
297 def __post_init__(self): |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
298 self.lhs = Lhs(self.lhsGraph) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
299 |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
300 def applyRule(self, workingSet: Graph, implied: Graph, stats: Dict): |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
301 for bound in self.lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet]), stats): |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
302 log.debug(f'{INDENT*3} rule has a working binding:') |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
303 |
1616
3a6ed545357f
optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents:
1615
diff
changeset
|
304 for lhsBoundStmt in bound.binding.apply(bound.lhsStmtsWithoutEvals()): |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
305 log.debug(f'{INDENT*4} adding {lhsBoundStmt=}') |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
306 workingSet.add(lhsBoundStmt) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
307 for newStmt in bound.binding.apply(self.rhsGraph): |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
308 log.debug(f'{INDENT*4} adding {newStmt=}') |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
309 workingSet.add(newStmt) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
310 implied.add(newStmt) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
311 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
312 |
1587 | 313 class Inference: |
314 | |
315 def __init__(self) -> None: | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
316 self.rules = [] |
1587 | 317 |
318 def setRules(self, g: ConjunctiveGraph): | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
319 self.rules: List[Rule] = [] |
1599
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
320 for stmt in g: |
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
321 if stmt[1] == LOG['implies']: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
322 self.rules.append(Rule(stmt[0], stmt[2])) |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
323 # other stmts should go to a default working set? |
1587 | 324 |
1601 | 325 @INFER_CALLS.time() |
1587 | 326 def infer(self, graph: Graph): |
327 """ | |
328 returns new graph of inferred statements. | |
329 """ | |
1597 | 330 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:') |
1601 | 331 startTime = time.time() |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
332 stats: Dict[str, Union[int, float]] = defaultdict(lambda: 0) |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
333 # everything that is true: the input graph, plus every rule conclusion we can make |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
334 workingSet = Graph() |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
335 workingSet += graph |
1587 | 336 |
1594 | 337 # just the statements that came from RHS's of rules that fired. |
1587 | 338 implied = ConjunctiveGraph() |
339 | |
340 bailout_iterations = 100 | |
341 delta = 1 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
342 stats['initWorkingSet'] = cast(int, workingSet.__len__()) |
1587 | 343 while delta > 0 and bailout_iterations > 0: |
1618
48bf62008c82
attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents:
1616
diff
changeset
|
344 log.debug('') |
1597 | 345 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)') |
1587 | 346 bailout_iterations -= 1 |
347 delta = -len(implied) | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
348 self._iterateAllRules(workingSet, implied, stats) |
1587 | 349 delta += len(implied) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
350 stats['iterations'] += 1 |
1597 | 351 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
352 stats['timeSpent'] = round(time.time() - startTime, 3) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
353 stats['impliedStmts'] = len(implied) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
354 log.info(f'{INDENT*0} Inference done {dict(stats)}. Implied:') |
1587 | 355 for st in implied: |
1597 | 356 log.info(f'{INDENT*1} {st}') |
1587 | 357 return implied |
358 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
359 def _iterateAllRules(self, workingSet: Graph, implied: Graph, stats): |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
360 for i, rule in enumerate(self.rules): |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
361 self._logRuleApplicationHeader(workingSet, i, rule) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
362 rule.applyRule(workingSet, implied, stats) |
1587 | 363 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
364 def _logRuleApplicationHeader(self, workingSet, i, r: Rule): |
1594 | 365 if not log.isEnabledFor(logging.DEBUG): |
366 return | |
367 | |
368 log.debug('') | |
369 log.debug(f'{INDENT*2} workingSet:') | |
1597 | 370 for j, stmt in enumerate(sorted(workingSet)): |
371 log.debug(f'{INDENT*3} ({j}) {stmt}') | |
1594 | 372 |
373 log.debug('') | |
374 log.debug(f'{INDENT*2}-applying rule {i}') | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
375 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r.lhsGraph)}') |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
376 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r.rhsGraph)}') |
1594 | 377 |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
378 |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
379 def graphDump(g: Union[Graph, List[Triple]]): |
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
380 if not isinstance(g, Graph): |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
381 log.warning(f"it's a {type(g)}") |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
382 g2 = Graph() |
1594 | 383 g2 += g |
1590
327202020892
WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents:
1589
diff
changeset
|
384 g = g2 |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
385 g.bind('', ROOM) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
386 g.bind('ex', Namespace('http://example.com/')) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
387 lines = cast(bytes, g.serialize(format='n3')).decode('utf8').splitlines() |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
388 lines = [line.strip() for line in lines if not line.startswith('@prefix')] |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
389 return ' '.join(lines) |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
390 |
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
391 |
1600 | 392 def _organize(candidateTermMatches: Dict[BindableTerm, Set[Node]]) -> Tuple[List[BindableTerm], List[List[Node]]]: |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
393 items = list(candidateTermMatches.items()) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
394 items.sort() |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
395 orderedVars: List[BindableTerm] = [] |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
396 orderedValueSets: List[List[Node]] = [] |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
397 for v, vals in items: |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
398 orderedVars.append(v) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
399 orderedValues: List[Node] = list(vals) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
400 orderedValues.sort(key=str) |
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
401 orderedValueSets.append(orderedValues) |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
402 |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
403 return orderedVars, orderedValueSets |