Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/inference.py @ 1656:d8e5b2232474
bugfix for simple (no arg lists) functions not evaluating
author | drewp@bigasterisk.com |
---|---|
date | Sun, 19 Sep 2021 13:22:29 -0700 |
parents | e7d594c065d4 |
children | 00a5624d1d14 |
rev | line source |
---|---|
1587 | 1 """ |
2 copied from reasoning 2021-08-29. probably same api. should | |
3 be able to lib/ this out | |
4 """ | |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
5 import itertools |
1587 | 6 import logging |
1601 | 7 import time |
1594 | 8 from collections import defaultdict |
1626 | 9 from dataclasses import dataclass |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
10 from typing import Dict, Iterator, List, Optional, Sequence, Tuple, Union, cast |
1588
0757fafbfdab
WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents:
1587
diff
changeset
|
11 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
12 from prometheus_client import Histogram, Summary |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
13 from rdflib import RDF, BNode, Graph, Namespace |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
14 from rdflib.graph import ConjunctiveGraph |
1640 | 15 from rdflib.term import Node, URIRef, Variable |
1587 | 16 |
1638
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
17 from candidate_binding import BindingConflict, CandidateBinding |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
18 from inference_types import BindingUnknown, Inconsistent, Triple |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
19 from lhs_evaluation import functionsFor |
1650
2061df259224
move graphDump (on its way out, since reprs are getting better)
drewp@bigasterisk.com
parents:
1649
diff
changeset
|
20 from rdf_debug import graphDump |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
21 from stmt_chunk import Chunk, ChunkedGraph, applyChunky |
1605 | 22 |
1587 | 23 log = logging.getLogger('infer') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
24 INDENT = ' ' |
1587 | 25 |
1626 | 26 INFER_CALLS = Summary('inference_infer_calls', 'calls') |
27 INFER_GRAPH_SIZE = Histogram('inference_graph_size', 'statements', buckets=[2**x for x in range(2, 20, 2)]) | |
1587 | 28 |
29 ROOM = Namespace("http://projects.bigasterisk.com/room/") | |
30 LOG = Namespace('http://www.w3.org/2000/10/swap/log#') | |
31 MATH = Namespace('http://www.w3.org/2000/10/swap/math#') | |
32 | |
33 | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
34 class NoOptions(ValueError): |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
35 """ChunkLooper has no possibilites to add to the binding; the whole rule must therefore not apply""" |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
36 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
37 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
38 _chunkLooperShortId = itertools.count() |
1632 | 39 |
40 | |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
41 @dataclass |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
42 class ChunkLooper: |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
43 """given one LHS Chunk, iterate through the possible matches for it, |
1632 | 44 returning what bindings they would imply. Only distinct bindings are |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
45 returned. The bindings build on any `prev` ChunkLooper's results. |
1632 | 46 |
47 This iterator is restartable.""" | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
48 lhsChunk: Chunk |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
49 prev: Optional['ChunkLooper'] |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
50 workingSet: 'ChunkedGraph' |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
51 parent: 'Lhs' # just for lhs.graph, really |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
52 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
53 def __repr__(self): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
54 return f'{self.__class__.__name__}{self._shortId}{"<pastEnd>" if self.pastEnd() else ""}' |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
55 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
56 def __post_init__(self): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
57 self._shortId = next(_chunkLooperShortId) |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
58 self._myWorkingSetMatches = self.lhsChunk.myMatches(self.workingSet) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
59 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
60 self._current = CandidateBinding({}) |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
61 self._pastEnd = False |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
62 self._seenBindings: List[CandidateBinding] = [] |
1648 | 63 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
64 log.debug(f'{INDENT*6} introducing {self!r}({self.lhsChunk}, {self._myWorkingSetMatches=})') |
1648 | 65 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
66 self.restart() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
67 |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
68 def _prevBindings(self) -> CandidateBinding: |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
69 if not self.prev or self.prev.pastEnd(): |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
70 return CandidateBinding({}) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
71 |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
72 return self.prev.currentBinding() |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
73 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
74 def advance(self): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
75 """update to a new set of bindings we haven't seen (since last restart), or go into pastEnd mode""" |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
76 if self._pastEnd: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
77 raise NotImplementedError('need restart') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
78 log.debug('') |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
79 augmentedWorkingSet: Sequence[Chunk] = [] |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
80 if self.prev is None: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
81 augmentedWorkingSet = self._myWorkingSetMatches |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
82 else: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
83 augmentedWorkingSet = list( |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
84 applyChunky(self.prev.currentBinding(), self._myWorkingSetMatches, returnBoundStatementsOnly=False)) |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
85 |
1652 | 86 log.debug(f'{INDENT*6} --> {self}.advance has {augmentedWorkingSet=} {self._current=}') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
87 |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
88 if self._advanceWithPlainMatches(augmentedWorkingSet): |
1652 | 89 log.debug(f'{INDENT*6} <-- {self}.advance finished with plain matches') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
90 return |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
91 |
1639 | 92 if self._advanceWithFunctions(): |
1652 | 93 log.debug(f'{INDENT*6} <-- {self}.advance finished with function matches') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
94 return |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
95 |
1652 | 96 log.debug(f'{INDENT*6} <-- {self}.advance had nothing and is now past end') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
97 self._pastEnd = True |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
98 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
99 def _advanceWithPlainMatches(self, augmentedWorkingSet: Sequence[Chunk]) -> bool: |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
100 log.debug(f'{INDENT*7} {self} mines {len(augmentedWorkingSet)} matching augmented statements') |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
101 for s in augmentedWorkingSet: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
102 log.debug(f'{INDENT*7} {s}') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
103 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
104 for chunk in augmentedWorkingSet: |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
105 try: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
106 outBinding = self.lhsChunk.totalBindingIfThisStmtWereTrue(self._prevBindings(), chunk) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
107 except Inconsistent: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
108 log.debug(f'{INDENT*7} ChunkLooper{self._shortId} - {chunk} would be inconsistent with prev bindings') |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
109 continue |
1632 | 110 |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
111 log.debug(f'{INDENT*7} {outBinding=} {self._seenBindings=}') |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
112 if outBinding not in self._seenBindings: |
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
113 self._seenBindings.append(outBinding.copy()) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
114 self._current = outBinding |
1632 | 115 log.debug(f'{INDENT*7} new binding from {self} -> {outBinding}') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
116 return True |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
117 return False |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
118 |
1639 | 119 def _advanceWithFunctions(self) -> bool: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
120 pred: Node = self.lhsChunk.predicate |
1640 | 121 if not isinstance(pred, URIRef): |
122 raise NotImplementedError | |
1632 | 123 |
1652 | 124 log.debug(f'{INDENT*6} advanceWithFunctions {pred}') |
125 | |
1637 | 126 for functionType in functionsFor(pred): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
127 fn = functionType(self.lhsChunk, self.parent.graph) |
1652 | 128 log.debug(f'{INDENT*7} ChunkLooper{self._shortId} advanceWithFunctions, {functionType=}') |
129 | |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
130 try: |
1652 | 131 |
1637 | 132 out = fn.bind(self._prevBindings()) |
133 except BindingUnknown: | |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
134 pass |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
135 else: |
1637 | 136 if out is not None: |
137 binding: CandidateBinding = self._prevBindings().copy() | |
138 binding.addNewBindings(out) | |
139 if binding not in self._seenBindings: | |
140 self._seenBindings.append(binding) | |
141 self._current = binding | |
142 log.debug(f'{INDENT*7} new binding from {self} -> {binding}') | |
143 return True | |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
144 |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
145 return False |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
146 |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
147 def _boundOperands(self, operands) -> List[Node]: |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
148 pb: CandidateBinding = self._prevBindings() |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
149 |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
150 boundOperands: List[Node] = [] |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
151 for op in operands: |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
152 if isinstance(op, (Variable, BNode)): |
1635
22d481f0a924
refactor: use CandidateBinding throughout, not loose dicts
drewp@bigasterisk.com
parents:
1634
diff
changeset
|
153 boundOperands.append(pb.applyTerm(op)) |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
154 else: |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
155 boundOperands.append(op) |
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
156 return boundOperands |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
157 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
158 def currentBinding(self) -> CandidateBinding: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
159 if self.pastEnd(): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
160 raise NotImplementedError() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
161 return self._current |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
162 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
163 def pastEnd(self) -> bool: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
164 return self._pastEnd |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
165 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
166 def restart(self): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
167 self._pastEnd = False |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
168 self._seenBindings = [] |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
169 self.advance() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
170 if self.pastEnd(): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
171 raise NoOptions() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
172 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
173 |
1594 | 174 @dataclass |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
175 class Lhs: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
176 graph: ChunkedGraph # our full LHS graph, as input. See below for the statements partitioned into groups. |
1594 | 177 |
178 def __post_init__(self): | |
1648 | 179 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
180 self.myPreds = self.graph.allPredicatesExceptFunctions() |
1602 | 181 |
1609
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
182 def __repr__(self): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
183 return f"Lhs({self.graph!r})" |
1609
34f2817320cc
new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents:
1608
diff
changeset
|
184 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
185 def findCandidateBindings(self, knownTrue: ChunkedGraph, stats, ruleStatementsIterationLimit) -> Iterator['BoundLhs']: |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
186 """bindings that fit the LHS of a rule, using statements from workingSet and functions |
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
187 from LHS""" |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
188 if not self.graph: |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
189 # special case- no LHS! |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
190 yield BoundLhs(self, CandidateBinding({})) |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
191 return |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
192 |
1640 | 193 if self._checkPredicateCounts(knownTrue): |
194 stats['_checkPredicateCountsCulls'] += 1 | |
195 return | |
196 | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
197 if not all(ch in knownTrue for ch in self.graph.staticChunks): |
1648 | 198 stats['staticStmtCulls'] += 1 |
199 return | |
200 | |
1656
d8e5b2232474
bugfix for simple (no arg lists) functions not evaluating
drewp@bigasterisk.com
parents:
1653
diff
changeset
|
201 if not self.graph.patternChunks and not self.graph.chunksUsedByFuncs: |
1648 | 202 # static only |
203 yield BoundLhs(self, CandidateBinding({})) | |
204 return | |
205 | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
206 log.debug(f'{INDENT*4} build new ChunkLooper stack') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
207 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
208 try: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
209 chunkStack = self._assembleRings(knownTrue, stats) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
210 except NoOptions: |
1632 | 211 log.debug(f'{INDENT*5} start up with no options; 0 bindings') |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
212 return |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
213 self._debugChunkStack('initial odometer', chunkStack) |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
214 self._assertAllRingsAreValid(chunkStack) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
215 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
216 lastRing = chunkStack[-1] |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
217 iterCount = 0 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
218 while True: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
219 iterCount += 1 |
1648 | 220 if iterCount > ruleStatementsIterationLimit: |
221 raise ValueError('rule too complex') | |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
222 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
223 log.debug(f'{INDENT*4} vv findCandBindings iteration {iterCount}') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
224 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
225 yield BoundLhs(self, lastRing.currentBinding()) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
226 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
227 self._debugChunkStack('odometer', chunkStack) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
228 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
229 done = self._advanceAll(chunkStack) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
230 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
231 self._debugChunkStack(f'odometer after ({done=})', chunkStack) |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
232 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
233 log.debug(f'{INDENT*4} ^^ findCandBindings iteration done') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
234 if done: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
235 break |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
236 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
237 def _debugChunkStack(self, label: str, chunkStack: List[ChunkLooper]): |
1632 | 238 log.debug(f'{INDENT*5} {label}:') |
1652 | 239 for i, l in enumerate(chunkStack): |
240 log.debug(f'{INDENT*6} [{i}] {l} curbind={l.currentBinding() if not l.pastEnd() else "<end>"}') | |
1632 | 241 |
1640 | 242 def _checkPredicateCounts(self, knownTrue): |
243 """raise NoOptions quickly in some cases""" | |
1648 | 244 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
245 if self.graph.noPredicatesAppear(self.myPreds): |
1652 | 246 log.debug(f'{INDENT*3} checkPredicateCounts does cull because not all {self.myPreds=} are in knownTrue') |
1640 | 247 return True |
1652 | 248 log.debug(f'{INDENT*3} checkPredicateCounts does not cull because all {self.myPreds=} are in knownTrue') |
1640 | 249 return False |
250 | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
251 def _assembleRings(self, knownTrue: ChunkedGraph, stats) -> List[ChunkLooper]: |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
252 """make ChunkLooper for each stmt in our LHS graph, but do it in a way that they all |
1652 | 253 start out valid (or else raise NoOptions). static chunks have already been confirmed.""" |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
254 |
1653 | 255 log.info(f' {INDENT*4} stats={dict(stats)}') |
256 chunks = self.graph.patternChunks.union(self.graph.chunksUsedByFuncs) | |
257 log.info(f' {INDENT*4} taking permutations of {len(chunks)=}') | |
258 for i, perm in enumerate(itertools.permutations(chunks)): | |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
259 stmtStack: List[ChunkLooper] = [] |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
260 prev: Optional[ChunkLooper] = None |
1648 | 261 if log.isEnabledFor(logging.DEBUG): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
262 log.debug(f'{INDENT*5} [perm {i}] try stmts in this order: {" -> ".join(repr(p) for p in perm)}') |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
263 |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
264 for s in perm: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
265 try: |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
266 elem = ChunkLooper(s, prev, knownTrue, parent=self) |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
267 except NoOptions: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
268 log.debug(f'{INDENT*6} permutation didnt work, try another') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
269 break |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
270 stmtStack.append(elem) |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
271 prev = stmtStack[-1] |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
272 else: |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
273 return stmtStack |
1648 | 274 if i > 5000: |
1652 | 275 raise NotImplementedError(f'trying too many permutations {len(chunks)=}') |
1648 | 276 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
277 log.debug(f'{INDENT*6} no perms worked- rule cannot match anything') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
278 raise NoOptions() |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
279 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
280 def _advanceAll(self, stmtStack: List[ChunkLooper]) -> bool: |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
281 carry = True # 1st elem always must advance |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
282 for i, ring in enumerate(stmtStack): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
283 # unlike normal odometer, advancing any earlier ring could invalidate later ones |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
284 if carry: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
285 log.debug(f'{INDENT*5} advanceAll [{i}] {ring} carry/advance') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
286 ring.advance() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
287 carry = False |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
288 if ring.pastEnd(): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
289 if ring is stmtStack[-1]: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
290 log.debug(f'{INDENT*5} advanceAll [{i}] {ring} says we done') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
291 return True |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
292 log.debug(f'{INDENT*5} advanceAll [{i}] {ring} restart') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
293 ring.restart() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
294 carry = True |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
295 return False |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
296 |
1633
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
297 def _assertAllRingsAreValid(self, stmtStack): |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
298 if any(ring.pastEnd() for ring in stmtStack): # this is an unexpected debug assertion |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
299 log.debug(f'{INDENT*5} some rings started at pastEnd {stmtStack}') |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
300 raise NoOptions() |
6107603ed455
fix farenheit rule case, fix some others that depend on rings order, but this breaks some performance because of itertools.perm
drewp@bigasterisk.com
parents:
1632
diff
changeset
|
301 |
1592
d7b66234064b
pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents:
1591
diff
changeset
|
302 |
1622
38bd8ef9ef67
add CandidateTermMatches, unused so far
drewp@bigasterisk.com
parents:
1621
diff
changeset
|
303 @dataclass |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
304 class BoundLhs: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
305 lhs: Lhs |
1610 | 306 binding: CandidateBinding |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
307 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
308 |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
309 @dataclass |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
310 class Rule: |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
311 lhsGraph: Graph |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
312 rhsGraph: Graph |
1632 | 313 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
314 def __post_init__(self): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
315 self.lhs = Lhs(ChunkedGraph(self.lhsGraph, functionsFor)) |
1632 | 316 # |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
317 self.rhsBnodeMap = {} |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
318 |
1648 | 319 def applyRule(self, workingSet: Graph, implied: Graph, stats: Dict, ruleStatementsIterationLimit): |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
320 # this does not change for the current applyRule call. The rule will be |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
321 # tried again in an outer loop, in case it can produce more. |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
322 workingSetChunked = ChunkedGraph(workingSet, functionsFor) |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
323 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
1650
diff
changeset
|
324 for bound in self.lhs.findCandidateBindings(workingSetChunked, stats, ruleStatementsIterationLimit): |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
325 log.debug(f'{INDENT*5} +rule has a working binding: {bound}') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
326 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
327 # rhs could have more bnodes, and they just need to be distinct per rule-firing that we do |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
328 existingRhsBnodes = set() |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
329 for stmt in self.rhsGraph: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
330 for t in stmt: |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
331 if isinstance(t, BNode): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
332 existingRhsBnodes.add(t) |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
333 # if existingRhsBnodes: |
1632 | 334 # log.debug(f'{INDENT*6} mapping rhs bnodes {existingRhsBnodes} to new ones') |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
335 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
336 for b in existingRhsBnodes: |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
337 |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
338 key = tuple(sorted(bound.binding.binding.items())), b |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
339 self.rhsBnodeMap.setdefault(key, BNode()) |
1638
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
340 try: |
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
341 bound.binding.addNewBindings(CandidateBinding({b: self.rhsBnodeMap[key]})) |
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
342 except BindingConflict: |
0ba1625037ae
don't crash, just skip the rule if there's a BindingConflict (no clear test case yet)
drewp@bigasterisk.com
parents:
1637
diff
changeset
|
343 continue |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
344 |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
345 # for lhsBoundStmt in bound.binding.apply(bound.lhsStmtsWithoutEvals()): |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
346 # log.debug(f'{INDENT*6} adding to workingSet {lhsBoundStmt=}') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
347 # workingSet.add(lhsBoundStmt) |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
348 # log.debug(f'{INDENT*6} rhsGraph is good: {list(self.rhsGraph)}') |
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
349 |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
350 for newStmt in bound.binding.apply(self.rhsGraph): |
1631
2c85a4f5dd9c
big rewrite of infer() using statements not variables as the things to iterate over
drewp@bigasterisk.com
parents:
1627
diff
changeset
|
351 # log.debug(f'{INDENT*6} adding {newStmt=}') |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
352 workingSet.add(newStmt) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
353 implied.add(newStmt) |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
354 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
355 |
1648 | 356 @dataclass |
1587 | 357 class Inference: |
1648 | 358 rulesIterationLimit = 3 |
359 ruleStatementsIterationLimit = 3 | |
1587 | 360 |
361 def __init__(self) -> None: | |
1648 | 362 self.rules: List[Rule] = [] |
363 self._nonRuleStmts: List[Triple] = [] | |
1587 | 364 |
365 def setRules(self, g: ConjunctiveGraph): | |
1648 | 366 self.rules = [] |
367 self._nonRuleStmts = [] | |
1599
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
368 for stmt in g: |
abbf0eb0e640
fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents:
1598
diff
changeset
|
369 if stmt[1] == LOG['implies']: |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
370 self.rules.append(Rule(stmt[0], stmt[2])) |
1649
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
371 else: |
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
372 self._nonRuleStmts.append(stmt) |
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
373 |
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
374 def nonRuleStatements(self) -> List[Triple]: |
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
375 return self._nonRuleStmts |
1587 | 376 |
1601 | 377 @INFER_CALLS.time() |
1587 | 378 def infer(self, graph: Graph): |
379 """ | |
380 returns new graph of inferred statements. | |
381 """ | |
1626 | 382 n = graph.__len__() |
383 INFER_GRAPH_SIZE.observe(n) | |
384 log.info(f'{INDENT*0} Begin inference of graph len={n} with rules len={len(self.rules)}:') | |
1601 | 385 startTime = time.time() |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
386 stats: Dict[str, Union[int, float]] = defaultdict(lambda: 0) |
1649
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
387 |
1589
5c1055be3c36
WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents:
1588
diff
changeset
|
388 # everything that is true: the input graph, plus every rule conclusion we can make |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
389 workingSet = Graph() |
1649
bb5d2b5370ac
add nonRuleStatments to Inference api. there's already a test in an eariler commit
drewp@bigasterisk.com
parents:
1648
diff
changeset
|
390 workingSet += self._nonRuleStmts |
1593
b0df43d5494c
big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents:
1592
diff
changeset
|
391 workingSet += graph |
1587 | 392 |
1594 | 393 # just the statements that came from RHS's of rules that fired. |
1587 | 394 implied = ConjunctiveGraph() |
395 | |
1648 | 396 rulesIterations = 0 |
1587 | 397 delta = 1 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
398 stats['initWorkingSet'] = cast(int, workingSet.__len__()) |
1648 | 399 while delta > 0 and rulesIterations <= self.rulesIterationLimit: |
1620 | 400 log.debug('') |
1648 | 401 log.info(f'{INDENT*1}*iteration {rulesIterations}') |
402 | |
1587 | 403 delta = -len(implied) |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
404 self._iterateAllRules(workingSet, implied, stats) |
1587 | 405 delta += len(implied) |
1648 | 406 rulesIterations += 1 |
1597 | 407 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts') |
1648 | 408 stats['iterations'] = rulesIterations |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
409 stats['timeSpent'] = round(time.time() - startTime, 3) |
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
410 stats['impliedStmts'] = len(implied) |
1648 | 411 log.info(f'{INDENT*0} Inference done {dict(stats)}.') |
412 log.debug('Implied:') | |
413 log.debug(graphDump(implied)) | |
1587 | 414 return implied |
415 | |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
416 def _iterateAllRules(self, workingSet: Graph, implied: Graph, stats): |
1612
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
417 for i, rule in enumerate(self.rules): |
272f78d4671a
mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents:
1611
diff
changeset
|
418 self._logRuleApplicationHeader(workingSet, i, rule) |
1648 | 419 rule.applyRule(workingSet, implied, stats, self.ruleStatementsIterationLimit) |
1587 | 420 |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
421 def _logRuleApplicationHeader(self, workingSet, i, r: Rule): |
1594 | 422 if not log.isEnabledFor(logging.DEBUG): |
423 return | |
424 | |
425 log.debug('') | |
426 log.debug(f'{INDENT*2} workingSet:') | |
1648 | 427 # for j, stmt in enumerate(sorted(workingSet)): |
428 # log.debug(f'{INDENT*3} ({j}) {stmt}') | |
429 log.debug(f'{INDENT*3} {graphDump(workingSet, oneLine=False)}') | |
1594 | 430 |
431 log.debug('') | |
432 log.debug(f'{INDENT*2}-applying rule {i}') | |
1632 | 433 log.debug(f'{INDENT*3} rule def lhs:') |
1634
ba59cfc3c747
hack math:sum in there. Test suite is passing except some slow performers
drewp@bigasterisk.com
parents:
1633
diff
changeset
|
434 for stmt in sorted(r.lhsGraph, reverse=True): |
1632 | 435 log.debug(f'{INDENT*4} {stmt}') |
1607
b21885181e35
more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents:
1605
diff
changeset
|
436 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r.rhsGraph)}') |