annotate service/mqtt_to_rdf/inference.py @ 1618:48bf62008c82

attempted to rewrite with CandidateTermMatches but it broke
author drewp@bigasterisk.com
date Wed, 08 Sep 2021 18:32:11 -0700
parents 3a6ed545357f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
1 """
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
2 copied from reasoning 2021-08-29. probably same api. should
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
3 be able to lib/ this out
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
4 """
1588
0757fafbfdab WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents: 1587
diff changeset
5 import itertools
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
6 import logging
1601
30463df12d89 infer() dumps stats
drewp@bigasterisk.com
parents: 1600
diff changeset
7 import time
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
8 from collections import defaultdict
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
9 from dataclasses import dataclass, field
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
10 from typing import Dict, Iterator, List, Literal, Optional, Set, Tuple, Union, cast
1588
0757fafbfdab WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents: 1587
diff changeset
11
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
12 from prometheus_client import Summary
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
13 from rdflib import BNode, Graph, Namespace, URIRef
1589
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
15 from rdflib.term import Node, Variable
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
16
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
17 from candidate_binding import CandidateBinding
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
18 from inference_types import (BindableTerm, EvaluationFailed, ReadOnlyWorkingSet, Triple)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
19 from lhs_evaluation import Evaluation
1605
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents: 1603
diff changeset
20
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
21 log = logging.getLogger('infer')
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
22 INDENT = ' '
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
23
1601
30463df12d89 infer() dumps stats
drewp@bigasterisk.com
parents: 1600
diff changeset
24 INFER_CALLS = Summary('read_rules_calls', 'calls')
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
25
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
26 ROOM = Namespace("http://projects.bigasterisk.com/room/")
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
27 LOG = Namespace('http://www.w3.org/2000/10/swap/log#')
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
28 MATH = Namespace('http://www.w3.org/2000/10/swap/math#')
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
29
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
30
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
31 @dataclass
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
32 class Lhs:
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
33 graph: Graph
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
34
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
35 def __post_init__(self):
1608
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
36 # do precomputation in here that's not specific to the workingSet
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
37 self.staticRuleStmts = Graph()
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
38 self.nonStaticRuleStmts = Graph()
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
39
1608
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
40 self.lhsBindables: Set[BindableTerm] = set()
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
41 self.lhsBnodes: Set[BNode] = set()
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
42 for ruleStmt in self.graph:
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
43 varsAndBnodesInStmt = [term for term in ruleStmt if isinstance(term, (Variable, BNode))]
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
44 self.lhsBindables.update(varsAndBnodesInStmt)
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
45 self.lhsBnodes.update(x for x in varsAndBnodesInStmt if isinstance(x, BNode))
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
46 if not varsAndBnodesInStmt:
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
47 self.staticRuleStmts.add(ruleStmt)
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
48 else:
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
49 self.nonStaticRuleStmts.add(ruleStmt)
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
50
1615
bcfa368e5498 change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents: 1614
diff changeset
51 self.nonStaticRuleStmtsSet = set(self.nonStaticRuleStmts)
bcfa368e5498 change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents: 1614
diff changeset
52
1602
e3c44ac6d3c5 do findEvals once at setRules time
drewp@bigasterisk.com
parents: 1601
diff changeset
53 self.evaluations = list(Evaluation.findEvals(self.graph))
e3c44ac6d3c5 do findEvals once at setRules time
drewp@bigasterisk.com
parents: 1601
diff changeset
54
1609
34f2817320cc new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents: 1608
diff changeset
55 def __repr__(self):
34f2817320cc new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents: 1608
diff changeset
56 return f"Lhs({graphDump(self.graph)})"
34f2817320cc new tests for a smaller part of the inner loop
drewp@bigasterisk.com
parents: 1608
diff changeset
57
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
58 def findCandidateBindings(self, knownTrue: ReadOnlyWorkingSet, stats) -> Iterator['BoundLhs']:
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
59 """bindings that fit the LHS of a rule, using statements from workingSet and functions
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
60 from LHS"""
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
61 log.debug(f'{INDENT*3} nodesToBind: {self.lhsBindables}')
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
62 stats['findCandidateBindingsCalls'] += 1
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
63
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
64 if not self._allStaticStatementsMatch(knownTrue):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
65 stats['findCandidateBindingEarlyExits'] += 1
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
66 return
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
67
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
68 boundSoFar = CandidateBinding({})
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
69 for binding in self._possibleBindings(knownTrue, boundSoFar, stats):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
70 log.debug('')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
71 log.debug(f'{INDENT*4}*trying {binding.binding}')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
72
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
73 if not binding.verify(knownTrue):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
74 log.debug(f'{INDENT*4} this binding did not verify')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
75 stats['permCountFailingVerify'] += 1
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
76 continue
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
77
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
78 stats['permCountSucceeding'] += 1
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
79 yield binding
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
80 boundSoFar.addNewBindings(binding.binding)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
81
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
82 def _allStaticStatementsMatch(self, knownTrue: ReadOnlyWorkingSet) -> bool:
1613
03ed8c9abd5b forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents: 1612
diff changeset
83 # bug: see TestSelfFulfillingRule.test3 for a case where this rule's
03ed8c9abd5b forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents: 1612
diff changeset
84 # static stmt is matched by a non-static stmt in the rule itself
1608
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
85 for ruleStmt in self.staticRuleStmts:
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
86 if ruleStmt not in knownTrue:
1608
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
87 log.debug(f'{INDENT*3} {ruleStmt} not in working set- skip rule')
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
88 return False
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
89 return True
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
90
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
91 def _possibleBindings(self, workingSet, boundSoFar, stats) -> Iterator['BoundLhs']:
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
92 """this yields at least the working bindings, and possibly others"""
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
93 for bindRow in self._product(workingSet, boundSoFar):
1614
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
94 try:
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
95 yield BoundLhs(self, bindRow)
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
96 except EvaluationFailed:
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
97 stats['permCountFailingEval'] += 1
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
98
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
99 def _product(self, workingSet, boundSoFar: CandidateBinding) -> Iterator[CandidateBinding]:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
100 orderedVars = []
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
101 for stmt in self.graph:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
102 for t in stmt:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
103 if isinstance(t, (Variable, BNode)):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
104 orderedVars.append(t)
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
105 orderedVars = sorted(set(orderedVars))
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
106
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
107 orderedValueSets = []
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
108 for v in orderedVars:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
109 orderedValueSets.append(CandidateTermMatches(v, self, workingSet, boundSoFar).results)
1600
89a50242cb5e cleanup internal names, imports
drewp@bigasterisk.com
parents: 1599
diff changeset
110 self._logCandidates(orderedVars, orderedValueSets)
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
111 log.debug(f'{INDENT*3} trying all permutations:')
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
112 if not orderedVars:
1614
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
113 yield CandidateBinding({})
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
114 return
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
115
1614
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
116 if not orderedValueSets or not all(orderedValueSets):
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
117 # some var or bnode has no options at all
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
118 return
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
119 rings: List[Iterator[Node]] = [itertools.cycle(valSet) for valSet in orderedValueSets]
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
120 currentSet: List[Node] = [next(ring) for ring in rings]
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
121 starts = [valSet[-1] for valSet in orderedValueSets]
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
122 while True:
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
123 for col, curr in enumerate(currentSet):
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
124 currentSet[col] = next(rings[col])
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
125 log.debug(f'{INDENT*4} currentSet: {repr(currentSet)}')
1614
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
126 yield CandidateBinding(dict(zip(orderedVars, currentSet)))
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
127 if curr is not starts[col]:
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
128 break
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
129 if col == len(orderedValueSets) - 1:
97b2c3cfdb83 refactor: inline an odometer algorithm in place of itertools.product
drewp@bigasterisk.com
parents: 1613
diff changeset
130 return
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
131
1600
89a50242cb5e cleanup internal names, imports
drewp@bigasterisk.com
parents: 1599
diff changeset
132 def _allCandidateTermMatches(self, workingSet: ReadOnlyWorkingSet) -> Dict[BindableTerm, Set[Node]]:
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
133 """the total set of terms each variable could possibly match"""
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
134
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
135 candidateTermMatches: Dict[BindableTerm, Set[Node]] = defaultdict(set)
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
136 for lhsStmt in self.graph:
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
137 log.debug(f'{INDENT*4} possibles for this lhs stmt: {lhsStmt}')
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
138 for trueStmt in workingSet:
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
139 # log.debug(f'{INDENT*5} consider this true stmt ({i}): {trueStmt}')
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
140
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
141 for v, vals in self._bindingsFromStatement(lhsStmt, trueStmt):
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
142 candidateTermMatches[v].update(vals)
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
143
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
144 # for trueStmt in itertools.chain(workingSet, self.graph):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
145 # for b in self.lhsBnodes:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
146 # for t in [trueStmt[0], trueStmt[2]]:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
147 # if isinstance(t, (URIRef, BNode)):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
148 # candidateTermMatches[b].add(t)
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
149 return candidateTermMatches
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
150
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
151 def _bindingsFromStatement(self, stmt1: Triple, stmt2: Triple) -> Iterator[Tuple[Variable, Set[Node]]]:
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
152 """if these stmts match otherwise, what BNode or Variable mappings do we learn?
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
153
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
154 e.g. stmt1=(?x B ?y) and stmt2=(A B C), then we yield (?x, {A}) and (?y, {C})
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
155 or stmt1=(_:x B C) and stmt2=(A B C), then we yield (_:x, {A})
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
156 or stmt1=(?x B C) and stmt2=(A B D), then we yield nothing
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
157 """
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
158 bindingsFromStatement = {}
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
159 for term1, term2 in zip(stmt1, stmt2):
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
160 if isinstance(term1, (BNode, Variable)):
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
161 bindingsFromStatement.setdefault(term1, set()).add(term2)
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
162 elif term1 != term2:
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
163 break
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
164 else:
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
165 for v, vals in bindingsFromStatement.items():
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
166 log.debug(f'{INDENT*5} {v=} {vals=}')
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
167 yield v, vals
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
168
1600
89a50242cb5e cleanup internal names, imports
drewp@bigasterisk.com
parents: 1599
diff changeset
169 def _logCandidates(self, orderedVars, orderedValueSets):
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
170 if not log.isEnabledFor(logging.DEBUG):
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
171 return
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
172 log.debug(f'{INDENT*3} resulting candidate terms:')
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
173 for v, vals in zip(orderedVars, orderedValueSets):
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
174 log.debug(f'{INDENT*4} {v!r} could be:')
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
175 for val in vals:
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
176 log.debug(f'{INDENT*5}{val!r}')
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
177
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
178
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
179 @dataclass
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
180 class CandidateTermMatches:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
181 """lazily find the possible matches for this term"""
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
182 term: BindableTerm
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
183 lhs: Lhs
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
184 workingSet: Graph
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
185 boundSoFar: CandidateBinding
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
186
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
187 def __post_init__(self):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
188 self.results: List[Node] = [] # we have to be able to repeat the results
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
189
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
190 res: Set[Node] = set()
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
191 for trueStmt in self.workingSet: # all bound
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
192 lStmts = list(self.lhsStmtsContainingTerm())
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
193 log.debug(f'{INDENT*4} {trueStmt=} {len(lStmts)}')
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
194 for pat in self.boundSoFar.apply(lStmts, returnBoundStatementsOnly=False):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
195 log.debug(f'{INDENT*4} {pat=}')
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
196 implied = self._stmtImplies(pat, trueStmt)
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
197 if implied is not None:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
198 res.add(implied)
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
199 self.results = list(res)
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
200 # self.results.sort()
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
201
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
202 log.debug(f'{INDENT*3} CandTermMatches: {self.term} {graphDump(self.lhs.graph)} {self.boundSoFar=} ===> {self.results=}')
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
203
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
204 def _stmtImplies(self, pat: Triple, trueStmt: Triple) -> Optional[Node]:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
205 """what value, if any, do we learn for our term from this LHS pattern statement and this known-true stmt"""
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
206 r = None
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
207 for p, t in zip(pat, trueStmt):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
208 if isinstance(p, (Variable, BNode)):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
209 if p != self.term:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
210 # stmt is unbound in more than just our term
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
211 continue # unsure what to do - err on the side of too many bindings, since they get rechecked later
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
212 if r is None:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
213 r = t
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
214 log.debug(f'{INDENT*4} implied term value {p=} {t=}')
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
215 elif r != t:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
216 # (?x c ?x) matched with (a b c) doesn't work
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
217 return None
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
218 return r
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
219
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
220 def lhsStmtsContainingTerm(self):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
221 # lhs could precompute this
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
222 for lhsStmt in self.lhs.graph:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
223 if self.term in lhsStmt:
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
224 yield lhsStmt
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
225
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
226 def __iter__(self):
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
227 return iter(self.results)
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
228
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
229
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
230 @dataclass
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
231 class BoundLhs:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
232 lhs: Lhs
1610
6fc48ef4c696 mysteriously lost an important line
drewp@bigasterisk.com
parents: 1609
diff changeset
233 binding: CandidateBinding
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
234
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
235 def __post_init__(self):
1613
03ed8c9abd5b forget GRAPH_ID optimization in this case
drewp@bigasterisk.com
parents: 1612
diff changeset
236 self.usedByFuncs = Graph()
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
237 self._applyFunctions()
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
238
1616
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
239 def lhsStmtsWithoutEvals(self):
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
240 for stmt in self.lhs.graph:
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
241 if stmt in self.usedByFuncs:
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
242 continue
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
243 yield stmt
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
244
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
245 def _applyFunctions(self):
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
246 """may grow the binding with some results"""
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
247 while True:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
248 delta = self._applyFunctionsIteration()
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
249 if delta == 0:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
250 break
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
251
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
252 def _applyFunctionsIteration(self):
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
253 before = len(self.binding.binding)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
254 delta = 0
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
255 for ev in self.lhs.evaluations:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
256 newBindings, usedGraph = ev.resultBindings(self.binding)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
257 self.usedByFuncs += usedGraph
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
258 self.binding.addNewBindings(newBindings)
1608
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
259 delta = len(self.binding.binding) - before
f928eb06a4f6 cleaning up inner loop
drewp@bigasterisk.com
parents: 1607
diff changeset
260 log.debug(f'{INDENT*4} eval rules made {delta} new bindings')
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
261 return delta
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
262
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
263 def verify(self, workingSet: ReadOnlyWorkingSet) -> bool:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
264 """Can this bound lhs be true all at once in workingSet?"""
1615
bcfa368e5498 change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents: 1614
diff changeset
265 rem = cast(Set[Triple], self.lhs.nonStaticRuleStmtsSet.difference(self.usedByFuncs))
bcfa368e5498 change a Graph.__sub__ to Set.difference in verify() for a big speedup
drewp@bigasterisk.com
parents: 1614
diff changeset
266 boundLhs = self.binding.apply(rem)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
267
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
268 if log.isEnabledFor(logging.DEBUG):
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
269 boundLhs = list(boundLhs)
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
270 self._logVerifyBanner(boundLhs, workingSet)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
271
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
272 for stmt in boundLhs:
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
273 log.debug(f'{INDENT*4} check for %s', stmt)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
274
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
275 if stmt not in workingSet:
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
276 log.debug(f'{INDENT*5} stmt not known to be true')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
277 return False
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
278 return True
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
279
1611
a794a150a89b a little inner-loop performace. same number of iterations, but less time on logging and some stmt filtering
drewp@bigasterisk.com
parents: 1610
diff changeset
280 def _logVerifyBanner(self, boundLhs, workingSet: ReadOnlyWorkingSet):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
281 log.debug(f'{INDENT*4}/ verify all bindings against this boundLhs:')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
282 for stmt in sorted(boundLhs):
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
283 log.debug(f'{INDENT*4}|{INDENT} {stmt}')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
284
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
285 # log.debug(f'{INDENT*4}| and against this workingSet:')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
286 # for stmt in sorted(workingSet):
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
287 # log.debug(f'{INDENT*4}|{INDENT} {stmt}')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
288
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
289 log.debug(f'{INDENT*4}\\')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
290
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
291
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
292 @dataclass
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
293 class Rule:
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
294 lhsGraph: Graph
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
295 rhsGraph: Graph
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
296
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
297 def __post_init__(self):
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
298 self.lhs = Lhs(self.lhsGraph)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
299
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
300 def applyRule(self, workingSet: Graph, implied: Graph, stats: Dict):
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
301 for bound in self.lhs.findCandidateBindings(ReadOnlyGraphAggregate([workingSet]), stats):
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
302 log.debug(f'{INDENT*3} rule has a working binding:')
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
303
1616
3a6ed545357f optimization: stream stmts instead of building a Graph
drewp@bigasterisk.com
parents: 1615
diff changeset
304 for lhsBoundStmt in bound.binding.apply(bound.lhsStmtsWithoutEvals()):
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
305 log.debug(f'{INDENT*4} adding {lhsBoundStmt=}')
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
306 workingSet.add(lhsBoundStmt)
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
307 for newStmt in bound.binding.apply(self.rhsGraph):
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
308 log.debug(f'{INDENT*4} adding {newStmt=}')
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
309 workingSet.add(newStmt)
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
310 implied.add(newStmt)
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
311
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
312
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
313 class Inference:
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
314
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
315 def __init__(self) -> None:
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
316 self.rules = []
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
317
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
318 def setRules(self, g: ConjunctiveGraph):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
319 self.rules: List[Rule] = []
1599
abbf0eb0e640 fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents: 1598
diff changeset
320 for stmt in g:
abbf0eb0e640 fix a bug with a slightly moer complicated set of rules
drewp@bigasterisk.com
parents: 1598
diff changeset
321 if stmt[1] == LOG['implies']:
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
322 self.rules.append(Rule(stmt[0], stmt[2]))
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
323 # other stmts should go to a default working set?
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
324
1601
30463df12d89 infer() dumps stats
drewp@bigasterisk.com
parents: 1600
diff changeset
325 @INFER_CALLS.time()
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
326 def infer(self, graph: Graph):
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
327 """
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
328 returns new graph of inferred statements.
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
329 """
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
330 log.info(f'{INDENT*0} Begin inference of graph len={graph.__len__()} with rules len={len(self.rules)}:')
1601
30463df12d89 infer() dumps stats
drewp@bigasterisk.com
parents: 1600
diff changeset
331 startTime = time.time()
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
332 stats: Dict[str, Union[int, float]] = defaultdict(lambda: 0)
1589
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
333 # everything that is true: the input graph, plus every rule conclusion we can make
1593
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
334 workingSet = Graph()
b0df43d5494c big rewrite- more classes, smaller methods, more typesafe, all current tests passing
drewp@bigasterisk.com
parents: 1592
diff changeset
335 workingSet += graph
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
336
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
337 # just the statements that came from RHS's of rules that fired.
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
338 implied = ConjunctiveGraph()
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
339
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
340 bailout_iterations = 100
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
341 delta = 1
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
342 stats['initWorkingSet'] = cast(int, workingSet.__len__())
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
343 while delta > 0 and bailout_iterations > 0:
1618
48bf62008c82 attempted to rewrite with CandidateTermMatches but it broke
drewp@bigasterisk.com
parents: 1616
diff changeset
344 log.debug('')
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
345 log.info(f'{INDENT*1}*iteration ({bailout_iterations} left)')
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
346 bailout_iterations -= 1
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
347 delta = -len(implied)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
348 self._iterateAllRules(workingSet, implied, stats)
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
349 delta += len(implied)
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
350 stats['iterations'] += 1
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
351 log.info(f'{INDENT*2} this inference iteration added {delta} more implied stmts')
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
352 stats['timeSpent'] = round(time.time() - startTime, 3)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
353 stats['impliedStmts'] = len(implied)
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
354 log.info(f'{INDENT*0} Inference done {dict(stats)}. Implied:')
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
355 for st in implied:
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
356 log.info(f'{INDENT*1} {st}')
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
357 return implied
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
358
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
359 def _iterateAllRules(self, workingSet: Graph, implied: Graph, stats):
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
360 for i, rule in enumerate(self.rules):
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
361 self._logRuleApplicationHeader(workingSet, i, rule)
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
362 rule.applyRule(workingSet, implied, stats)
1587
9a3a18c494f9 WIP new inferencer. no vars yet.
drewp@bigasterisk.com
parents:
diff changeset
363
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
364 def _logRuleApplicationHeader(self, workingSet, i, r: Rule):
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
365 if not log.isEnabledFor(logging.DEBUG):
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
366 return
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
367
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
368 log.debug('')
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
369 log.debug(f'{INDENT*2} workingSet:')
1597
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
370 for j, stmt in enumerate(sorted(workingSet)):
387a9cb66517 logging adjustments
drewp@bigasterisk.com
parents: 1596
diff changeset
371 log.debug(f'{INDENT*3} ({j}) {stmt}')
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
372
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
373 log.debug('')
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
374 log.debug(f'{INDENT*2}-applying rule {i}')
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
375 log.debug(f'{INDENT*3} rule def lhs: {graphDump(r.lhsGraph)}')
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
376 log.debug(f'{INDENT*3} rule def rhs: {graphDump(r.rhsGraph)}')
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
377
1588
0757fafbfdab WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents: 1587
diff changeset
378
1590
327202020892 WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents: 1589
diff changeset
379 def graphDump(g: Union[Graph, List[Triple]]):
327202020892 WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents: 1589
diff changeset
380 if not isinstance(g, Graph):
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
381 log.warning(f"it's a {type(g)}")
1590
327202020892 WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents: 1589
diff changeset
382 g2 = Graph()
1594
e58bcfa66093 cleanups and a few fixed cases
drewp@bigasterisk.com
parents: 1593
diff changeset
383 g2 += g
1590
327202020892 WIP inference- getting into more degenerate test cases
drewp@bigasterisk.com
parents: 1589
diff changeset
384 g = g2
1589
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
385 g.bind('', ROOM)
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
386 g.bind('ex', Namespace('http://example.com/'))
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
387 lines = cast(bytes, g.serialize(format='n3')).decode('utf8').splitlines()
1612
272f78d4671a mark skipped tests. move applyRule into Rule. minor cleanups.
drewp@bigasterisk.com
parents: 1611
diff changeset
388 lines = [line.strip() for line in lines if not line.startswith('@prefix')]
1589
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
389 return ' '.join(lines)
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
390
5c1055be3c36 WIP more debugging, working towards bnode-matching support
drewp@bigasterisk.com
parents: 1588
diff changeset
391
1600
89a50242cb5e cleanup internal names, imports
drewp@bigasterisk.com
parents: 1599
diff changeset
392 def _organize(candidateTermMatches: Dict[BindableTerm, Set[Node]]) -> Tuple[List[BindableTerm], List[List[Node]]]:
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
393 items = list(candidateTermMatches.items())
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
394 items.sort()
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
395 orderedVars: List[BindableTerm] = []
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
396 orderedValueSets: List[List[Node]] = []
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
397 for v, vals in items:
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
398 orderedVars.append(v)
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
399 orderedValues: List[Node] = list(vals)
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
400 orderedValues.sort(key=str)
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
401 orderedValueSets.append(orderedValues)
1588
0757fafbfdab WIP inferencer - partial var and function support
drewp@bigasterisk.com
parents: 1587
diff changeset
402
1592
d7b66234064b pure reordering of funcs to make the next diffs smaller
drewp@bigasterisk.com
parents: 1591
diff changeset
403 return orderedVars, orderedValueSets