comparison service/mqtt_to_rdf/inference.py @ 1622:38bd8ef9ef67

add CandidateTermMatches, unused so far
author drewp@bigasterisk.com
date Wed, 08 Sep 2021 18:53:26 -0700
parents da235054caa0
children cf901d219007
comparison
equal deleted inserted replaced
1621:da235054caa0 1622:38bd8ef9ef67
5 import itertools 5 import itertools
6 import logging 6 import logging
7 import time 7 import time
8 from collections import defaultdict 8 from collections import defaultdict
9 from dataclasses import dataclass, field 9 from dataclasses import dataclass, field
10 from typing import Dict, Iterator, List, Set, Tuple, Union, cast 10 from typing import Dict, Iterator, List, Optional, Set, Tuple, Union, cast
11 11
12 from prometheus_client import Summary 12 from prometheus_client import Summary
13 from rdflib import BNode, Graph, Namespace, URIRef 13 from rdflib import BNode, Graph, Namespace, URIRef
14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate 14 from rdflib.graph import ConjunctiveGraph, ReadOnlyGraphAggregate
15 from rdflib.term import Node, Variable 15 from rdflib.term import Node, Variable
162 log.debug(f'{INDENT*3} resulting candidate terms:') 162 log.debug(f'{INDENT*3} resulting candidate terms:')
163 for v, vals in zip(orderedVars, orderedValueSets): 163 for v, vals in zip(orderedVars, orderedValueSets):
164 log.debug(f'{INDENT*4} {v!r} could be:') 164 log.debug(f'{INDENT*4} {v!r} could be:')
165 for val in vals: 165 for val in vals:
166 log.debug(f'{INDENT*5}{val!r}') 166 log.debug(f'{INDENT*5}{val!r}')
167
168
@dataclass
class CandidateTermMatches:
    """Find the possible values for one term of a rule's left-hand side.

    All matching happens eagerly in ``__post_init__``; the values found are
    stored in ``self.results`` so the object can be iterated more than once.
    """
    term: BindableTerm  # the Variable/BNode we are looking for values of
    lhs: Lhs  # its .graph supplies the pattern statements
    workingSet: Graph  # statements to match the patterns against (all bound)
    boundSoFar: CandidateBinding  # applied to the patterns before matching

    def __post_init__(self):
        """Compute ``self.results`` from the working set and the LHS patterns."""
        # A list, not a generator: we have to be able to repeat the results.
        self.results: List[Node] = []

        # The LHS statements mentioning our term do not depend on which
        # working-set statement is being examined, so collect them once.
        # NOTE(review): assumes boundSoFar.apply() does not mutate the list
        # it is given -- confirm against CandidateBinding.apply.
        lStmts = list(self.lhsStmtsContainingTerm())

        res: Set[Node] = set()
        for trueStmt in self.workingSet:  # all bound
            log.debug(f'{INDENT*4} {trueStmt=} {len(lStmts)}')
            for pat in self.boundSoFar.apply(lStmts, returnBoundStatementsOnly=False):
                log.debug(f'{INDENT*4} {pat=}')
                implied = self._stmtImplies(pat, trueStmt)
                if implied is not None:
                    res.add(implied)
        # Deduplicated via the set; the resulting order is unspecified.
        self.results = list(res)

        # Skip the graphDump() call and the repr formatting entirely when
        # debug logging is disabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f'{INDENT*3} CandTermMatches: {self.term} {graphDump(self.lhs.graph)} {self.boundSoFar=} ===> {self.results=}')

    def _stmtImplies(self, pat: Triple, trueStmt: Triple) -> Optional[Node]:
        """Return the value this pattern/true-statement pair implies for our term.

        ``pat`` and ``trueStmt`` are walked position by position. Wherever
        ``pat`` holds ``self.term``, the node at the same position of
        ``trueStmt`` is the implied value. Returns None when ``self.term``
        is not seen in ``pat``, or when it occurs more than once and the
        corresponding ``trueStmt`` nodes differ.

        Positions of ``pat`` that are not a Variable/BNode are not compared
        with ``trueStmt`` here, so this can report more values than really
        match.
        """
        r = None
        for p, t in zip(pat, trueStmt):
            if isinstance(p, (Variable, BNode)):
                if p != self.term:
                    # stmt is unbound in more than just our term.
                    # Unsure what to do - err on the side of too many
                    # bindings, since they get rechecked later.
                    continue
                if r is None:
                    r = t
                    log.debug(f'{INDENT*4} implied term value {p=} {t=}')
                elif r != t:
                    # (?x c ?x) matched with (a b c) doesn't work
                    return None
        return r

    def lhsStmtsContainingTerm(self) -> Iterator[Triple]:
        """Yield each statement of ``self.lhs.graph`` that contains ``self.term``."""
        # lhs could precompute this
        for lhsStmt in self.lhs.graph:
            if self.term in lhsStmt:
                yield lhsStmt

    def __iter__(self) -> Iterator[Node]:
        """Iterate over the precomputed results; safe to call repeatedly."""
        return iter(self.results)
167 218
168 219
169 @dataclass 220 @dataclass
170 class BoundLhs: 221 class BoundLhs:
171 lhs: Lhs 222 lhs: Lhs