Mercurial > code > home > repos > homeauto
comparison service/mqtt_to_rdf/stmt_chunk.py @ 1660:31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases are still broken
author | drewp@bigasterisk.com |
---|---|
date | Sun, 19 Sep 2021 15:39:37 -0700 |
parents | 15e84c71beee |
children | 00a5624d1d14 |
comparison
equal
deleted
inserted
replaced
1659:15e84c71beee | 1660:31f7dab6a60b |
---|---|
13 | 13 |
14 log = logging.getLogger('infer') | 14 log = logging.getLogger('infer') |
15 | 15 |
16 INDENT = ' ' | 16 INDENT = ' ' |
17 | 17 |
18 ChunkPrimaryTriple = Tuple[Optional[Node], Node, Optional[Node]] | |
19 | |
18 | 20 |
19 @dataclass | 21 @dataclass |
20 class Chunk: # rename this | 22 class Chunk: # rename this |
21 """a statement, maybe with variables in it, except *the object can be an rdf list*. | 23 """a statement, maybe with variables in it, except *the object can be an rdf list*. |
22 This is done to optimize list comparisons (a lot) at the very minor expense of not | 24 This is done to optimize list comparisons (a lot) at the very minor expense of not |
25 Also the subject could be a list, e.g. for (?x ?y) math:sum ?z . | 27 Also the subject could be a list, e.g. for (?x ?y) math:sum ?z . |
26 | 28 |
27 Also a function call in a rule is always contained in exactly one chunk. | 29 Also a function call in a rule is always contained in exactly one chunk. |
28 """ | 30 """ |
29 # all immutable | 31 # all immutable |
30 primary: Tuple[Optional[Node], Node, Optional[Node]] | 32 primary: ChunkPrimaryTriple |
31 subjList: Optional[List[Node]] = None | 33 subjList: Optional[List[Node]] = None |
32 objList: Optional[List[Node]] = None | 34 objList: Optional[List[Node]] = None |
33 | 35 |
34 def __post_init__(self): | 36 def __post_init__(self): |
35 if not (((self.primary[0] is not None) ^ (self.subjList is not None)) and | 37 if not (((self.primary[0] is not None) ^ (self.subjList is not None)) and |
42 return hash(self.sortKey) | 44 return hash(self.sortKey) |
43 | 45 |
44 def __gt__(self, other): | 46 def __gt__(self, other): |
45 return self.sortKey > other.sortKey | 47 return self.sortKey > other.sortKey |
46 | 48 |
49 def _allTerms(self) -> Iterator[Node]: | |
50 """the terms in `primary` plus the lists. Output order is undefined but stable between same-sized Chunks""" | |
51 yield self.primary[1] | |
52 if self.primary[0] is not None: | |
53 yield self.primary[0] | |
54 else: | |
55 yield from cast(List[Node], self.subjList) | |
56 if self.primary[2] is not None: | |
57 yield self.primary[2] | |
58 else: | |
59 yield from cast(List[Node], self.objList) | |
47 | 60 |
48 def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding: | 61 def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding: |
49 outBinding = prevBindings.copy() | 62 outBinding = prevBindings.copy() |
50 for rt, ct in zip(self.primary, proposed.primary): | 63 for rt, ct in zip(self._allTerms(), proposed._allTerms()): |
51 if isinstance(rt, (Variable, BNode)): | 64 if isinstance(rt, (Variable, BNode)): |
52 if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct: | 65 if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct: |
53 msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else '' | 66 msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else '' |
54 raise Inconsistent(msg) | 67 raise Inconsistent(msg) |
55 outBinding.addNewBindings(CandidateBinding({rt: ct})) | 68 outBinding.addNewBindings(CandidateBinding({rt: ct})) |
60 out: List['Chunk'] = [] | 73 out: List['Chunk'] = [] |
61 log.debug(f'{INDENT*6} {self}.myMatches({g}') | 74 log.debug(f'{INDENT*6} {self}.myMatches({g}') |
62 for ch in g.allChunks(): | 75 for ch in g.allChunks(): |
63 if self.matches(ch): | 76 if self.matches(ch): |
64 out.append(ch) | 77 out.append(ch) |
65 #out.sort() # probably leftover- remove? | |
66 return out | 78 return out |
67 | 79 |
68 # could combine this and totalBindingIf into a single ChunkMatch object | 80 # could combine this and totalBindingIf into a single ChunkMatch object |
69 def matches(self, other: 'Chunk') -> bool: | 81 def matches(self, other: 'Chunk') -> bool: |
70 """does this Chunk with potential BindableTerm wildcards match other?""" | 82 """does this Chunk with potential BindableTerm wildcards match other?""" |
71 for selfTerm, otherTerm in zip(self.primary, other.primary): | 83 for selfTerm, otherTerm in zip(self._allTerms(), other._allTerms()): |
72 if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm: | 84 if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm: |
73 return False | 85 return False |
74 return True | 86 return True |
75 | 87 |
76 def __repr__(self): | 88 def __repr__(self): |
80 | 92 |
81 def isFunctionCall(self, functionsFor) -> bool: | 93 def isFunctionCall(self, functionsFor) -> bool: |
82 return bool(list(functionsFor(cast(URIRef, self.predicate)))) | 94 return bool(list(functionsFor(cast(URIRef, self.predicate)))) |
83 | 95 |
84 def isStatic(self) -> bool: | 96 def isStatic(self) -> bool: |
85 return (_stmtIsStatic(self.primary) and all(_termIsStatic(s) for s in (self.subjList or [])) and | 97 return all(_termIsStatic(s) for s in self._allTerms()) |
86 all(_termIsStatic(s) for s in (self.objList or []))) | 98 |
99 def apply(self, cb: CandidateBinding, returnBoundStatementsOnly=True) -> 'Chunk': | |
100 """Chunk like this one but with cb substitutions applied. If the flag is | |
101 True, we raise BindingUnknown instead of leaving a term unbound""" | |
102 fn = lambda t: cb.applyTerm(t, returnBoundStatementsOnly) | |
103 return Chunk( | |
104 ( | |
105 fn(self.primary[0]) if self.primary[0] is not None else None, # | |
106 fn(self.primary[1]), # | |
107 fn(self.primary[2]) if self.primary[2] is not None else None), | |
108 subjList=[fn(t) for t in self.subjList] if self.subjList else None, | |
109 objList=[fn(t) for t in self.objList] if self.objList else None, | |
110 ) | |
87 | 111 |
88 | 112 |
89 def _stmtIsStatic(stmt: Triple) -> bool: | 113 def _termIsStatic(term: Optional[Node]) -> bool: |
90 return all(_termIsStatic(t) for t in stmt) | |
91 | |
92 | |
93 def _termIsStatic(term: Node) -> bool: | |
94 return isinstance(term, (URIRef, Literal)) or term is None | 114 return isinstance(term, (URIRef, Literal)) or term is None |
95 | 115 |
96 | 116 |
97 def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]: | 117 def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]: |
98 for stmt in g: | 118 for chunk in g: |
99 try: | 119 try: |
100 bound = Chunk( | 120 bound = chunk.apply(cb, returnBoundStatementsOnly=returnBoundStatementsOnly) |
101 ( | |
102 cb.applyTerm(stmt.primary[0], returnBoundStatementsOnly), # | |
103 cb.applyTerm(stmt.primary[1], returnBoundStatementsOnly), # | |
104 cb.applyTerm(stmt.primary[2], returnBoundStatementsOnly)), | |
105 subjList=None, | |
106 objList=None) | |
107 except BindingUnknown: | 121 except BindingUnknown: |
108 log.debug(f'{INDENT*7} CB.apply cant bind {stmt} using {cb.binding}') | 122 log.debug(f'{INDENT*7} CB.apply cant bind {chunk} using {cb.binding}') |
109 | 123 |
110 continue | 124 continue |
111 log.debug(f'{INDENT*7} CB.apply took {stmt} to {bound}') | 125 log.debug(f'{INDENT*7} CB.apply took {chunk} to {bound}') |
112 | 126 |
113 yield bound | 127 yield bound |
114 | 128 |
115 | 129 |
116 class ChunkedGraph: | 130 class ChunkedGraph: |
176 return f'ChunkedGraph({self.__dict__})' | 190 return f'ChunkedGraph({self.__dict__})' |
177 | 191 |
178 def allChunks(self) -> Iterable[Chunk]: | 192 def allChunks(self) -> Iterable[Chunk]: |
179 yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs) | 193 yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs) |
180 | 194 |
181 def value(self, subj, pred) -> Node: # throwaway | |
182 for s in self.allChunks(): | |
183 s = s.primary | |
184 if (s[0], s[1]) == (subj, pred): | |
185 return s[2] | |
186 raise ValueError("value not found") | |
187 | |
188 def __contains__(self, ch: Chunk) -> bool: | 195 def __contains__(self, ch: Chunk) -> bool: |
189 return ch in self.allChunks() | 196 return ch in self.allChunks() |