comparison service/mqtt_to_rdf/stmt_chunk.py @ 1660:31f7dab6a60b

Function evaluation now uses Chunk lists and runs fast. Only a few edge cases are still broken.
author drewp@bigasterisk.com
date Sun, 19 Sep 2021 15:39:37 -0700
parents 15e84c71beee
children 00a5624d1d14
comparison
equal deleted inserted replaced
1659:15e84c71beee 1660:31f7dab6a60b
13 13
14 log = logging.getLogger('infer') 14 log = logging.getLogger('infer')
15 15
16 INDENT = ' ' 16 INDENT = ' '
17 17
18 ChunkPrimaryTriple = Tuple[Optional[Node], Node, Optional[Node]]
19
18 20
19 @dataclass 21 @dataclass
20 class Chunk: # rename this 22 class Chunk: # rename this
21 """a statement, maybe with variables in it, except *the object can be an rdf list*. 23 """a statement, maybe with variables in it, except *the object can be an rdf list*.
22 This is done to optimize list comparisons (a lot) at the very minor expense of not 24 This is done to optimize list comparisons (a lot) at the very minor expense of not
25 Also the subject could be a list, e.g. for (?x ?y) math:sum ?z . 27 Also the subject could be a list, e.g. for (?x ?y) math:sum ?z .
26 28
27 Also a function call in a rule is always contained in exactly one chunk. 29 Also a function call in a rule is always contained in exactly one chunk.
28 """ 30 """
29 # all immutable 31 # all immutable
30 primary: Tuple[Optional[Node], Node, Optional[Node]] 32 primary: ChunkPrimaryTriple
31 subjList: Optional[List[Node]] = None 33 subjList: Optional[List[Node]] = None
32 objList: Optional[List[Node]] = None 34 objList: Optional[List[Node]] = None
33 35
34 def __post_init__(self): 36 def __post_init__(self):
35 if not (((self.primary[0] is not None) ^ (self.subjList is not None)) and 37 if not (((self.primary[0] is not None) ^ (self.subjList is not None)) and
42 return hash(self.sortKey) 44 return hash(self.sortKey)
43 45
44 def __gt__(self, other): 46 def __gt__(self, other):
45 return self.sortKey > other.sortKey 47 return self.sortKey > other.sortKey
46 48
49 def _allTerms(self) -> Iterator[Node]:
50 """the terms in `primary` plus the lists. Output order is undefined but stable between same-sized Chunks"""
51 yield self.primary[1]
52 if self.primary[0] is not None:
53 yield self.primary[0]
54 else:
55 yield from cast(List[Node], self.subjList)
56 if self.primary[2] is not None:
57 yield self.primary[2]
58 else:
59 yield from cast(List[Node], self.objList)
47 60
48 def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding: 61 def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding:
49 outBinding = prevBindings.copy() 62 outBinding = prevBindings.copy()
50 for rt, ct in zip(self.primary, proposed.primary): 63 for rt, ct in zip(self._allTerms(), proposed._allTerms()):
51 if isinstance(rt, (Variable, BNode)): 64 if isinstance(rt, (Variable, BNode)):
52 if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct: 65 if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct:
53 msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else '' 66 msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else ''
54 raise Inconsistent(msg) 67 raise Inconsistent(msg)
55 outBinding.addNewBindings(CandidateBinding({rt: ct})) 68 outBinding.addNewBindings(CandidateBinding({rt: ct}))
60 out: List['Chunk'] = [] 73 out: List['Chunk'] = []
61 log.debug(f'{INDENT*6} {self}.myMatches({g}') 74 log.debug(f'{INDENT*6} {self}.myMatches({g}')
62 for ch in g.allChunks(): 75 for ch in g.allChunks():
63 if self.matches(ch): 76 if self.matches(ch):
64 out.append(ch) 77 out.append(ch)
65 #out.sort() # probably leftover- remove?
66 return out 78 return out
67 79
68 # could combine this and totalBindingIf into a single ChunkMatch object 80 # could combine this and totalBindingIf into a single ChunkMatch object
69 def matches(self, other: 'Chunk') -> bool: 81 def matches(self, other: 'Chunk') -> bool:
70 """does this Chunk with potential BindableTerm wildcards match other?""" 82 """does this Chunk with potential BindableTerm wildcards match other?"""
71 for selfTerm, otherTerm in zip(self.primary, other.primary): 83 for selfTerm, otherTerm in zip(self._allTerms(), other._allTerms()):
72 if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm: 84 if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm:
73 return False 85 return False
74 return True 86 return True
75 87
76 def __repr__(self): 88 def __repr__(self):
80 92
81 def isFunctionCall(self, functionsFor) -> bool: 93 def isFunctionCall(self, functionsFor) -> bool:
82 return bool(list(functionsFor(cast(URIRef, self.predicate)))) 94 return bool(list(functionsFor(cast(URIRef, self.predicate))))
83 95
84 def isStatic(self) -> bool: 96 def isStatic(self) -> bool:
85 return (_stmtIsStatic(self.primary) and all(_termIsStatic(s) for s in (self.subjList or [])) and 97 return all(_termIsStatic(s) for s in self._allTerms())
86 all(_termIsStatic(s) for s in (self.objList or []))) 98
99 def apply(self, cb: CandidateBinding, returnBoundStatementsOnly=True) -> 'Chunk':
100 """Chunk like this one but with cb substitutions applied. If the flag is
101 True, we raise BindingUnknown instead of leaving a term unbound"""
102 fn = lambda t: cb.applyTerm(t, returnBoundStatementsOnly)
103 return Chunk(
104 (
105 fn(self.primary[0]) if self.primary[0] is not None else None, #
106 fn(self.primary[1]), #
107 fn(self.primary[2]) if self.primary[2] is not None else None),
108 subjList=[fn(t) for t in self.subjList] if self.subjList else None,
109 objList=[fn(t) for t in self.objList] if self.objList else None,
110 )
87 111
88 112
89 def _stmtIsStatic(stmt: Triple) -> bool: 113 def _termIsStatic(term: Optional[Node]) -> bool:
90 return all(_termIsStatic(t) for t in stmt)
91
92
93 def _termIsStatic(term: Node) -> bool:
94 return isinstance(term, (URIRef, Literal)) or term is None 114 return isinstance(term, (URIRef, Literal)) or term is None
95 115
96 116
97 def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]: 117 def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]:
98 for stmt in g: 118 for chunk in g:
99 try: 119 try:
100 bound = Chunk( 120 bound = chunk.apply(cb, returnBoundStatementsOnly=returnBoundStatementsOnly)
101 (
102 cb.applyTerm(stmt.primary[0], returnBoundStatementsOnly), #
103 cb.applyTerm(stmt.primary[1], returnBoundStatementsOnly), #
104 cb.applyTerm(stmt.primary[2], returnBoundStatementsOnly)),
105 subjList=None,
106 objList=None)
107 except BindingUnknown: 121 except BindingUnknown:
108 log.debug(f'{INDENT*7} CB.apply cant bind {stmt} using {cb.binding}') 122 log.debug(f'{INDENT*7} CB.apply cant bind {chunk} using {cb.binding}')
109 123
110 continue 124 continue
111 log.debug(f'{INDENT*7} CB.apply took {stmt} to {bound}') 125 log.debug(f'{INDENT*7} CB.apply took {chunk} to {bound}')
112 126
113 yield bound 127 yield bound
114 128
115 129
116 class ChunkedGraph: 130 class ChunkedGraph:
176 return f'ChunkedGraph({self.__dict__})' 190 return f'ChunkedGraph({self.__dict__})'
177 191
178 def allChunks(self) -> Iterable[Chunk]: 192 def allChunks(self) -> Iterable[Chunk]:
179 yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs) 193 yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs)
180 194
181 def value(self, subj, pred) -> Node: # throwaway
182 for s in self.allChunks():
183 s = s.primary
184 if (s[0], s[1]) == (subj, pred):
185 return s[2]
186 raise ValueError("value not found")
187
188 def __contains__(self, ch: Chunk) -> bool: 195 def __contains__(self, ch: Chunk) -> bool:
189 return ch in self.allChunks() 196 return ch in self.allChunks()