Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/stmt_chunk.py @ 1660:31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
author | drewp@bigasterisk.com |
---|---|
date | Sun, 19 Sep 2021 15:39:37 -0700 |
parents | 15e84c71beee |
children | 00a5624d1d14 |
rev | line source |
---|---|
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
1 import itertools |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
2 import logging |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
3 from dataclasses import dataclass |
1659
15e84c71beee
parse lists from graph into the Chunks
drewp@bigasterisk.com
parents:
1654
diff
changeset
|
4 from typing import Iterable, Iterator, List, Optional, Set, Tuple, cast |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
5 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
6 from rdflib.graph import Graph |
1659
15e84c71beee
parse lists from graph into the Chunks
drewp@bigasterisk.com
parents:
1654
diff
changeset
|
7 from rdflib.namespace import RDF |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
8 from rdflib.term import BNode, Literal, Node, URIRef, Variable |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
10 from candidate_binding import CandidateBinding |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
11 from inference_types import BindingUnknown, Inconsistent, Triple |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
12 from rdf_debug import graphDump |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
13 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
# Module logger, registered under the name 'infer'.
log = logging.getLogger('infer')

# Indentation unit; repeated (e.g. INDENT*6, INDENT*7) to prefix the debug log
# messages emitted in this module.
INDENT = ' '

# (subject, predicate, object) of a Chunk. The subject or object slot is None
# when that position is carried by Chunk.subjList / Chunk.objList instead.
ChunkPrimaryTriple = Tuple[Optional[Node], Node, Optional[Node]]
31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents:
1659
diff
changeset
|
19 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
20 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
@dataclass
class Chunk:  # rename this
    """A statement, maybe with variables in it, except *the object can be an rdf list*.

    This is done to optimize list comparisons (a lot) at the very minor expense of not
    handling certain exotic cases, such as a branching list.

    Also the subject could be a list, e.g. for (?x ?y) math:sum ?z .

    Also a function call in a rule is always contained in exactly one chunk.

    For each of the subject and object positions, exactly one of the
    `primary` slot and the matching list (`subjList` / `objList`) must be
    set; `__post_init__` raises TypeError otherwise.
    """
    # all immutable
    primary: ChunkPrimaryTriple
    subjList: Optional[List[Node]] = None
    objList: Optional[List[Node]] = None

    def __post_init__(self):
        subjOk = (self.primary[0] is not None) ^ (self.subjList is not None)
        objOk = (self.primary[2] is not None) ^ (self.objList is not None)
        if not (subjOk and objOk):
            raise TypeError("invalid chunk init")
        self.predicate = self.primary[1]
        # Lists become tuples so the key is hashable and orderable.
        self.sortKey = (self.primary, tuple(self.subjList or []), tuple(self.objList or []))

    def __hash__(self):
        return hash(self.sortKey)

    def __gt__(self, other):
        return self.sortKey > other.sortKey

    def _shape(self) -> Tuple[Optional[int], Optional[int]]:
        """(subject list length or None, object list length or None).

        `_allTerms` of two Chunks line up position-by-position only when
        their shapes are equal.
        """
        return (
            None if self.subjList is None else len(self.subjList),
            None if self.objList is None else len(self.objList),
        )

    def _allTerms(self) -> Iterator[Node]:
        """the terms in `primary` plus the lists. Output order is undefined but stable between same-sized Chunks"""
        yield self.primary[1]
        if self.primary[0] is not None:
            yield self.primary[0]
        else:
            yield from cast(List[Node], self.subjList)
        if self.primary[2] is not None:
            yield self.primary[2]
        else:
            yield from cast(List[Node], self.objList)

    def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding:
        """Copy of `prevBindings` extended so that self's Variable/BNode terms map to `proposed`'s terms.

        Raises Inconsistent if a term is already bound to a different value,
        or if the two Chunks have different shapes (their terms could not be
        paired up meaningfully).
        """
        if self._shape() != proposed._shape():
            # zip() below would silently truncate and pair unrelated terms.
            msg = f'chunk shapes differ: {self!r} vs {proposed!r}' if log.isEnabledFor(logging.DEBUG) else ''
            raise Inconsistent(msg)
        outBinding = prevBindings.copy()
        for rt, ct in zip(self._allTerms(), proposed._allTerms()):
            if isinstance(rt, (Variable, BNode)):
                if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct:
                    msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else ''
                    raise Inconsistent(msg)
                outBinding.addNewBindings(CandidateBinding({rt: ct}))
        return outBinding

    def myMatches(self, g: 'ChunkedGraph') -> List['Chunk']:
        """Chunks from g where self, which may have BindableTerm wildcards, could match that chunk in g."""
        # Lazy %-args: repr(g) dumps the whole graph, so only build the
        # message when debug logging is actually enabled.
        log.debug('%s %s.myMatches(%s', INDENT * 6, self, g)
        return [ch for ch in g.allChunks() if self.matches(ch)]

    # could combine this and totalBindingIf into a single ChunkMatch object
    def matches(self, other: 'Chunk') -> bool:
        """does this Chunk with potential BindableTerm wildcards match other?

        Chunks of different shape (list vs. plain term in a position, or
        lists of different length) never match.
        """
        if self._shape() != other._shape():
            return False
        for selfTerm, otherTerm in zip(self._allTerms(), other._allTerms()):
            if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm:
                return False
        return True

    def __repr__(self):
        pre = ('+'.join('%s' % elem for elem in self.subjList) + '+' if self.subjList else '')
        post = ('+' + '+'.join('%s' % elem for elem in self.objList) if self.objList else '')
        return pre + repr(self.primary) + post

    def isFunctionCall(self, functionsFor) -> bool:
        """True if `functionsFor(predicate)` yields at least one item."""
        return bool(list(functionsFor(cast(URIRef, self.predicate))))

    def isStatic(self) -> bool:
        """True if every term of this Chunk is a URIRef or Literal."""
        return all(_termIsStatic(s) for s in self._allTerms())

    def apply(self, cb: CandidateBinding, returnBoundStatementsOnly=True) -> 'Chunk':
        """Chunk like this one but with cb substitutions applied. If the flag is
        True, we raise BindingUnknown instead of leaving a term unbound"""
        fn = lambda t: cb.applyTerm(t, returnBoundStatementsOnly)
        return Chunk(
            (
                fn(self.primary[0]) if self.primary[0] is not None else None,  #
                fn(self.primary[1]),  #
                fn(self.primary[2]) if self.primary[2] is not None else None),
            # `is not None` (not truthiness) so an empty list stays a list and
            # the result still passes the __post_init__ check.
            subjList=[fn(t) for t in self.subjList] if self.subjList is not None else None,
            objList=[fn(t) for t in self.objList] if self.objList is not None else None,
        )
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
111 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
112 |
1660
31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents:
1659
diff
changeset
|
def _termIsStatic(term: Optional[Node]) -> bool:
    """True when `term` is None, a URIRef, or a Literal; False for anything else."""
    if term is None:
        return True
    return isinstance(term, (URIRef, Literal))
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
115 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
116 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]:
    """Yield each chunk of `g` with the substitutions from `cb` applied.

    `returnBoundStatementsOnly` is passed through to `Chunk.apply`. A chunk
    whose `apply` raises BindingUnknown is logged at debug level and skipped
    rather than yielded.
    """
    prefix = INDENT * 7  # loop-invariant log prefix
    for chunk in g:
        try:
            bound = chunk.apply(cb, returnBoundStatementsOnly=returnBoundStatementsOnly)
        except BindingUnknown:
            # Lazy %-args: the message is only built when debug is enabled.
            log.debug('%s CB.apply cant bind %s using %s', prefix, chunk, cb.binding)
            continue
        log.debug('%s CB.apply took %s to %s', prefix, chunk, bound)
        yield bound
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
128 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
129 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
class ChunkedGraph:
    """a Graph converts 1-to-1 with a ChunkedGraph, where the Chunks have
    combined some statements together. (The only exception is that bnodes for
    rdf lists are lost)

    Chunks are sorted into three sets at construction time:
    `chunksUsedByFuncs` (predicate has at least one function according to
    `functionsFor`), `staticChunks` (all terms static), and `patternChunks`
    (everything else).
    """

    def __init__(
            self,
            graph: Graph,
            functionsFor  # get rid of this- i'm just working around a circular import
    ):
        self.chunksUsedByFuncs: Set[Chunk] = set()
        self.staticChunks: Set[Chunk] = set()
        self.patternChunks: Set[Chunk] = set()

        # rdf:first / rdf:rest statements are pulled out of the graph and
        # folded into the Chunks that reference the list heads.
        firstNodes = {}
        restNodes = {}
        graphStmts = set()
        for s, p, o in graph:
            if p == RDF['first']:
                firstNodes[s] = o
            elif p == RDF['rest']:
                restNodes[s] = o
            else:
                graphStmts.add((s, p, o))

        def gatherList(start):
            """Elements of the rdf list whose head node is `start`.

            Raises ValueError if the rdf:rest chain loops back on itself
            (which would otherwise never terminate). A node missing its
            rdf:first or rdf:rest raises KeyError.
            """
            lst = []
            visited = set()
            cur = start
            while cur != RDF['nil']:
                if cur in visited:
                    raise ValueError(f'rdf list starting at {start!r} contains a cycle')
                visited.add(cur)
                lst.append(firstNodes[cur])
                cur = restNodes[cur]
            return lst

        for s, p, o in graphStmts:
            subjList = objList = None
            if s in firstNodes:
                subjList = gatherList(s)
                s = None
            if o in firstNodes:
                objList = gatherList(o)
                o = None
            c = Chunk((s, p, o), subjList=subjList, objList=objList)

            if c.isFunctionCall(functionsFor):
                self.chunksUsedByFuncs.add(c)
            elif c.isStatic():
                self.staticChunks.add(c)
            else:
                self.patternChunks.add(c)

    def allPredicatesExceptFunctions(self) -> Set[Node]:
        """Predicates of the static and pattern chunks (function-call chunks excluded)."""
        return {ch.predicate for ch in itertools.chain(self.staticChunks, self.patternChunks)}

    def noPredicatesAppear(self, preds: Iterable[Node]) -> bool:
        """True if none of `preds` is a predicate of a static or pattern chunk."""
        return self.allPredicatesExceptFunctions().isdisjoint(preds)

    def __bool__(self):
        return bool(self.chunksUsedByFuncs) or bool(self.staticChunks) or bool(self.patternChunks)

    def __repr__(self):
        return f'ChunkedGraph({self.__dict__})'

    def allChunks(self) -> Iterable[Chunk]:
        """Every chunk: static, then pattern, then function-call chunks."""
        yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs)

    def __contains__(self, ch: Chunk) -> bool:
        # Direct set membership instead of a linear scan over allChunks().
        return (ch in self.staticChunks or ch in self.patternChunks or ch in self.chunksUsedByFuncs)