Mercurial > code > home > repos > homeauto
annotate service/mqtt_to_rdf/stmt_chunk.py @ 1661:00a5624d1d14
cleanups and optimizations
author | drewp@bigasterisk.com |
---|---|
date | Sun, 19 Sep 2021 16:51:51 -0700 |
parents | 31f7dab6a60b |
children | 1a7c1261302c |
rev | line source |
---|---|
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
1 import itertools |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
2 import logging |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
3 from dataclasses import dataclass |
1659
15e84c71beee
parse lists from graph into the Chunks
drewp@bigasterisk.com
parents:
1654
diff
changeset
|
4 from typing import Iterable, Iterator, List, Optional, Set, Tuple, cast |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
5 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
6 from rdflib.graph import Graph |
1659
15e84c71beee
parse lists from graph into the Chunks
drewp@bigasterisk.com
parents:
1654
diff
changeset
|
7 from rdflib.namespace import RDF |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
8 from rdflib.term import BNode, Literal, Node, URIRef, Variable |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
10 from candidate_binding import CandidateBinding |
1661 | 11 from inference_types import BindingUnknown, Inconsistent |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
12 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
# Module logger, registered under the name 'infer'.
log = logging.getLogger('infer')

# Unit of indentation that is repeated (e.g. INDENT*6) to prefix debug-log lines.
# NOTE(review): this literal is reproduced as it appears in the listing; confirm
# its exact width against the repository copy.
INDENT = ' '

# The (subject, predicate, object) core of a Chunk. The subject and object slots
# are Optional because Chunk stores a list-valued subject/object in
# subjList/objList instead and leaves the matching slot here as None.
ChunkPrimaryTriple = Tuple[Optional[Node], Node, Optional[Node]]
31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents:
1659
diff
changeset
|
18 |
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
19 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
@dataclass
class Chunk:  # rename this
    """A statement, maybe with variables in it, except *the subject or object
    can be rdf lists*. This is done to optimize list comparisons (a lot) at the
    very minor expense of not handling certain exotic cases, such as a branching
    list.

    Example: (?x ?y) math:sum ?z . <-- this becomes one Chunk.

    A function call in a rule is always contained in exactly one chunk.

    https://www.w3.org/TeamSubmission/n3/#:~:text=Implementations%20may%20treat%20list%20as%20a%20data%20type

    For the subject, exactly one of `primary[0]` and `subjList` must be set;
    for the object, exactly one of `primary[2]` and `objList`. Any other
    combination raises TypeError at construction.
    """
    # all immutable
    primary: ChunkPrimaryTriple
    subjList: Optional[List[Node]] = None
    objList: Optional[List[Node]] = None

    def __post_init__(self):
        """Validate the subject/object slots and precompute `predicate` and `sortKey`."""
        subjOk = (self.primary[0] is not None) ^ (self.subjList is not None)
        objOk = (self.primary[2] is not None) ^ (self.objList is not None)
        if not (subjOk and objOk):
            raise TypeError("invalid chunk init")
        self.predicate = self.primary[1]
        # Lists become tuples so the key is hashable and orderable.
        self.sortKey = (self.primary, tuple(self.subjList or []), tuple(self.objList or []))

    def __hash__(self):
        return hash(self.sortKey)

    def __gt__(self, other):
        return self.sortKey > other.sortKey

    def _allTerms(self) -> Iterator[Node]:
        """the terms in `primary` plus the lists. Output order is undefined but stable between same-sized Chunks"""
        yield self.primary[1]
        if self.primary[0] is not None:
            yield self.primary[0]
        else:
            yield from cast(List[Node], self.subjList)
        if self.primary[2] is not None:
            yield self.primary[2]
        else:
            yield from cast(List[Node], self.objList)

    def _sameShape(self, other: 'Chunk') -> bool:
        """True if `other` has lists in the same positions and of the same
        lengths as self, i.e. the two _allTerms() streams line up term-for-term."""
        for mine, theirs in ((self.subjList, other.subjList), (self.objList, other.objList)):
            if (mine is None) != (theirs is None):
                return False
            if mine is not None and len(mine) != len(cast(List[Node], theirs)):
                return False
        return True

    def totalBindingIfThisStmtWereTrue(self, prevBindings: CandidateBinding, proposed: 'Chunk') -> CandidateBinding:
        """Return a copy of `prevBindings` extended with the bindings implied by
        aligning self's Variable/BNode terms with the terms of `proposed`.

        Raises Inconsistent if a term is already bound to a different value, or
        if the two chunks are not the same shape (zip would otherwise pair up
        unrelated terms and silently drop the extras).
        """
        if not self._sameShape(proposed):
            msg = f'shape mismatch: {self} vs {proposed}' if log.isEnabledFor(logging.DEBUG) else ''
            raise Inconsistent(msg)
        outBinding = prevBindings.copy()
        for rt, ct in zip(self._allTerms(), proposed._allTerms()):
            if isinstance(rt, (Variable, BNode)):
                if outBinding.contains(rt) and outBinding.applyTerm(rt) != ct:
                    # Only format the (potentially large) message when it can be seen.
                    msg = f'{rt=} {ct=} {outBinding=}' if log.isEnabledFor(logging.DEBUG) else ''
                    raise Inconsistent(msg)
                outBinding.addNewBindings(CandidateBinding({rt: ct}))
        return outBinding

    def myMatches(self, g: 'ChunkedGraph') -> List['Chunk']:
        """Chunks from g where self, which may have BindableTerm wildcards, could match that chunk in g."""
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f'{INDENT*6} {self}.myMatches({g}')
        return [ch for ch in g.allChunks() if self.matches(ch)]

    # could combine this and totalBindingIf into a single ChunkMatch object
    def matches(self, other: 'Chunk') -> bool:
        """does this Chunk with potential BindableTerm wildcards match other?

        Chunks of different shape (a list on one side only, or lists of
        different lengths) never match.
        """
        if not self._sameShape(other):
            return False
        for selfTerm, otherTerm in zip(self._allTerms(), other._allTerms()):
            if not isinstance(selfTerm, (Variable, BNode)) and selfTerm != otherTerm:
                return False
        return True

    def __repr__(self):
        pre = ('+'.join('%s' % elem for elem in self.subjList) + '+' if self.subjList else '')
        post = ('+' + '+'.join('%s' % elem for elem in self.objList) if self.objList else '')
        return pre + repr(self.primary) + post

    def isFunctionCall(self, functionsFor) -> bool:
        """True if `functionsFor(predicate)` yields at least one item."""
        # any() stops at the first item instead of materializing the whole result.
        return any(True for _ in functionsFor(cast(URIRef, self.predicate)))

    def isStatic(self) -> bool:
        """True if every term of this chunk passes _termIsStatic."""
        return all(_termIsStatic(s) for s in self._allTerms())

    def apply(self, cb: CandidateBinding, returnBoundStatementsOnly=True) -> 'Chunk':
        """Chunk like this one but with cb substitutions applied. If the flag is
        True, we raise BindingUnknown instead of leaving a term unbound"""

        def bind(t):
            return cb.applyTerm(t, returnBoundStatementsOnly)

        subj, pred, obj = self.primary
        return Chunk(
            (
                bind(subj) if subj is not None else None,
                bind(pred),
                bind(obj) if obj is not None else None,
            ),
            # Test against None, not truthiness: an empty list must stay a list,
            # otherwise the new Chunk has neither a term nor a list in that slot
            # and __post_init__ rejects it.
            subjList=[bind(t) for t in self.subjList] if self.subjList is not None else None,
            objList=[bind(t) for t in self.objList] if self.objList is not None else None,
        )
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
114 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
115 |
1660
31f7dab6a60b
function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents:
1659
diff
changeset
|
def _termIsStatic(term: Optional[Node]) -> bool:
    """True when `term` is None, a URIRef, or a Literal; False for anything else."""
    if term is None:
        return True
    return isinstance(term, (URIRef, Literal))
1651
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
118 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
119 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
def applyChunky(cb: CandidateBinding, g: Iterable[Chunk], returnBoundStatementsOnly=True) -> Iterator[Chunk]:
    """Yield each chunk of `g` with the substitutions from `cb` applied.

    `returnBoundStatementsOnly` is passed through to Chunk.apply. A chunk whose
    apply() raises BindingUnknown is skipped; iteration continues with the next
    chunk.
    """
    for chunk in g:
        try:
            bound = chunk.apply(cb, returnBoundStatementsOnly=returnBoundStatementsOnly)
        except BindingUnknown:
            # Guard the f-string so chunk/binding reprs are only built when
            # debug logging is actually on.
            if log.isEnabledFor(logging.DEBUG):
                log.debug(f'{INDENT*7} CB.apply cant bind {chunk} using {cb.binding}')
            continue
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f'{INDENT*7} CB.apply took {chunk} to {bound}')

        yield bound
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
131 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
132 |
20474ad4968e
WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents:
diff
changeset
|
class ChunkedGraph:
    """a Graph converts 1-to-1 with a ChunkedGraph, where the Chunks have
    combined some statements together. (The only exception is that bnodes for
    rdf lists are lost)"""

    def __init__(
            self,
            graph: Graph,
            functionsFor  # get rid of this- i'm just working around a circular import
    ):
        """Split `graph` into Chunks and sort them into three sets:
        chunksUsedByFuncs (predicate has functions per `functionsFor`),
        staticChunks (all terms static) and patternChunks (everything else).

        Raises ValueError if an rdf list reachable from a statement is cyclic.
        """
        self.chunksUsedByFuncs: Set[Chunk] = set()
        self.staticChunks: Set[Chunk] = set()
        self.patternChunks: Set[Chunk] = set()

        # Looked up once; these are loop-invariant.
        rdfFirst = RDF['first']
        rdfRest = RDF['rest']
        rdfNil = RDF['nil']

        # rdf:first / rdf:rest triples only describe list structure; they are
        # indexed here and folded into the chunks that reference the list.
        firstNodes = {}
        restNodes = {}
        graphStmts = set()
        for s, p, o in graph:
            if p == rdfFirst:
                firstNodes[s] = o
            elif p == rdfRest:
                restNodes[s] = o
            else:
                graphStmts.add((s, p, o))

        def gatherList(start):
            """Elements of the rdf list whose head cell is `start`, in order."""
            lst = []
            visited = set()
            cur = start
            while cur != rdfNil:
                if cur in visited:
                    # Without this check a cyclic rdf:rest chain never terminates.
                    raise ValueError(f'rdf list starting at {start!r} contains a cycle')
                visited.add(cur)
                lst.append(firstNodes[cur])
                cur = restNodes[cur]
            return lst

        for s, p, o in graphStmts:
            subjList = objList = None
            if s in firstNodes:
                subjList = gatherList(s)
                s = None
            if o in firstNodes:
                objList = gatherList(o)
                o = None
            c = Chunk((s, p, o), subjList=subjList, objList=objList)

            if c.isFunctionCall(functionsFor):
                self.chunksUsedByFuncs.add(c)
            elif c.isStatic():
                self.staticChunks.add(c)
            else:
                self.patternChunks.add(c)

    def allPredicatesExceptFunctions(self) -> Set[Node]:
        """Predicates of the static and pattern chunks (function chunks excluded)."""
        return {ch.predicate for ch in itertools.chain(self.staticChunks, self.patternChunks)}

    def noPredicatesAppear(self, preds: Iterable[Node]) -> bool:
        """True if none of `preds` is a predicate of a static or pattern chunk."""
        return self.allPredicatesExceptFunctions().isdisjoint(preds)

    def __bool__(self):
        return bool(self.chunksUsedByFuncs) or bool(self.staticChunks) or bool(self.patternChunks)

    def __repr__(self):
        return f'ChunkedGraph({self.__dict__})'

    def allChunks(self) -> Iterable[Chunk]:
        """Every chunk: static, then pattern, then function chunks."""
        yield from itertools.chain(self.staticChunks, self.patternChunks, self.chunksUsedByFuncs)

    def __contains__(self, ch: Chunk) -> bool:
        # Hash lookups in the three sets instead of a linear scan of allChunks().
        return (ch in self.staticChunks or ch in self.patternChunks or ch in self.chunksUsedByFuncs)