annotate service/mqtt_to_rdf/lhs_evaluation.py @ 1660:31f7dab6a60b

function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
author drewp@bigasterisk.com
date Sun, 19 Sep 2021 15:39:37 -0700
parents 7ec2483d61b5
children 00a5624d1d14
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1605
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
1 import logging
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
2 from decimal import Decimal
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
3 from typing import (Dict, Iterator, List, Optional, Set, Tuple, Type, Union, cast)
1605
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
4
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
5 from prometheus_client import Summary
1636
3252bdc284bc rm dead code from previous tries
drewp@bigasterisk.com
parents: 1634
diff changeset
6 from rdflib import RDF, Literal, Namespace, URIRef
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
7 from rdflib.term import Node, Variable
1605
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
8
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
9 from candidate_binding import CandidateBinding
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
10 from inference_types import BindableTerm, Triple
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
11 from stmt_chunk import Chunk, ChunkedGraph
1607
b21885181e35 more modules, types. Maybe less repeated computation on BoundLhs
drewp@bigasterisk.com
parents: 1605
diff changeset
12
1605
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
13 log = logging.getLogger('infer')
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
14
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
15 INDENT = ' '
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
16
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
17 ROOM = Namespace("http://projects.bigasterisk.com/room/")
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
18 LOG = Namespace('http://www.w3.org/2000/10/swap/log#')
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
19 MATH = Namespace('http://www.w3.org/2000/10/swap/math#')
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
20
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
21
1658
7ec2483d61b5 refactor inference_functions
drewp@bigasterisk.com
parents: 1651
diff changeset
def _numericNode(n: Node):
    """Return the Python number carried by the Literal `n`.

    Raises TypeError if `n` is not a Literal, or if its Python value is not
    an int, float, or Decimal.
    """
    if isinstance(n, Literal):
        # the local must stay named `val`: the f-string below prints its name
        val = n.toPython()
        if isinstance(val, (int, float, Decimal)):
            return val
        raise TypeError(f'expected number, got {val=}')
    raise TypeError(f'expected Literal, got {n=}')
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
29
449746d1598f WIP move evaluation to new file
drewp@bigasterisk.com
parents:
diff changeset
30
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
class Function:
    """any rule stmt that runs a function (not just a statement match)

    Subclasses provide `pred` and override `getOperandNodes` and `bind`.
    """
    # predicate URI this function type handles; compared against the chunk's
    # predicate in __init__
    pred: URIRef

    def __init__(self, chunk: Chunk, ruleGraph: ChunkedGraph):
        """Wrap `chunk` from `ruleGraph`.

        Raises TypeError if chunk.predicate is not this class's `pred`.
        """
        self.chunk = chunk
        if chunk.predicate != self.pred:
            # say which predicate was expected so a mis-dispatched chunk is diagnosable
            raise TypeError(f'{type(self).__name__} expects predicate {self.pred!r}, got {chunk.predicate!r}')
        self.ruleGraph = ruleGraph

    def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
        """Return this function's operand terms with `existingBinding` applied (subclasses override)."""
        raise NotImplementedError

    def getNumericOperands(self, existingBinding: CandidateBinding) -> List[Union[int, float, Decimal]]:
        """Return the operands from getOperandNodes converted to Python numbers.

        Raises TypeError (from _numericNode) if any operand is not a numeric Literal.
        """
        return [_numericNode(op) for op in self.getOperandNodes(existingBinding)]

    def bind(self, existingBinding: CandidateBinding) -> Optional[CandidateBinding]:
        """either any new bindings this function makes (could be 0), or None if it doesn't match"""
        raise NotImplementedError

    def valueInObjectTerm(self, value: Node) -> Optional[CandidateBinding]:
        """Return a binding that maps the chunk's object variable (primary[2]) to `value`.

        Raises TypeError if that term is not a Variable.
        """
        objVar = self.chunk.primary[2]
        if not isinstance(objVar, Variable):
            raise TypeError(f'expected Variable, got {objVar!r}')
        return CandidateBinding({cast(BindableTerm, objVar): value})
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
60
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
61
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
class SubjectFunction(Function):
    """function that depends only on the subject term"""

    def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
        """Return a one-element list: the subject term (primary[0]) with `existingBinding` applied.

        Raises ValueError if the chunk has no subject term.
        """
        subj = self.chunk.primary[0]
        if subj is None:
            raise ValueError(f'expected one operand on {self.chunk}')
        return [existingBinding.applyTerm(subj)]
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
69
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
70
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
class SubjectObjectFunction(Function):
    """a filter function that depends on the subject and object terms"""

    def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
        """Return [subject, object] (primary[0], primary[2]) with `existingBinding` applied.

        Raises ValueError if either term is None.
        """
        primary = self.chunk.primary
        if primary[0] is None or primary[2] is None:
            raise ValueError(f'expected one operand on each side of {self.chunk}')
        return [existingBinding.applyTerm(term) for term in (primary[0], primary[2])]
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
78
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
79
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
class ListFunction(Function):
    """function that takes an rdf list as input"""

    def usedStatements(self) -> Set[Triple]:
        """Not implemented for Chunk-based rules; always raises NotImplementedError."""
        # The previous body after this raise was unreachable. It also read a
        # misspelled attribute ('subjist' instead of 'subjList') and called
        # _parseList, which is not defined in the visible part of this module,
        # so it was removed instead of being left to rot.
        raise NotImplementedError

    def getOperandNodes(self, existingBinding: CandidateBinding) -> List[Node]:
        """Return every term of the chunk's subject list with `existingBinding` applied.

        Raises ValueError if the chunk has no subject list.
        """
        operands = self.chunk.subjList
        if operands is None:
            raise ValueError(f'expected subject list on {self.chunk}')
        return [existingBinding.applyTerm(x) for x in operands]
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
94
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
95
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
# Function subclasses collected by register(), in registration order.
_registeredFunctionTypes: List[Type['Function']] = []


def register(cls: Type['Function']):
    """Record `cls` in the module's function-type registry.

    Returns `cls` unchanged, so this can be applied as a class decorator.
    """
    _registeredFunctionTypes.append(cls)
    return cls
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
102
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
103
31f7dab6a60b function evaluation uses Chunk lists now and runs fast. Only a few edge cases still broken
drewp@bigasterisk.com
parents: 1658
diff changeset
104 import inference_functions # calls register() on some classes
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
105
1658
7ec2483d61b5 refactor inference_functions
drewp@bigasterisk.com
parents: 1651
diff changeset
# Lookup from predicate URI to the registered Function class that handles it.
# Built once, after the inference_functions import has run its register() calls.
_byPred: Dict[URIRef, Type[Function]] = {cls.pred: cls for cls in _registeredFunctionTypes}
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
107
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
108
1640
4bb6f593ebf3 speedups: abort some rules faster
drewp@bigasterisk.com
parents: 1637
diff changeset
def functionsFor(pred: URIRef) -> Iterator[Type[Function]]:
    """Yield the Function class registered for `pred`, or nothing if there is none."""
    funcType = _byPred.get(pred)
    if funcType is not None:
        yield funcType
4bb6f593ebf3 speedups: abort some rules faster
drewp@bigasterisk.com
parents: 1637
diff changeset
114
1637
ec3f98d0c1d8 refactor rules eval
drewp@bigasterisk.com
parents: 1636
diff changeset
115
1651
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
116 # def lhsStmtsUsedByFuncs(graph: ChunkedGraph) -> Set[Chunk]:
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
117 # usedByFuncs: Set[Triple] = set() # don't worry about matching these
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
118 # for s in graph:
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
119 # for cls in functionsFor(pred=s[1]):
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
120 # usedByFuncs.update(cls(s, graph).usedStatements())
20474ad4968e WIP - functions are broken as i move most layers to work in Chunks not Triples
drewp@bigasterisk.com
parents: 1648
diff changeset
121 # return usedByFuncs
1640
4bb6f593ebf3 speedups: abort some rules faster
drewp@bigasterisk.com
parents: 1637
diff changeset
122
4bb6f593ebf3 speedups: abort some rules faster
drewp@bigasterisk.com
parents: 1637
diff changeset
123
4bb6f593ebf3 speedups: abort some rules faster
drewp@bigasterisk.com
parents: 1637
diff changeset
def rulePredicates() -> Set[URIRef]:
    """Return the set of predicates handled by the registered Function classes."""
    return {funcType.pred for funcType in _registeredFunctionTypes}