changeset 100:27dcc13f9958

new Multigraph over rdflib's ConjunctiveGraph
author drewp@bigasterisk.com
date Mon, 30 May 2022 20:31:15 -0700
parents 22f81cb04da4
children 05492457f04b
files rdfdb/multigraph.py
diffstat 1 files changed, 68 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rdfdb/multigraph.py	Mon May 30 20:31:15 2022 -0700
@@ -0,0 +1,68 @@
+import logging
+from typing import Iterable, Tuple, cast
+from rdfdb.rdflibpatch import fixContextToUri, inGraph
+
+from rdflib import ConjunctiveGraph, URIRef
+from rdflib.term import Node
+
+from rdfdb.patch import Patch
+
+Triple = Tuple[Node, Node, Node]  # (subject, predicate, object); the context is passed separately
+log = logging.getLogger('multigraph')  # module-wide logger, used by Multigraph.summarizeToLog
+
+
+class Multigraph:
+    """mostly ConjunctiveGraph but I can optimize some patterns"""
+
+    def __init__(self):
+        self._g = ConjunctiveGraph()
+
+    def setSubgraph(self, uri: URIRef, triples: Iterable[Triple]):
+        # is this allowed, or do we need to use patch?
+        self._g.remove((None, None, None, uri))
+        for s, p, o in triples:
+            self._g.add((s, p, o, uri))
+
+    def getSubgraph(self, uri: URIRef) -> Iterable[Triple]:
+        for s, p, o, g in self._g.quads((None, None, None, uri)):
+            yield (s, p, o)
+
+    def patch(self, p: Patch, perfect=True):
+        toDelete = []
+        for spoc in p.delQuads:
+            spoc = fixContextToUri(spoc)
+
+            if perfect:
+                if inGraph(spoc, self._g):
+                    toDelete.append(spoc)
+                else:
+                    raise ValueError("%r not in %r" % (spoc[:3], spoc[3]))
+            else:
+                self._g.remove(spoc)
+        for spoc in toDelete:
+            self._g.remove(spoc)
+
+        addQuads = list(p.addQuads)
+        if perfect:
+            for spoc in addQuads:
+                spoc = fixContextToUri(spoc)
+                if inGraph(spoc, self._g):
+                    raise ValueError("%r already in %r" % (spoc[:3], spoc[3]))
+        self._g.addN(addQuads)
+
+    def __len__(self):
+        return len(self._g)
+
+    def summarizeToLog(self):
+        log.info("contexts in graph (%s total stmts):" % len(self._g))
+        for c in self._g.contexts():
+            ci = cast(URIRef, c.identifier)
+            g = self.getSubgraph(ci)
+            n = len(list(g))  # todo
+            log.info("  %s: %s statements" % (c.identifier, n))
+
+    def serialize(self, *a, **kw):
+        return self._g.serialize(*a, **kw)
+
+    def quads(self, *a, **kw):
+        return self._g.quads(*a, **kw)
\ No newline at end of file