Mercurial > code > home > repos > rdfdb
changeset 100:27dcc13f9958
new Multigraph over rdflib's ConjunctiveGraph
author | drewp@bigasterisk.com |
---|---|
date | Mon, 30 May 2022 20:31:15 -0700 |
parents | 22f81cb04da4 |
children | 05492457f04b |
files | rdfdb/multigraph.py |
diffstat | 1 files changed, 68 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Multigraph: a small wrapper around rdflib's ConjunctiveGraph (rdfdb/multigraph.py)."""
import logging
from typing import Iterable, Tuple, cast
from rdfdb.rdflibpatch import fixContextToUri, inGraph

from rdflib import ConjunctiveGraph, URIRef
from rdflib.term import Node

from rdfdb.patch import Patch

Triple = Tuple[Node, Node, Node]
log = logging.getLogger('multigraph')


class Multigraph:
    """mostly ConjunctiveGraph but I can optimize some patterns

    All statements live in one private ConjunctiveGraph (``self._g``); each
    subgraph is addressed by the URIRef used as its context.
    """

    def __init__(self) -> None:
        self._g = ConjunctiveGraph()

    def setSubgraph(self, uri: URIRef, triples: Iterable[Triple]) -> None:
        """Replace every statement in context ``uri`` with ``triples``.

        Args:
            uri: context whose current statements are removed.
            triples: (s, p, o) statements to store under ``uri``.
        """
        # Consume the input before clearing: `triples` may be a lazy iterable
        # (for example the generator returned by getSubgraph) that reads from
        # the very graph we are about to modify.
        newTriples = list(triples)

        # is this allowed, or do we need to use patch?
        self._g.remove((None, None, None, uri))
        for s, p, o in newTriples:
            self._g.add((s, p, o, uri))

    def getSubgraph(self, uri: URIRef) -> Iterable[Triple]:
        """Yield the (s, p, o) part of every quad stored in context ``uri``."""
        for s, p, o, _ctx in self._g.quads((None, None, None, uri)):
            yield (s, p, o)

    def patch(self, p: Patch, perfect: bool = True) -> None:
        """Apply ``p`` to the graph: remove ``p.delQuads``, then add ``p.addQuads``.

        Args:
            p: patch whose ``delQuads`` and ``addQuads`` are applied.
            perfect: when true, every delete must currently be in the graph
                and no add may already be in it (an add that this same patch
                also deletes is fine). All checks run before anything is
                modified, so a rejected patch leaves the graph untouched.
                When false, no checks are made.

        Raises:
            ValueError: in perfect mode, if a delete is missing from the
                graph or an add is already present.
        """
        delQuads = [fixContextToUri(spoc) for spoc in p.delQuads]
        addQuads = list(p.addQuads)

        if perfect:
            for spoc in delQuads:
                if not inGraph(spoc, self._g):
                    raise ValueError("%r not in %r" % (spoc[:3], spoc[3]))

            # Quads about to be removed must not count as "already present"
            # for the add check; this keeps the delete-then-add semantics
            # while letting us validate before touching the graph.
            pendingDeletes = {tuple(spoc) for spoc in delQuads}
            for spoc in addQuads:
                spoc = fixContextToUri(spoc)
                if tuple(spoc) not in pendingDeletes and inGraph(spoc, self._g):
                    raise ValueError("%r already in %r" % (spoc[:3], spoc[3]))

        for spoc in delQuads:
            self._g.remove(spoc)
        self._g.addN(addQuads)

    def __len__(self) -> int:
        """Return ``len()`` of the underlying ConjunctiveGraph."""
        return len(self._g)

    def summarizeToLog(self) -> None:
        """Log the total statement count and a per-context statement count."""
        log.info("contexts in graph (%s total stmts):", len(self._g))
        for c in self._g.contexts():
            ci = cast(URIRef, c.identifier)
            # Count by streaming; no need to hold the subgraph in a list.
            n = sum(1 for _ in self.getSubgraph(ci))
            log.info(" %s: %s statements", c.identifier, n)

    def serialize(self, *a, **kw):
        """Forward to ConjunctiveGraph.serialize with the same arguments."""
        return self._g.serialize(*a, **kw)

    def quads(self, *a, **kw):
        """Forward to ConjunctiveGraph.quads with the same arguments."""
        return self._g.quads(*a, **kw)