Mercurial > code > home > repos > rdfdb
changeset 83:bbf4595b34ae
refactor and fix up types
author | drewp@bigasterisk.com |
---|---|
date | Mon, 04 Apr 2022 21:25:53 -0700 |
parents | 8a9f8dc65da8 |
children | 36f4318442f2 |
files | rdfdb/compact_turtle.py rdfdb/graphfile.py |
diffstat | 2 files changed, 89 insertions(+), 85 deletions(-) [+] |
line wrap: on
line diff
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rdfdb/compact_turtle.py Mon Apr 04 21:25:53 2022 -0700
@@ -0,0 +1,80 @@
+from typing import cast, Any
+from rdflib import RDF, XSD, Literal
+from rdflib.plugins.serializers.turtle import (_GEN_QNAME_FOR_DT, OBJECT, VERB, TurtleSerializer)
+
+originalWrite = TurtleSerializer.write
+
+
+def patchN3SerializerToUseLessWhitespace(cutColumn=75):
+    # todo: make a n3serializer subclass with whitespace settings
+
+    def write(self, text):
+        lines = text.split('\n')
+        if len(lines) > 1:
+            self._column = len(lines[-1])
+        else:
+            self._column += len(lines[0])
+        return originalWrite(self, text)
+
+    TurtleSerializer.write = write
+
+    def predicateList(self, subject, newline=False):
+        properties = self.buildPredicateHash(subject)
+        propList = self.sortProperties(properties)
+        if len(propList) == 0:
+            return
+        self.verb(propList[0], newline=newline)
+        self.objectList(properties[propList[0]])
+        for predicate in propList[1:]:
+            self.write(';')
+            # can't do proper wrapping since we don't know how much is coming
+            if self._column > cutColumn:
+                self.write('\n' + self.indent(1))
+            self.verb(predicate, newline=False)
+            self.objectList(properties[predicate])
+
+    def objectList(self, objects):
+        count = len(objects)
+        if count == 0:
+            return
+        depthmod = (count == 1) and 0 or 1
+        self.depth += depthmod
+        self.path(objects[0], OBJECT)
+        for obj in objects[1:]:
+            self.write(', ')
+            self.path(obj, OBJECT, newline=True)
+        self.depth -= depthmod
+
+    originalStatement = TurtleSerializer.statement
+
+    def statement(self, subject) -> bool:
+        if list(self.store.triples((subject, RDF.type, None))):
+            self.write('\n')
+        originalStatement(self, subject)
+        return False # suppress blank line for 'minor' statements
+
+    TurtleSerializer.statement = cast(Any, statement)
+    TurtleSerializer.predicateList = predicateList
+    TurtleSerializer.objectList = objectList
+
+    def custom_literal(node, qname_callback):
+        if node.datatype == XSD['double']:
+            num = node.toPython()
+            return '%g' % num
+        return node._literal_n3(use_plain=True, qname_callback=qname_callback)
+
+    def label(self, node, position):
+        if node == RDF.nil:
+            return '()'
+        if position is VERB and node in self.keywords:
+            return self.keywords[node]
+        if isinstance(node, Literal):
+            return custom_literal(
+                node, # <- switch to this
+                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT))
+        else:
+            node = self.relativize(node)
+
+        return self.getQName(node, position == VERB) or node.n3()
+
+    TurtleSerializer.label = label
```
```diff
--- a/rdfdb/graphfile.py Mon Apr 04 21:10:40 2022 -0700
+++ b/rdfdb/graphfile.py Mon Apr 04 21:25:53 2022 -0700
@@ -2,98 +2,23 @@
 import os
 import time
 import traceback
-from typing import Dict, Optional, Protocol
+from typing import Dict, Optional, Protocol, cast
 
-from rdflib import RDF, XSD, Graph, Literal, URIRef
-from twisted.internet import reactor
+import twisted.internet.reactor
+from rdflib import Graph, URIRef
 from twisted.internet.inotify import INotify, humanReadableMask
-from twisted.internet.interfaces import IDelayedCall
+from twisted.internet.interfaces import IDelayedCall, IReactorCore
 from twisted.python.filepath import FilePath
 
+from rdfdb.compact_turtle import patchN3SerializerToUseLessWhitespace
 from rdfdb.patch import Patch
 from rdfdb.rdflibpatch import inContext
 
+reactor = cast(IReactorCore, twisted.internet.reactor)
+
 log = logging.getLogger('graphfile')
 iolog = logging.getLogger('io')
 
-
-def patchN3SerializerToUseLessWhitespace(cutColumn=75):
-    # todo: make a n3serializer subclass with whitespace settings
-    from rdflib.plugins.serializers.turtle import (_GEN_QNAME_FOR_DT, OBJECT, VERB, TurtleSerializer)
-    originalWrite = TurtleSerializer.write
-
-    def write(self, s):
-        lines = s.split('\n')
-        if len(lines) > 1:
-            self._column = len(lines[-1])
-        else:
-            self._column += len(lines[0])
-        return originalWrite(self, s)
-
-    TurtleSerializer.write = write # type: ignore
-
-    def predicateList(self, subject, newline=False):
-        properties = self.buildPredicateHash(subject)
-        propList = self.sortProperties(properties)
-        if len(propList) == 0:
-            return
-        self.verb(propList[0], newline=newline)
-        self.objectList(properties[propList[0]])
-        for predicate in propList[1:]:
-            self.write(';')
-            # can't do proper wrapping since we don't know how much is coming
-            if self._column > cutColumn:
-                self.write('\n' + self.indent(1))
-            self.verb(predicate, newline=False)
-            self.objectList(properties[predicate])
-
-    def objectList(self, objects):
-        count = len(objects)
-        if count == 0:
-            return
-        depthmod = (count == 1) and 0 or 1
-        self.depth += depthmod
-        self.path(objects[0], OBJECT)
-        for obj in objects[1:]:
-            self.write(', ')
-            self.path(obj, OBJECT, newline=True)
-        self.depth -= depthmod
-
-    originalStatement = TurtleSerializer.statement
-
-    def statement(self, subject):
-        if list(self.store.triples((subject, RDF.type, None))):
-            self.write('\n')
-        originalStatement(self, subject)
-        return False # suppress blank line for 'minor' statements
-
-    TurtleSerializer.statement = statement # type: ignore
-    TurtleSerializer.predicateList = predicateList # type: ignore
-    TurtleSerializer.objectList = objectList # type: ignore
-
-    def custom_literal(node, qname_callback):
-        if node.datatype == XSD['double']:
-            num = node.toPython()
-            return '%g' % num
-        return node._literal_n3(use_plain=True, qname_callback=qname_callback)
-
-    def label(self, node, position):
-        if node == RDF.nil:
-            return '()'
-        if position is VERB and node in self.keywords:
-            return self.keywords[node]
-        if isinstance(node, Literal):
-            return custom_literal(
-                node, # <- switch to this
-                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT))
-        else:
-            node = self.relativize(node)
-
-        return self.getQName(node, position == VERB) or node.n3()
-
-    TurtleSerializer.label = label # type: ignore
-
-
 patchN3SerializerToUseLessWhitespace()
 
 
@@ -273,10 +198,9 @@
 
         self.graphToWrite = graph
         if self.writeCall:
-            self.writeCall.reset(self.flushDelay)
+            self.writeCall.reset(self.flushDelay) # type: ignore
         else:
-            # This awkward assignment is just to hide from mypy.
-            setattr(self, 'writeCall', reactor.callLater(self.flushDelay, self.flush))
+            self.writeCall = reactor.callLater(self.flushDelay, self.flush)
 
     def flush(self) -> None:
         self.writeCall = None
```