Mercurial > code > home > repos > rdfdb
changeset 106:b0f922c8c728
rm old code and probably a bunch of inotify workarounds
author | drewp@bigasterisk.com |
---|---|
date | Mon, 30 May 2022 22:43:07 -0700 |
parents | 4681ea3fcdf6 |
children | 19100db34354 |
files | rdfdb/graphfile.py rdfdb/watched_files.py |
diffstat | 2 files changed, 12 insertions(+), 223 deletions(-) [+] |
line wrap: on
line diff
--- a/rdfdb/graphfile.py Mon May 30 20:38:53 2022 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,223 +0,0 @@ -import logging -import os -import time -import traceback -from typing import Dict, Optional, Protocol, cast -from pathlib import Path - -import twisted.internet.reactor -from rdflib import Graph, URIRef -from twisted.internet.inotify import INotify, humanReadableMask -from twisted.internet.interfaces import IDelayedCall, IReactorCore -from twisted.python.filepath import FilePath - -from rdfdb.compact_turtle import patchN3SerializerToUseLessWhitespace -from rdfdb.patch import Patch -from rdfdb.rdflibpatch import inContext - -raise NotImplementedError('deleteme') -reactor = cast(IReactorCore, twisted.internet.reactor) - -log = logging.getLogger('graphfile') -iolog = logging.getLogger('io') - -patchN3SerializerToUseLessWhitespace() - - -class PatchCb(Protocol): - - def __call__(self, patch: Patch, dueToFileChange: bool = False) -> None: - ... - - -class GetSubgraph(Protocol): - - def __call__(self, uri: URIRef) -> Graph: - ... - - -class GraphFile(object): - """ - one rdf file that we read from, write to, and notice external changes to - """ - - def __init__(self, notifier: INotify, path: Path, uri: URIRef, patch: PatchCb, getSubgraph: GetSubgraph, globalPrefixes: Dict[str, URIRef], - ctxPrefixes: Dict[str, URIRef]): - """ - uri is the context for the triples in this file. We assume - sometimes that we're the only ones with triples in this - context. - - this does not include an initial reread() call - - Prefixes are mutable dicts. The caller may add to them later. - """ - self.path, self.uri = path, uri - self.patch, self.getSubgraph = patch, getSubgraph - - self.lastWriteTimestamp = 0.0 # mtime from the last time _we_ wrote - - self.globalPrefixes = globalPrefixes - self.ctxPrefixes = ctxPrefixes - self.readPrefixes: Dict[str, URIRef] = {} - - if not os.path.exists(path): - # can't start notify until file exists - try: - os.makedirs(os.path.dirname(path)) - except OSError: - pass - f = open(path, "w") - f.write("#new\n") - f.close() - iolog.info("%s created", path) - # this was supposed to cut out some extra reads but it - # didn't work: - self.lastWriteTimestamp = os.path.getmtime(path) - - self.flushDelay = 2 # seconds until we have to call flush() when dirty - self.writeCall: Optional[IDelayedCall] = None - - self.notifier = notifier - self.addWatch() - - def addWatch(self) -> None: - - # emacs save comes in as IN_MOVE_SELF, maybe - - # I was hoping not to watch IN_CHANGED and get lots of - # half-written files, but emacs doesn't close its files after - # a write, so there's no other event. I could try to sleep - # until after all the writes are done, but I think the only - # bug left is that we'll retry too agressively on a file - # that's being written - - # See twisted.internet.inotify for IN_CHANGED event, etc. - - log.info("add watch on %s", self.path) - self.notifier.watch(FilePath(self.path), callbacks=[self.notify]) - - def notify(self, notifier: INotify, filepath: FilePath, mask: int) -> None: - try: - maskNames = humanReadableMask(mask) - if maskNames[0] == 'delete_self': - if not filepath.exists(): - log.info("%s delete_self", filepath) - self.fileGone() - return - else: - log.warn("%s delete_self event but file is here. " "probably a new version moved in", filepath) - - # we could filter these out in the watch() call, but I want - # the debugging - if maskNames[0] in ['open', 'access', 'close_nowrite', 'attrib']: - log.debug("%s %s event, ignoring" % (filepath, maskNames)) - return - - try: - if filepath.getModificationTime() == self.lastWriteTimestamp: - log.debug("%s changed, but we did this write", filepath) - return - except OSError as e: - log.error("%s: %r" % (filepath, e)) - # getting OSError no such file, followed by no future reads - reactor.callLater(.5, self.addWatch) # ? - - return - - log.info("reread %s because of %s event", filepath, maskNames) - - self.reread() - except Exception: - traceback.print_exc() - - def fileGone(self) -> None: - """ - our file is gone; remove the statements from that context - """ - myQuads = [(s, p, o, self.uri) for s, p, o in self.getSubgraph(self.uri)] - log.debug("dropping all statements from context %s", self.uri) - if myQuads: - self.patch(Patch(delQuads=myQuads), dueToFileChange=True) - - def reread(self) -> None: - """update the graph with any diffs from this file - - n3 parser fails on "1.e+0" even though rdflib was emitting that itself - """ - old = self.getSubgraph(self.uri) - new = Graph() - try: - contents = open(self.path).read() - if contents.startswith("#new"): - log.debug("%s ignoring empty contents of my new file", self.path) - # this is a new file we're starting, and we should not - # patch our graph as if it had just been cleared. We - # shouldn't even be here reading this, but - # lastWriteTimestamp didn't work. - return - - new.parse(location=self.path, format='n3') - self.readPrefixes = dict(new.namespaces()) - except SyntaxError as e: - print(e) - traceback.print_exc() - log.error("%s syntax error", self.path) - # todo: likely bug- if a file has this error upon first - # read, I think we don't retry it right. - return - except IOError as e: - log.error("%s rereading %s: %r", self.path, self.uri, e) - return - - old = inContext(old, self.uri) - new = inContext(new, self.uri) - - p = Patch.fromDiff(old, new) - if p: - log.debug("%s applying patch for changes in file", self.path) - self.patch(p, dueToFileChange=True) - else: - log.debug("old == new after reread of %s", self.path) - - def dirty(self, graph: Graph) -> None: - """ - there are new contents to write to our file - - graph is the rdflib.Graph that contains the contents of the - file. It is allowed to change. Note that dirty() will probably - do the save later when the graph might be different. - - after a timer has passed, write it out. Any scheduling issues - between files? i don't think so. the timer might be kind of - huge, and then we might want to take a hint from a client that - it's a good time to save the files that it was editing, like - when the mouse moves out of the client's window and might be - going towards a text file editor - - """ - log.debug("%s dirty, needs write", self.path) - - self.graphToWrite = graph - if self.writeCall: - self.writeCall.reset(self.flushDelay) # type: ignore - else: - self.writeCall = reactor.callLater(self.flushDelay, self.flush) - - def flush(self) -> None: - self.writeCall = None - - tmpOut = str(self.path) + ".rdfdb-temp" - f = open(tmpOut, 'wb') - t1 = time.time() - for p, n in (list(self.globalPrefixes.items()) + list(self.readPrefixes.items()) + list(self.ctxPrefixes.items())): - self.graphToWrite.bind(p, n) - self.graphToWrite.serialize(destination=f, format='n3', encoding='utf8') - serializeTime = time.time() - t1 - f.close() - self.lastWriteTimestamp = os.path.getmtime(tmpOut) - os.rename(tmpOut, self.path) - iolog.info("%s rewrote in %.1f ms", self.path, serializeTime * 1000) - - def __repr__(self) -> str: - return "%s(path=%r, uri=%r, ...)" % (self.__class__.__name__, self.path, self.uri)
--- a/rdfdb/watched_files.py Mon May 30 20:38:53 2022 -0700 +++ b/rdfdb/watched_files.py Mon May 30 22:43:07 2022 -0700 @@ -23,6 +23,18 @@ class WatchedFiles: """ filenames and text content only; yields FileEditEvents + + Notes from a previous version: + + # emacs save comes in as IN_MOVE_SELF, maybe + + # I was hoping not to watch IN_CHANGED and get lots of + # half-written files, but emacs doesn't close its files after + # a write, so there's no other event. I could try to sleep + # until after all the writes are done, but I think the only + # bug left is that we'll retry too agressively on a file + # that's being written + """ """queue of file contents that the caller should use""" fileEditEvents: asyncio.Queue[FileEditEvent]