changeset 106:b0f922c8c728

rm old code and probably a bunch of inotify workarounds
author drewp@bigasterisk.com
date Mon, 30 May 2022 22:43:07 -0700
parents 4681ea3fcdf6
children 19100db34354
files rdfdb/graphfile.py rdfdb/watched_files.py
diffstat 2 files changed, 12 insertions(+), 223 deletions(-) [+]
line wrap: on
line diff
--- a/rdfdb/graphfile.py	Mon May 30 20:38:53 2022 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,223 +0,0 @@
-import logging
-import os
-import time
-import traceback
-from typing import Dict, Optional, Protocol, cast
-from pathlib import Path
-
-import twisted.internet.reactor
-from rdflib import Graph, URIRef
-from twisted.internet.inotify import INotify, humanReadableMask
-from twisted.internet.interfaces import IDelayedCall, IReactorCore
-from twisted.python.filepath import FilePath
-
-from rdfdb.compact_turtle import patchN3SerializerToUseLessWhitespace
-from rdfdb.patch import Patch
-from rdfdb.rdflibpatch import inContext
-
-raise NotImplementedError('deleteme')
-reactor = cast(IReactorCore, twisted.internet.reactor)
-
-log = logging.getLogger('graphfile')
-iolog = logging.getLogger('io')
-
-patchN3SerializerToUseLessWhitespace()
-
-
-class PatchCb(Protocol):
-
-    def __call__(self, patch: Patch, dueToFileChange: bool = False) -> None:
-        ...
-
-
-class GetSubgraph(Protocol):
-
-    def __call__(self, uri: URIRef) -> Graph:
-        ...
-
-
-class GraphFile(object):
-    """
-    one rdf file that we read from, write to, and notice external changes to
-    """
-
-    def __init__(self, notifier: INotify, path: Path, uri: URIRef, patch: PatchCb, getSubgraph: GetSubgraph, globalPrefixes: Dict[str, URIRef],
-                 ctxPrefixes: Dict[str, URIRef]):
-        """
-        uri is the context for the triples in this file. We assume
-        sometimes that we're the only ones with triples in this
-        context.
-
-        this does not include an initial reread() call
-
-        Prefixes are mutable dicts. The caller may add to them later.
-        """
-        self.path, self.uri = path, uri
-        self.patch, self.getSubgraph = patch, getSubgraph
-
-        self.lastWriteTimestamp = 0.0  # mtime from the last time _we_ wrote
-
-        self.globalPrefixes = globalPrefixes
-        self.ctxPrefixes = ctxPrefixes
-        self.readPrefixes: Dict[str, URIRef] = {}
-
-        if not os.path.exists(path):
-            # can't start notify until file exists
-            try:
-                os.makedirs(os.path.dirname(path))
-            except OSError:
-                pass
-            f = open(path, "w")
-            f.write("#new\n")
-            f.close()
-            iolog.info("%s created", path)
-            # this was supposed to cut out some extra reads but it
-            # didn't work:
-            self.lastWriteTimestamp = os.path.getmtime(path)
-
-        self.flushDelay = 2  # seconds until we have to call flush() when dirty
-        self.writeCall: Optional[IDelayedCall] = None
-
-        self.notifier = notifier
-        self.addWatch()
-
-    def addWatch(self) -> None:
-
-        # emacs save comes in as IN_MOVE_SELF, maybe
-
-        # I was hoping not to watch IN_CHANGED and get lots of
-        # half-written files, but emacs doesn't close its files after
-        # a write, so there's no other event. I could try to sleep
-        # until after all the writes are done, but I think the only
-        # bug left is that we'll retry too agressively on a file
-        # that's being written
-
-        # See twisted.internet.inotify for IN_CHANGED event, etc.
-
-        log.info("add watch on %s", self.path)
-        self.notifier.watch(FilePath(self.path), callbacks=[self.notify])
-
-    def notify(self, notifier: INotify, filepath: FilePath, mask: int) -> None:
-        try:
-            maskNames = humanReadableMask(mask)
-            if maskNames[0] == 'delete_self':
-                if not filepath.exists():
-                    log.info("%s delete_self", filepath)
-                    self.fileGone()
-                    return
-                else:
-                    log.warn("%s delete_self event but file is here. " "probably a new version moved in", filepath)
-
-            # we could filter these out in the watch() call, but I want
-            # the debugging
-            if maskNames[0] in ['open', 'access', 'close_nowrite', 'attrib']:
-                log.debug("%s %s event, ignoring" % (filepath, maskNames))
-                return
-
-            try:
-                if filepath.getModificationTime() == self.lastWriteTimestamp:
-                    log.debug("%s changed, but we did this write", filepath)
-                    return
-            except OSError as e:
-                log.error("%s: %r" % (filepath, e))
-                # getting OSError no such file, followed by no future reads
-                reactor.callLater(.5, self.addWatch)  # ?
-
-                return
-
-            log.info("reread %s because of %s event", filepath, maskNames)
-
-            self.reread()
-        except Exception:
-            traceback.print_exc()
-
-    def fileGone(self) -> None:
-        """
-        our file is gone; remove the statements from that context
-        """
-        myQuads = [(s, p, o, self.uri) for s, p, o in self.getSubgraph(self.uri)]
-        log.debug("dropping all statements from context %s", self.uri)
-        if myQuads:
-            self.patch(Patch(delQuads=myQuads), dueToFileChange=True)
-
-    def reread(self) -> None:
-        """update the graph with any diffs from this file
-
-        n3 parser fails on "1.e+0" even though rdflib was emitting that itself
-        """
-        old = self.getSubgraph(self.uri)
-        new = Graph()
-        try:
-            contents = open(self.path).read()
-            if contents.startswith("#new"):
-                log.debug("%s ignoring empty contents of my new file", self.path)
-                # this is a new file we're starting, and we should not
-                # patch our graph as if it had just been cleared. We
-                # shouldn't even be here reading this, but
-                # lastWriteTimestamp didn't work.
-                return
-
-            new.parse(location=self.path, format='n3')
-            self.readPrefixes = dict(new.namespaces())
-        except SyntaxError as e:
-            print(e)
-            traceback.print_exc()
-            log.error("%s syntax error", self.path)
-            # todo: likely bug- if a file has this error upon first
-            # read, I think we don't retry it right.
-            return
-        except IOError as e:
-            log.error("%s rereading %s: %r", self.path, self.uri, e)
-            return
-
-        old = inContext(old, self.uri)
-        new = inContext(new, self.uri)
-
-        p = Patch.fromDiff(old, new)
-        if p:
-            log.debug("%s applying patch for changes in file", self.path)
-            self.patch(p, dueToFileChange=True)
-        else:
-            log.debug("old == new after reread of %s", self.path)
-
-    def dirty(self, graph: Graph) -> None:
-        """
-        there are new contents to write to our file
-
-        graph is the rdflib.Graph that contains the contents of the
-        file. It is allowed to change. Note that dirty() will probably
-        do the save later when the graph might be different.
-
-        after a timer has passed, write it out. Any scheduling issues
-        between files? i don't think so. the timer might be kind of
-        huge, and then we might want to take a hint from a client that
-        it's a good time to save the files that it was editing, like
-        when the mouse moves out of the client's window and might be
-        going towards a text file editor
-
-        """
-        log.debug("%s dirty, needs write", self.path)
-
-        self.graphToWrite = graph
-        if self.writeCall:
-            self.writeCall.reset(self.flushDelay)  # type: ignore
-        else:
-            self.writeCall = reactor.callLater(self.flushDelay, self.flush)
-
-    def flush(self) -> None:
-        self.writeCall = None
-
-        tmpOut = str(self.path) + ".rdfdb-temp"
-        f = open(tmpOut, 'wb')
-        t1 = time.time()
-        for p, n in (list(self.globalPrefixes.items()) + list(self.readPrefixes.items()) + list(self.ctxPrefixes.items())):
-            self.graphToWrite.bind(p, n)
-        self.graphToWrite.serialize(destination=f, format='n3', encoding='utf8')
-        serializeTime = time.time() - t1
-        f.close()
-        self.lastWriteTimestamp = os.path.getmtime(tmpOut)
-        os.rename(tmpOut, self.path)
-        iolog.info("%s rewrote in %.1f ms", self.path, serializeTime * 1000)
-
-    def __repr__(self) -> str:
-        return "%s(path=%r, uri=%r, ...)" % (self.__class__.__name__, self.path, self.uri)
--- a/rdfdb/watched_files.py	Mon May 30 20:38:53 2022 -0700
+++ b/rdfdb/watched_files.py	Mon May 30 22:43:07 2022 -0700
@@ -23,6 +23,18 @@
 class WatchedFiles:
     """
     filenames and text content only; yields FileEditEvents
+
+    Notes from a previous version:
+
+        # emacs save comes in as IN_MOVE_SELF, maybe
+
+        # I was hoping not to watch IN_CHANGED and get lots of
+        # half-written files, but emacs doesn't close its files after
+        # a write, so there's no other event. I could try to sleep
+        # until after all the writes are done, but I think the only
+        # bug left is that we'll retry too agressively on a file
+        # that's being written
+
     """
     """queue of file contents that the caller should use"""
     fileEditEvents: asyncio.Queue[FileEditEvent]