changeset 265:32cc1eda8389

sourceSubstr feature, untested Ignore-this: a7e5e11bef7fceffc2d2687ce9edcf6e
author drewp@bigasterisk.com
date Thu, 14 Apr 2016 00:11:12 -0700
parents 570b0e73d2bc
children 1ae78fe6f36e
files service/reasoning/reasoning.py
diffstat 1 files changed, 23 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/service/reasoning/reasoning.py	Thu Apr 14 00:10:26 2016 -0700
+++ b/service/reasoning/reasoning.py	Thu Apr 14 00:11:12 2016 -0700
@@ -46,7 +46,7 @@
 
 
 class InputGraph(object):
-    def __init__(self, inputDirs, onChange):
+    def __init__(self, inputDirs, onChange, sourceSubstr=None):
         """
         this has one Graph that's made of:
           - all .n3 files from inputDirs (read at startup)
@@ -61,13 +61,18 @@
         ones with the predicates on the boring list. onChange(self,
         oneShot=True) means: don't store the result of this change
         anywhere; it needs to be processed only once
+
+        sourceSubstr, if set, skips every source that does not contain
+        it as a substring (debugging aid).
         """
         self.inputDirs = inputDirs
         self.onChange = onChange
+        self.sourceSubstr = sourceSubstr
         self._fileGraph = Graph()
         self._remoteGraph = None
         self._combinedGraph = None
         self._oneShotAdditionGraph = None
+        self._lastErrLog = {} # source: error
 
     def updateFileData(self):
         """
@@ -102,22 +107,32 @@
         @inlineCallbacks
         def fetchOne(source):
             try:
-                # this part could be parallelized
-                fetchTime = yield addTrig(g, source)
+                fetchTime = yield addTrig(g, source, timeout=5)
             except Exception, e:
-                log.error("  can't add source %s: %s", source, e)
-                g.add((URIRef(source), ROOM['graphLoadError'], Literal(str(e))))
+                e = str(e)
+                if self._lastErrLog.get(source) != e:
+                    log.error("  can't add source %s: %s", source, e)
+                    self._lastErrLog[source] = e
+                g.add((URIRef(source), ROOM['graphLoadError'], Literal(e)))
                 g.add((URIRef(source), RDF.type, ROOM['FailedGraphLoad']))
             else:
+                if self._lastErrLog.get(source):
+                    log.warning("  source %s is back", source)
+                    self._lastErrLog[source] = None
                 g.add((URIRef(source), ROOM['graphLoadMs'],
                        Literal(round(fetchTime * 1000, 1))))
 
         fetchDone = []
+        filtered = 0
         for source in self._fileGraph.objects(ROOM['reasoning'],
                                               ROOM['source']):
+            if self.sourceSubstr and self.sourceSubstr not in source:
+                filtered += 1
+                continue
             fetchDone.append(fetchOne(source))
         yield gatherResults(fetchDone, consumeErrors=True)
-        log.debug("loaded all in %.1f ms", 1000 * (time.time() - t1))
+        log.debug("loaded %s (skipping %s) in %.1f ms", len(fetchDone),
+                  filtered, 1000 * (time.time() - t1))
         
         prevGraph = self._remoteGraph
         self._remoteGraph = g
@@ -414,7 +429,8 @@
     arg = docopt("""
     Usage: reasoning.py [options]
 
-    -v   Verbose (and slow updates)
+    -v                Verbose (and slow updates)
+    --source=<substr>  Limit sources to those with this string.
     """)
     
     r = Reasoning()