diff service/reasoning/inference.py @ 1092:54de5144900d

switch from evtiming to greplin.scales. Optimize rules reader to reuse previous data (400ms -> 0.6ms) Ignore-this: a655f4c56db51b09b3f14d7f09e354cb darcs-hash:4ffd7012f404392375434243104eba065ffb8086
author drewp <drewp@bigasterisk.com>
date Mon, 09 May 2016 00:32:08 -0700
parents cb7fa2f30df9
children e03696277b32
line wrap: on
line diff
--- a/service/reasoning/inference.py	Sun May 08 03:05:27 2016 -0700
+++ b/service/reasoning/inference.py	Mon May 09 00:32:08 2016 -0700
@@ -2,7 +2,7 @@
 see ./reasoning for usage
 """
 
-import sys, os
+import sys, os, contextlib
 try:
     from rdflib.Graph import Graph
 except ImportError:
@@ -18,62 +18,80 @@
 from rdflib import plugin, Namespace
 from rdflib.store import Store
 
-sys.path.append('../../../ffg/ffg')
-import evtiming
+from greplin import scales 
+STATS = scales.collection('/web',
+                          scales.PmfStat('readRules'))
 
 from escapeoutputstatements import escapeOutputStatements
 ROOM = Namespace("http://projects.bigasterisk.com/room/")
 
+def _loadAndEscape(ruleStore, n3, outputPatterns):
+    """
+    Parse the n3 rules text into ruleStore, after running
+    escapeOutputStatements (with the given outputPatterns) over the
+    parsed rules.
+
+    Nothing is returned; the result is whatever the final parse adds
+    to ruleStore through the Graph wrapped around it.
+    """
+    ruleGraph = Graph(ruleStore)
+
+    # Can't escapeOutputStatements in the ruleStore since it
+    # doesn't support removals. Can't copy plainGraph into
+    # ruleGraph since something went wrong with traversing the
+    # triples inside quoted graphs, and I lose all the bodies
+    # of my rules. This serialize/parse version is very slow (400ms),
+    # but it only runs when the file changes.
+    plainGraph = Graph()
+    plainGraph.parse(StringInputSource(n3), format='n3') # for inference
+    escapeOutputStatements(plainGraph, outputPatterns=outputPatterns)
+    # round-trip through n3 text so the escaped rules can be parsed
+    # into the store-backed graph
+    expandedN3 = plainGraph.serialize(format='n3')
+
+    ruleGraph.parse(StringInputSource(expandedN3), format='n3')
+
 _rulesCache = (None, None, None, None)
-@evtiming.serviceLevel.timed('readRules')
 def readRules(rulesPath, outputPatterns):
     """
-    returns (rulesN3, ruleGraph)
+    returns (rulesN3, ruleStore)
 
     This includes escaping certain statements in the output
     (implied) subgraphs so they're not confused with input
     statements.
     """
     global _rulesCache
-    mtime = os.path.getmtime(rulesPath)
-    key = (rulesPath, mtime)
-    if _rulesCache[:2] == key:
-        _, _, rulesN3, expandedN3 = _rulesCache
-    else:
-        rulesN3 = open(rulesPath).read() # for web display
 
-        plainGraph = Graph()
-        plainGraph.parse(StringInputSource(rulesN3),
-                         format='n3') # for inference
-        escapeOutputStatements(plainGraph, outputPatterns=outputPatterns)
-        expandedN3 = plainGraph.serialize(format='n3')
-        _rulesCache = key + (rulesN3, expandedN3)
+    with STATS.readRules.time():
+        mtime = os.path.getmtime(rulesPath)
+        key = (rulesPath, mtime)
+        if _rulesCache[:2] == key:
+            _, _, rulesN3, ruleStore = _rulesCache
+        else:
+            rulesN3 = open(rulesPath).read() # for web display
 
-    # the rest needs to happen each time since inference is
-    # consuming the ruleGraph somehow
-    ruleStore = N3RuleStore()
-    ruleGraph = Graph(ruleStore)
-
-    ruleGraph.parse(StringInputSource(expandedN3), format='n3')
-    log.debug('%s rules' % len(ruleStore.rules))
-    return rulesN3, ruleGraph
+            ruleStore = N3RuleStore()
+            _loadAndEscape(ruleStore, rulesN3, outputPatterns)
+            log.debug('%s rules' % len(ruleStore.rules))
+            
+            _rulesCache = key + (rulesN3, ruleStore)
+        return rulesN3, ruleStore
 
 def infer(graph, rules):
     """
-    returns new graph of inferred statements
+    returns new graph of inferred statements. Plain rete api seems to
+    alter rules.formulae and rules.rules, but this function does not
+    alter the incoming rules object, so you can cache it.
     """
     # based on fuxi/tools/rdfpipe.py
-    store = plugin.get('IOMemory',Store)()        
-    store.open('')
-
     target = Graph()
     tokenSet = generateTokenSet(graph)
-    network = ReteNetwork(rules, inferredTarget=target)
-    network.feedFactsToAdd(tokenSet)
-
-    store.rollback()
+    with _dontChangeRulesStore(rules):
+        network = ReteNetwork(rules, inferredTarget=target)
+        network.feedFactsToAdd(tokenSet)
+    
     return target
 
+@contextlib.contextmanager
+def _dontChangeRulesStore(rules):
+    """
+    Context manager that puts rules.rules and rules.formulae back
+    after the body runs, so the same rules object can be reused.
+
+    The first time a given rules object is seen, a copy of
+    rules.rules is stashed on it as rules._stashOriginalRules. On
+    exit, including when the body raises, every key in rules.formulae
+    that does not start with '_:Formula' is deleted and rules.rules
+    is reset to a fresh copy of the stash.
+    """
+    if not hasattr(rules, '_stashOriginalRules'):
+        rules._stashOriginalRules = rules.rules[:]
+    try:
+        yield
+    finally:
+        # restore even on error; otherwise a failed inference would
+        # leave the cached rules object in its altered state
+        # list() takes a snapshot so deleting keys is safe while looping
+        for k in list(rules.formulae.keys()):
+            if not k.startswith('_:Formula'):
+                del rules.formulae[k]
+        rules.rules = rules._stashOriginalRules[:]
+    
 import time, logging
 log = logging.getLogger()
 def logTime(func):