Mercurial > code > home > repos > homeauto
diff service/reasoning/inference.py @ 1092:54de5144900d
Switch from evtiming to greplin.scales. Optimize rules reader to reuse previous data (400ms -> 0.6ms)
Ignore-this: a655f4c56db51b09b3f14d7f09e354cb
darcs-hash:4ffd7012f404392375434243104eba065ffb8086
author | drewp <drewp@bigasterisk.com> |
---|---|
date | Mon, 09 May 2016 00:32:08 -0700 |
parents | cb7fa2f30df9 |
children | e03696277b32 |
line wrap: on
line diff
--- a/service/reasoning/inference.py Sun May 08 03:05:27 2016 -0700 +++ b/service/reasoning/inference.py Mon May 09 00:32:08 2016 -0700 @@ -2,7 +2,7 @@ see ./reasoning for usage """ -import sys, os +import sys, os, contextlib try: from rdflib.Graph import Graph except ImportError: @@ -18,62 +18,80 @@ from rdflib import plugin, Namespace from rdflib.store import Store -sys.path.append('../../../ffg/ffg') -import evtiming +from greplin import scales +STATS = scales.collection('/web', + scales.PmfStat('readRules')) from escapeoutputstatements import escapeOutputStatements ROOM = Namespace("http://projects.bigasterisk.com/room/") +def _loadAndEscape(ruleStore, n3, outputPatterns): + ruleGraph = Graph(ruleStore) + + # Can't escapeOutputStatements in the ruleStore since it + # doesn't support removals. Can't copy plainGraph into + # ruleGraph since something went wrong with traversing the + # triples inside quoted graphs, and I lose all the bodies + # of my rules. This serialize/parse version is very slow (400ms), + # but it only runs when the file changes. + plainGraph = Graph() + plainGraph.parse(StringInputSource(n3), format='n3') # for inference + escapeOutputStatements(plainGraph, outputPatterns=outputPatterns) + expandedN3 = plainGraph.serialize(format='n3') + + ruleGraph.parse(StringInputSource(expandedN3), format='n3') + _rulesCache = (None, None, None, None) -@evtiming.serviceLevel.timed('readRules') def readRules(rulesPath, outputPatterns): """ - returns (rulesN3, ruleGraph) + returns (rulesN3, ruleStore) This includes escaping certain statements in the output (implied) subgraaphs so they're not confused with input statements. 
""" global _rulesCache - mtime = os.path.getmtime(rulesPath) - key = (rulesPath, mtime) - if _rulesCache[:2] == key: - _, _, rulesN3, expandedN3 = _rulesCache - else: - rulesN3 = open(rulesPath).read() # for web display - plainGraph = Graph() - plainGraph.parse(StringInputSource(rulesN3), - format='n3') # for inference - escapeOutputStatements(plainGraph, outputPatterns=outputPatterns) - expandedN3 = plainGraph.serialize(format='n3') - _rulesCache = key + (rulesN3, expandedN3) + with STATS.readRules.time(): + mtime = os.path.getmtime(rulesPath) + key = (rulesPath, mtime) + if _rulesCache[:2] == key: + _, _, rulesN3, ruleStore = _rulesCache + else: + rulesN3 = open(rulesPath).read() # for web display - # the rest needs to happen each time since inference is - # consuming the ruleGraph somehow - ruleStore = N3RuleStore() - ruleGraph = Graph(ruleStore) - - ruleGraph.parse(StringInputSource(expandedN3), format='n3') - log.debug('%s rules' % len(ruleStore.rules)) - return rulesN3, ruleGraph + ruleStore = N3RuleStore() + _loadAndEscape(ruleStore, rulesN3, outputPatterns) + log.debug('%s rules' % len(ruleStore.rules)) + + _rulesCache = key + (rulesN3, ruleStore) + return rulesN3, ruleStore def infer(graph, rules): """ - returns new graph of inferred statements + returns new graph of inferred statements. Plain rete api seems to + alter rules.formulae and rules.rules, but this function does not + alter the incoming rules object, so you can cache it. 
""" # based on fuxi/tools/rdfpipe.py - store = plugin.get('IOMemory',Store)() - store.open('') - target = Graph() tokenSet = generateTokenSet(graph) - network = ReteNetwork(rules, inferredTarget=target) - network.feedFactsToAdd(tokenSet) - - store.rollback() + with _dontChangeRulesStore(rules): + network = ReteNetwork(rules, inferredTarget=target) + network.feedFactsToAdd(tokenSet) + return target +@contextlib.contextmanager +def _dontChangeRulesStore(rules): + if not hasattr(rules, '_stashOriginalRules'): + rules._stashOriginalRules = rules.rules[:] + yield + for k in rules.formulae.keys(): + if not k.startswith('_:Formula'): + del rules.formulae[k] + rules.rules = rules._stashOriginalRules[:] + import time, logging log = logging.getLogger() def logTime(func):