diff service/reasoning/rdflibtrig.py @ 249:e5c27d2f11ab

fetch all source graphs in parallel Ignore-this: 666d61fa9c69f78846987e0ccea750d4
author drewp@bigasterisk.com
date Tue, 09 Feb 2016 22:01:19 -0800
parents 0c306e76d8c5
children 3c18b4b3b72c
line wrap: on
line diff
--- a/service/reasoning/rdflibtrig.py	Mon Feb 08 23:49:03 2016 -0800
+++ b/service/reasoning/rdflibtrig.py	Tue Feb 09 22:01:19 2016 -0800
@@ -1,14 +1,25 @@
-import time
-import requests
+import time, logging
 from rdflib import ConjunctiveGraph
-        
+from rdflib.parser import StringInputSource
+import treq
+from twisted.internet.defer import inlineCallbacks, returnValue
+log = logging.getLogger('fetch')
+
+from private_ipv6_addresses import ipv6Addresses
+
+@inlineCallbacks
 def addTrig(graph, url, timeout=2):
     t1 = time.time()
-    response = requests.get(url, stream=True, timeout=timeout)
-    if response.status_code != 200:
-        raise ValueError("status %s from %s" % (response.status, url))
+    # workaround: for some reason my ipv6 hostnames don't resolve, so substitute the bracketed literal addresses
+    for name, addr in ipv6Addresses.iteritems():
+        url = url.replace('/' + name + ':', '/[' + addr + ']:')
+    log.debug('    fetching %r', url)
+    response = yield treq.get(url, headers={'accept': ['application/trig']}, timeout=timeout)
+    if response.code != 200:
+        raise ValueError("status %s from %s" % (response.code, url))
     g = ConjunctiveGraph()
-    g.parse(response.raw, format='trig')
+    g.parse(StringInputSource((yield response.content())), format='trig')
     fetchTime = time.time() - t1
+    log.debug('    %r done in %.04f sec', url, fetchTime)
     graph.addN(g.quads())
-    return fetchTime
+    returnValue(fetchTime)