diff service/reasoning/rdflibtrig.py @ 1054:bbaf0576f653

fetch all source graphs in parallel Ignore-this: 666d61fa9c69f78846987e0ccea750d4 darcs-hash:fee0fc60a6fd66102d3a2b73bf980b393e1349c6
author drewp <drewp@bigasterisk.com>
date Tue, 09 Feb 2016 22:01:19 -0800
parents a328cc370b22
children 3c18b4b3b72c
line wrap: on
line diff
--- a/service/reasoning/rdflibtrig.py	Mon Feb 08 23:49:03 2016 -0800
+++ b/service/reasoning/rdflibtrig.py	Tue Feb 09 22:01:19 2016 -0800
@@ -1,14 +1,25 @@
-import time
-import requests
+import time, logging
 from rdflib import ConjunctiveGraph
-        
+from rdflib.parser import StringInputSource
+import treq
+from twisted.internet.defer import inlineCallbacks, returnValue
+log = logging.getLogger('fetch')
+
+from private_ipv6_addresses import ipv6Addresses
+
+@inlineCallbacks  # generator-based: callers receive a Deferred, which fires with the value passed to returnValue
 def addTrig(graph, url, timeout=2):
     t1 = time.time()
-    response = requests.get(url, stream=True, timeout=timeout)
-    if response.status_code != 200:
-        raise ValueError("status %s from %s" % (response.status, url))
+    # Workaround: for some reason my ipv6 hostnames don't resolve, so rewrite each '/name:' in the url to its bracketed literal '/[addr]:'.
+    for name, addr in ipv6Addresses.iteritems():
+        url = url.replace('/' + name + ':', '/[' + addr + ']:')
+    log.debug('    fetching %r', url)
+    response = yield treq.get(url, headers={'accept': ['application/trig']}, timeout=timeout)
+    if response.code != 200:
+        raise ValueError("status %s from %s" % (response.code, url))
     g = ConjunctiveGraph()
-    g.parse(response.raw, format='trig')
+    g.parse(StringInputSource((yield response.content())), format='trig')  # waits for the response body, then parses it as TriG into the scratch graph g
     fetchTime = time.time() - t1
+    log.debug('    %r done in %.04f sec', url, fetchTime)  # elapsed time covers fetch + parse; the addN merge below is not included
     graph.addN(g.quads())
-    return fetchTime
+    returnValue(fetchTime)  # the inlineCallbacks way to return: the Deferred fires with the elapsed seconds