view service/reasoning/rdflibtrig.py @ 249:e5c27d2f11ab

fetch all source graphs in parallel Ignore-this: 666d61fa9c69f78846987e0ccea750d4
author drewp@bigasterisk.com
date Tue, 09 Feb 2016 22:01:19 -0800
parents 0c306e76d8c5
children 3c18b4b3b72c
line wrap: on
line source

import time, logging
from rdflib import ConjunctiveGraph
from rdflib.parser import StringInputSource
import treq
from twisted.internet.defer import inlineCallbacks, returnValue
# Module-level logger, registered under the fixed name 'fetch'; addTrig
# writes its per-request debug lines here.
log = logging.getLogger('fetch')

from private_ipv6_addresses import ipv6Addresses

@inlineCallbacks
def addTrig(graph, url, timeout=2):
    """Fetch a TriG document from `url` and add its quads to `graph`.

    This is a Twisted inlineCallbacks generator, so calling it returns a
    Deferred rather than the result itself.

    Args:
        graph: destination store; only its addN() method is used here.
        url: address to GET. Every '/<name>:' occurrence whose name is a
            key of ipv6Addresses is first rewritten to '/[<addr>]:'.
        timeout: forwarded unchanged to treq.get as its timeout.

    Returns (through the Deferred):
        Float number of seconds between entering this function and
        finishing the parse of the response body. The time spent copying
        the quads into `graph` is not included.

    Raises:
        ValueError: the response status code was anything other than 200.
    """
    started = time.time()
    # workaround: for some reason my ipv6 names don't resolve, so swap in
    # the literal bracketed addresses. items() rather than iteritems() so
    # the loop works on both Python 2 and Python 3 mappings.
    for name, addr in ipv6Addresses.items():
        url = url.replace('/' + name + ':', '/[' + addr + ']:')
    log.debug('    fetching %r', url)
    response = yield treq.get(url, headers={'accept': ['application/trig']}, timeout=timeout)
    if response.code != 200:
        raise ValueError("status %s from %s" % (response.code, url))
    body = yield response.content()
    # Parse into a scratch graph first so a parse failure leaves `graph`
    # untouched; the quads are only copied over after parsing succeeds.
    g = ConjunctiveGraph()
    g.parse(StringInputSource(body), format='trig')
    fetchTime = time.time() - started
    log.debug('    %r done in %.04f sec', url, fetchTime)
    graph.addN(g.quads())
    # returnValue (not a bare `return value`) keeps the generator valid on
    # Python 2 as well.
    returnValue(fetchTime)