0
|
1 """
|
|
2 requesting /graph/foo returns an SSE patch stream that's the
|
|
3 result of fetching multiple other SSE patch streams. The result stream
|
|
4 may include new statements injected by this service.
|
|
5
|
|
6 Future:
|
|
7 - filter out unneeded stmts from the sources
|
|
- give a time resolution and concatenate any patches that arrive faster than that resolution
|
|
9 """
|
|
10 import collections
|
|
11 import json
|
|
12 import logging
|
|
13 import time
|
6
|
14 from typing import (Callable, Dict, List, Optional, Sequence, Set, Tuple, Union)
|
0
|
15
|
|
16 import cyclone.sse
|
|
17 import cyclone.web
|
|
18 from docopt import docopt
|
9
|
19 from patchablegraph.patchablegraph import jsonFromPatch
|
0
|
20 from patchablegraph.patchsource import PatchSource, ReconnectingPatchSource
|
6
|
21 from prometheus_client import Summary
|
0
|
22 from prometheus_client.exposition import generate_latest
|
|
23 from prometheus_client.registry import REGISTRY
|
|
24 from rdfdb.patch import Patch
|
6
|
25 from rdflib import Namespace, URIRef
|
1
|
26 from rdflib.term import Node
|
0
|
27 from standardservice.logsetup import enableTwistedLog, log
|
|
28 from twisted.internet import defer, reactor
|
|
29
|
|
30 from collector_config import config
|
|
31
|
9
|
32 import cyclone.sse
|
|
def py3_sendEvent(self, message, event=None, eid=None, retry=None):
    """
    Python-3-compatible replacement for cyclone.sse.SSEHandler.sendEvent.

    Writes one SSE frame (optional id/event/retry fields, then data) to
    the transport as bytes. dict messages are JSON-encoded first; str
    messages are utf-8 encoded.

    Bug fix: the original interpolated eid/event/retry directly into a
    bytes format string, which raises TypeError under Python 3 when the
    value is a str (or int) -- bytes %s needs a bytes-like object
    (PEP 461). Coerce each field to bytes before interpolating.
    """
    if isinstance(message, dict):
        message = cyclone.sse.escape.json_encode(message)
    if isinstance(message, str):
        message = message.encode("utf-8")
    assert isinstance(message, bytes)

    def _as_bytes(v):
        # Accept bytes as-is; stringify anything else (e.g. int retry)
        # and encode, so b"...%s" interpolation can't TypeError.
        if isinstance(v, bytes):
            return v
        if not isinstance(v, str):
            v = str(v)
        return v.encode("utf-8")

    if eid:
        self.transport.write(b"id: %s\n" % _as_bytes(eid))
    if event:
        self.transport.write(b"event: %s\n" % _as_bytes(event))
    if retry:
        self.transport.write(b"retry: %s\n" % _as_bytes(retry))
    self.transport.write(b"data: %s\n\n" % message)
|
# Monkey-patch cyclone's SSEHandler so every handler in this process
# uses the bytes-safe sendEvent above.
cyclone.sse.SSEHandler.sendEvent = py3_sendEvent
|
|
48
|
|
49
|
1
|
# A quad: (subject, predicate, object, context/graph).
Statement = Tuple[Node, Node, Node, Node]


# Distinct type for "URI of an upstream SSE patch source", to keep it
# from being confused with ordinary URIRefs in signatures.
# SourceUri = NewType('SourceUri', URIRef) # doesn't work
class SourceUri(URIRef):
    pass


ROOM = Namespace("http://projects.bigasterisk.com/room/")
# Pseudo-source for statements this service asserts about itself.
COLLECTOR = SourceUri(URIRef('http://bigasterisk.com/sse_collector/'))

# Prometheus call-count/duration summaries for the hot entry points.
GET_STATE_CALLS = Summary("get_state_calls", 'calls')
LOCAL_STATEMENTS_PATCH_CALLS = Summary("local_statements_patch_calls", 'calls')
MAKE_SYNC_PATCH_CALLS = Summary("make_sync_patch_calls", 'calls')
ON_PATCH_CALLS = Summary("on_patch_calls", 'calls')
SEND_UPDATE_PATCH_CALLS = Summary("send_update_patch_calls", 'calls')
REPLACE_SOURCE_STATEMENTS_CALLS = Summary("replace_source_statements_calls", 'calls')
|
|
67
|
|
68
|
|
class Metrics(cyclone.web.RequestHandler):
    """Serve the prometheus registry in text exposition format."""

    def get(self):
        body = generate_latest(REGISTRY)
        self.add_header('content-type', 'text/plain')
        self.write(body)
|
|
74
|
|
75
|
|
class LocalStatements(object):
    """
    functions that make statements originating from sse_collector itself

    Tracks one ROOM['state'] value per source and emits add/del patches
    on the COLLECTOR graph whenever that value changes.
    """

    def __init__(self, applyPatch: Callable[[SourceUri, Patch], None]):
        self.applyPatch = applyPatch
        # current state URIRef per source; absent key = source not listed
        self._sourceState: Dict[SourceUri, Optional[URIRef]] = {}

    @LOCAL_STATEMENTS_PATCH_CALLS.time()
    def setSourceState(self, source: SourceUri, state: Optional[URIRef]):
        """
        add a patch to the COLLECTOR graph about the state of this
        source. state=None to remove the source.
        """
        prev = self._sourceState.get(source, None)
        if state == prev:
            return  # nothing changed; emit no patch
        log.info('source state %s -> %s', source, state)

        linkQuad = (COLLECTOR, ROOM['source'], source, COLLECTOR)
        stateQuad = (source, ROOM['state'], state, COLLECTOR)
        prevQuad = (source, ROOM['state'], prev, COLLECTOR)

        if prev is None:
            # brand-new source: link it to the collector and set its state
            self._sourceState[source] = state
            self.applyPatch(COLLECTOR, Patch(addQuads=[linkQuad, stateQuad]))
        elif state is None:
            # source going away: drop the link and its last known state
            del self._sourceState[source]
            self.applyPatch(COLLECTOR, Patch(delQuads=[linkQuad, prevQuad]))
        else:
            # state transition: swap the old state quad for the new one
            self._sourceState[source] = state
            self.applyPatch(COLLECTOR, Patch(addQuads=[stateQuad], delQuads=[prevQuad]))
|
|
114
|
|
115
|
|
def abbrevTerm(t: Union[URIRef, Node]) -> Union[str, Node]:
    """Shorten the well-known URI prefixes in t for compact debug output."""
    if not isinstance(t, URIRef):
        return t
    out = t
    for prefix, abbrev in [
        ('http://projects.bigasterisk.com/room/', 'room:'),
        ('http://projects.bigasterisk.com/device/', 'dev:'),
        ('http://bigasterisk.com/sse_collector/', 'sc:'),
    ]:
        out = out.replace(prefix, abbrev)
    return out
|
|
121
|
|
122
|
|
def abbrevStmt(stmt: Statement) -> str:
    """Render a quad as one short parenthesized line for logs."""
    return '(%s %s %s %s)' % tuple(abbrevTerm(term) for term in stmt)
|
|
125
|
|
126
|
|
class PatchSink(cyclone.sse.SSEHandler):
    """One SSE client connection subscribed to /graph/<streamId>."""

    _handlerSerial = 0  # class-wide counter used only for debug labels

    def __init__(self, application: cyclone.web.Application, request):
        cyclone.sse.SSEHandler.__init__(self, application, request)
        self.bound = False
        self.created = time.time()
        self.graphClients = self.settings.graphClients

        self._serial = PatchSink._handlerSerial
        PatchSink._handlerSerial += 1
        # last wall-clock time a patch event went out on this connection
        self.lastPatchSentTime: float = 0.0

    def __repr__(self) -> str:
        return f'<Handler #{self._serial}>'

    def state(self) -> Dict:
        """Debug snapshot of this connection for the /state page."""
        age = time.time() - self.created
        headers = self.request.headers
        return {
            'created': round(self.created, 2),
            'ageHours': round(age / 3600, 2),
            'streamId': self.streamId,
            'remoteIp': self.request.remote_ip,  # wrong, need some forwarded-for thing
            'foafAgent': headers.get('X-Foaf-Agent'),
            'userAgent': headers.get('user-agent'),
        }

    def bind(self, *args, **kwargs):
        self.streamId = args[0]
        self.graphClients.addSseHandler(self)
        # Mark bound only after addSseHandler succeeds, so a failure in
        # there never leads us to call removeSseHandler on unbind.
        self.bound = True

    def unbind(self) -> None:
        if self.bound:
            self.graphClients.removeSseHandler(self)
|
|
164
|
|
165
|
|
# Per true statement: (sources currently asserting it,
#                      handlers that have been sent it).
StatementTable = Dict[Statement, Tuple[Set[SourceUri], Set[PatchSink]]]
|
|
167
|
|
168
|
|
class PostDeleter(object):
    """
    Context manager that lets callers mark table rows as garbage while
    iterating the dict; the marked rows are actually deleted only on
    exit, avoiding mutation-during-iteration errors.
    """

    def __init__(self, statements: StatementTable):
        self.statements = statements

    def __enter__(self):
        self._garbage: List[Statement] = []
        return self

    def add(self, stmt: Statement):
        """Mark stmt for deletion when the with-block ends."""
        self._garbage.append(stmt)

    def __exit__(self, type, value, traceback):
        # Deletion on an exceptional exit is deliberately unsupported.
        if type is not None:
            raise NotImplementedError()
        while self._garbage:
            del self.statements[self._garbage.pop()]
|
|
186
|
|
187
|
|
class ActiveStatements(object):
    """
    The set of currently-true statements, with bookkeeping for which
    sources assert each one and which SSE handlers have been told about
    it. Rows with no sources and no handlers are garbage-collected.
    """

    def __init__(self):
        # This table holds statements asserted by any of our sources
        # plus local statements that we introduce (source is
        # http://bigasterisk.com/sse_collector/).
        self.table: StatementTable = collections.defaultdict(lambda: (set(), set()))

    def state(self) -> Dict:
        """Debug snapshot for the /state page."""
        return {
            'len': len(self.table),
        }

    def postDeleteStatements(self) -> PostDeleter:
        """Context manager for deleting table rows while iterating them."""
        return PostDeleter(self.table)

    def pprintTable(self) -> None:
        """Dump the whole table to stdout (debug logging only)."""
        for i, (stmt, (sources, handlers)) in enumerate(sorted(self.table.items())):
            print("%03d. %-80s from %s to %s" % (i, abbrevStmt(stmt), [abbrevTerm(s) for s in sources], handlers))

    @MAKE_SYNC_PATCH_CALLS.time()
    def makeSyncPatch(self, handler: PatchSink, sources: Set[SourceUri]):
        """
        Build the patch that brings `handler` up to date with the
        statements currently asserted by any of `sources`, and record
        which statements the handler now knows.
        """
        # todo: this could run all handlers at once, which is how we
        # use it anyway
        adds = []
        dels = []

        with self.postDeleteStatements() as garbage:
            for stmt, (stmtSources, handlers) in self.table.items():
                belongsInHandler = not sources.isdisjoint(stmtSources)
                handlerHasIt = handler in handlers
                if belongsInHandler and not handlerHasIt:
                    adds.append(stmt)
                    handlers.add(handler)
                elif not belongsInHandler and handlerHasIt:
                    dels.append(stmt)
                    handlers.remove(handler)
                    if not handlers and not stmtSources:
                        garbage.add(stmt)

        return Patch(addQuads=adds, delQuads=dels)

    def applySourcePatch(self, source: SourceUri, p: Patch):
        """
        Record that `source` asserted p.addQuads and retracted
        p.delQuads. Raises ValueError if the patch is inconsistent with
        what the source already asserted.
        """
        for stmt in p.addQuads:
            sourceUrls, handlers = self.table[stmt]
            if source in sourceUrls:
                raise ValueError("%s added stmt that it already had: %s" % (source, abbrevStmt(stmt)))
            sourceUrls.add(source)

        with self.postDeleteStatements() as garbage:
            for stmt in p.delQuads:
                sourceUrls, handlers = self.table[stmt]
                if source not in sourceUrls:
                    raise ValueError("%s deleting stmt that it didn't have: %s" % (source, abbrevStmt(stmt)))
                sourceUrls.remove(source)
                # this is rare, since some handler probably still has
                # the stmt we're deleting, but it can happen e.g. when
                # a handler was just deleted
                if not sourceUrls and not handlers:
                    garbage.add(stmt)

    @REPLACE_SOURCE_STATEMENTS_CALLS.time()
    def replaceSourceStatements(self, source: SourceUri, stmts: Sequence[Statement]):
        """Make `stmts` be exactly the set of statements from `source`."""
        log.debug('replaceSourceStatements with %s stmts', len(stmts))
        # Perf fix: membership tests were done against the raw Sequence
        # (O(len(stmts)) per table row). Use a set for O(1) lookups.
        desired = set(stmts)
        newStmts = set(desired)  # will be pared down to table-missing stmts

        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                if source in sources:
                    if stmt not in desired:
                        sources.remove(source)
                        if not sources and not handlers:
                            garbage.add(stmt)
                else:
                    if stmt in desired:
                        sources.add(source)
                # anything already in the table must not be re-added below
                newStmts.discard(stmt)

        self.applySourcePatch(source, Patch(addQuads=newStmts, delQuads=[]))

    def discardHandler(self, handler: PatchSink):
        """Forget that `handler` knows anything; collect emptied rows."""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                handlers.discard(handler)
                if not sources and not handlers:
                    garbage.add(stmt)

    def discardSource(self, source: SourceUri):
        """Retract everything asserted by `source`; collect emptied rows."""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                sources.discard(source)
                if not sources and not handlers:
                    garbage.add(stmt)
|
|
283
|
|
284
|
|
class GraphClients(object):
    """
    All the active PatchSources and SSEHandlers

    To handle all the overlapping-statement cases, we store a set of
    true statements along with the sources that are currently
    asserting them and the requesters who currently know them. As
    statements come and go, we make patches to send to requesters.
    """

    def __init__(self):
        self.clients: Dict[SourceUri, Union[PatchSource, ReconnectingPatchSource]] = {}  # (COLLECTOR is not listed)
        self.handlers: Set[PatchSink] = set()
        self.statements: ActiveStatements = ActiveStatements()

        self._localStatements = LocalStatements(self._onPatch)

    def state(self) -> Dict:
        """Debug snapshot of all clients/handlers for the /state page."""
        return {
            'clients': sorted([ps.state() for ps in self.clients.values()], key=lambda r: r['reconnectedPatchSource']['url']),
            'sseHandlers': sorted([h.state() for h in self.handlers], key=lambda r: (r['streamId'], r['created'])),
            'statements': self.statements.state(),
        }

    def _sourcesForHandler(self, handler: PatchSink) -> List[SourceUri]:
        """The configured source URIs feeding this handler's stream, plus COLLECTOR."""
        streamId = handler.streamId
        matches = [s for s in config['streams'] if s['id'] == streamId]
        if len(matches) != 1:
            raise ValueError("%s matches for %r" % (len(matches), streamId))
        return [SourceUri(URIRef(s)) for s in matches[0]['sources']] + [COLLECTOR]

    @ON_PATCH_CALLS.time()
    def _onPatch(self, source: SourceUri, p: Patch, fullGraph: bool = False):
        """Apply one incoming patch from `source` and fan updates out."""
        if fullGraph:
            # a reconnect may need to resend the full graph even
            # though we've already sent some statements
            self.statements.replaceSourceStatements(source, p.addQuads)
        else:
            self.statements.applySourcePatch(source, p)

        self._sendUpdatePatch()

        if log.isEnabledFor(logging.DEBUG):
            self.statements.pprintTable()

        if source != COLLECTOR:
            self._localStatements.setSourceState(source, ROOM['fullGraphReceived'] if fullGraph else ROOM['patchesReceived'])

    @SEND_UPDATE_PATCH_CALLS.time()
    def _sendUpdatePatch(self, handler: Optional[PatchSink] = None):
        """
        send a patch event out this handler to bring it up to date with
        self.statements
        """
        now = time.time()
        selected = self.handlers
        if handler is not None:
            if handler not in self.handlers:
                log.error("called _sendUpdatePatch on a handler that's gone")
                return
            selected = {handler}
        # reduce loops here- prepare all patches at once
        for h in selected:
            # rate-limit per handler; slow clients (Raspbian) get a
            # longer minimum period between patches
            period = .9
            if 'Raspbian' in h.request.headers.get('user-agent', ''):
                period = 5
            if h.lastPatchSentTime > now - period:
                continue
            p = self.statements.makeSyncPatch(h, set(self._sourcesForHandler(h)))
            log.debug('makeSyncPatch for %r: %r', h, p.jsonRepr)
            if not p.isNoop():
                log.debug("send patch %s to %s", p.shortSummary(), h)
                # This can be a giant line, which was a problem
                # once. Might be nice for this service to try to break
                # it up into multiple sends, although there's no
                # guarantee at all since any single stmt could be any
                # length.
                h.sendEvent(message=jsonFromPatch(p), event=b'patch')
                h.lastPatchSentTime = now
            else:
                log.debug('nothing to send to %s', h)

    def addSseHandler(self, handler: PatchSink):
        """Register a new SSE client and connect any sources it needs."""
        log.info('addSseHandler %r %r', handler, handler.streamId)

        # fail early if id doesn't match
        sources = self._sourcesForHandler(handler)

        self.handlers.add(handler)

        for source in sources:
            if source not in self.clients and source != COLLECTOR:
                log.debug('connect to patch source %s', source)
                self._localStatements.setSourceState(source, ROOM['connect'])
                self.clients[source] = ReconnectingPatchSource(
                    source,
                    listener=lambda p, fullGraph, source=source: self._onPatch(source, p, fullGraph),
                    reconnectSecs=10)
        log.debug('bring new client up to date')

        self._sendUpdatePatch(handler)

    def removeSseHandler(self, handler: PatchSink):
        """Drop an SSE client and stop sources no other handler needs."""
        log.info('removeSseHandler %r', handler)
        self.statements.discardHandler(handler)
        for source in self._sourcesForHandler(handler):
            for otherHandler in self.handlers:
                if (otherHandler != handler and source in self._sourcesForHandler(otherHandler)):
                    # still in use
                    break
            else:
                self._stopClient(source)

        self.handlers.remove(handler)

    def _stopClient(self, url: SourceUri):
        """Shut down the PatchSource for `url` and retract its statements."""
        if url == COLLECTOR:
            return

        # Bug fix: the original did an unguarded self.clients[url].stop()
        # yet guarded the deletion below with `if url in self.clients`,
        # which shows url can legitimately be absent -- the unguarded
        # lookup would then raise KeyError. Guard the stop() as well.
        if url in self.clients:
            self.clients[url].stop()

        self.statements.discardSource(url)

        self._localStatements.setSourceState(url, None)
        self.clients.pop(url, None)

        self.cleanup()

    def cleanup(self):
        """
        despite the attempts above, we still get useless rows in the table
        sometimes
        """
        with self.statements.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.statements.table.items():
                if not sources and not any(h in self.handlers for h in handlers):
                    garbage.add(stmt)
|
|
422
|
|
423
|
|
class State(cyclone.web.RequestHandler):
    """Serve (and log) a JSON debug dump of the GraphClients state."""

    @GET_STATE_CALLS.time()
    def get(self) -> None:
        try:
            snapshot = self.settings.graphClients.state()
            msg = json.dumps({'graphClients': snapshot}, indent=2,
                             default=lambda obj: '<unserializable>')
            log.info(msg)
            self.write(msg)
        except Exception:
            # make the failure visible on the console before cyclone
            # turns it into a 500
            import traceback
            traceback.print_exc()
            raise
|
|
437
|
|
438
|
|
class GraphList(cyclone.web.RequestHandler):
    """List the configured streams as JSON at /graph/."""

    def get(self) -> None:
        streams = config['streams']
        self.write(json.dumps(streams))
|
|
443
|
|
444
|
|
if __name__ == '__main__':
    arg = docopt("""
    Usage: sse_collector.py [options]

    -v           Verbose
    -i           Info level only
    """)

    # Fix: logging setup was wrapped in a vestigial `if True:` block;
    # it always ran, so the dead conditional is removed.
    # NOTE(review): the -i flag is parsed but unused -- INFO is already
    # the default level; confirm intended semantics before wiring it up.
    enableTwistedLog()
    log.setLevel(logging.DEBUG if arg['-v'] else logging.INFO)
    defer.setDebugging(True)

    graphClients = GraphClients()

    reactor.listenTCP(
        9072,
        cyclone.web.Application(  #
            handlers=[
                (r'/state', State),
                (r'/graph/', GraphList),
                (r'/graph/(.+)', PatchSink),
                (r'/metrics', Metrics),
            ],
            graphClients=graphClients),
        interface='::')
    reactor.run()
|