0
|
1 """
|
|
2 requesting /graph/foo returns an SSE patch stream that's the
|
|
3 result of fetching multiple other SSE patch streams. The result stream
|
|
4 may include new statements injected by this service.
|
|
5
|
|
6 Future:
|
|
7 - filter out unneeded stmts from the sources
|
|
8 - give a time resolution and concatenate any patches that come faster than that res
|
|
9 """
|
|
10 import collections
|
|
11 import json
|
|
12 import logging
|
|
13 import time
|
|
14 from typing import (Any, Callable, Dict, List, NewType, Optional, Sequence, Set, Tuple, Union)
|
|
15
|
|
16 import cyclone.sse
|
|
17 import cyclone.web
|
|
18 from docopt import docopt
|
|
19 from patchablegraph import jsonFromPatch
|
|
20 from patchablegraph.patchsource import PatchSource, ReconnectingPatchSource
|
|
21 from prometheus_client import Counter, Gauge, Histogram, Summary
|
|
22 from prometheus_client.exposition import generate_latest
|
|
23 from prometheus_client.registry import REGISTRY
|
|
24 from rdfdb.patch import Patch
|
|
25 from rdflib import Namespace, URIRef
|
1
|
26 from rdflib.term import Node
|
0
|
27 from standardservice.logsetup import enableTwistedLog, log
|
|
28 from twisted.internet import defer, reactor
|
|
29
|
|
30 from collector_config import config
|
|
31
|
1
|
# A statement is a 4-tuple of rdflib terms, as passed in Patch addQuads/delQuads.
# Quads built in this file look like (subject, predicate, object, COLLECTOR), so
# the fourth term is presumably the graph/context -- confirm against rdfdb.patch.
Statement = Tuple[Node, Node, Node, Node]
|
0
|
33
|
|
#SourceUri = NewType('SourceUri', URIRef) # doesn't work
class SourceUri(URIRef):
    """URIRef subclass used in annotations to mark the URI of a patch source."""
|
|
37
|
|
38
|
|
# Namespace for the vocabulary terms used in this file (ROOM['source'],
# ROOM['state'], ROOM['connect'], ...).
ROOM = Namespace("http://projects.bigasterisk.com/room/")
# URI under which this service publishes the statements it generates itself.
COLLECTOR = SourceUri(URIRef('http://bigasterisk.com/sse_collector/'))

# prometheus Summary metrics. Each one is applied below as a `.time()`
# decorator on the function its name refers to.
GET_STATE_CALLS = Summary("get_state_calls", 'calls')
LOCAL_STATEMENTS_PATCH_CALLS = Summary("local_statements_patch_calls", 'calls')
MAKE_SYNC_PATCH_CALLS = Summary("make_sync_patch_calls", 'calls')
ON_PATCH_CALLS = Summary("on_patch_calls", 'calls')
SEND_UPDATE_PATCH_CALLS = Summary("send_update_patch_calls", 'calls')
REPLACE_SOURCE_STATEMENTS_CALLS = Summary("replace_source_statements_calls", 'calls')
|
|
48
|
|
49
|
|
class Metrics(cyclone.web.RequestHandler):
    """Serve the metrics collected in the default prometheus REGISTRY."""

    def get(self):
        """Write the output of generate_latest(REGISTRY) as a text/plain response."""
        # set_header replaces a header value, while add_header appends another
        # one, which would leave the handler's default Content-Type in the
        # response next to ours. The canonical capitalization is used so the
        # default entry is overwritten even if headers are kept in a
        # case-sensitive mapping (NOTE(review): believed to be the case in
        # cyclone -- confirm).
        self.set_header('Content-Type', 'text/plain')
        self.write(generate_latest(REGISTRY))
|
|
55
|
|
56
|
|
class LocalStatements(object):
    """
    functions that make statements originating from sse_collector itself
    """

    def __init__(self, applyPatch: Callable[[URIRef, Patch], None]):
        # Callback that receives (COLLECTOR, patch) for every local change.
        self.applyPatch = applyPatch
        # source: state URIRef, for every source we have published a state for
        self._sourceState: Dict[SourceUri, URIRef] = {}

    @LOCAL_STATEMENTS_PATCH_CALLS.time()
    def setSourceState(self, source: SourceUri, state: URIRef):
        """
        Publish, as a patch on the COLLECTOR graph, that `source` is now in
        `state`. Passing state=None withdraws the source and its state.
        Nothing is sent when the state is unchanged.
        """
        previous = self._sourceState.get(source, None)
        if state == previous:
            return
        log.info('source state %s -> %s', source, state)

        listing = (COLLECTOR, ROOM['source'], source, COLLECTOR)

        def stateQuad(value):
            return (source, ROOM['state'], value, COLLECTOR)

        if previous is None:
            # first report about this source: list it and give its state
            self._sourceState[source] = state
            self.applyPatch(COLLECTOR, Patch(addQuads=[listing, stateQuad(state)]))
            return

        if state is None:
            # source is going away: retract the listing and its last state
            del self._sourceState[source]
            self.applyPatch(COLLECTOR, Patch(delQuads=[listing, stateQuad(previous)]))
            return

        # known source changing state: swap the state statement only
        self._sourceState[source] = state
        self.applyPatch(COLLECTOR, Patch(addQuads=[stateQuad(state)], delQuads=[stateQuad(previous)]))
|
|
95
|
|
96
|
|
def abbrevTerm(t: Union[URIRef, Node]) -> Union[str, Node]:
    """
    Shorten a URIRef for display by replacing the well-known URI prefixes
    with 'room:', 'dev:' and 'sc:'. Any other kind of term is returned as-is.
    """
    if not isinstance(t, URIRef):
        return t

    shortened = t
    # applied in this order, one after the other
    for longForm, shortForm in (
            ('http://projects.bigasterisk.com/room/', 'room:'),
            ('http://projects.bigasterisk.com/device/', 'dev:'),
            ('http://bigasterisk.com/sse_collector/', 'sc:'),
    ):
        shortened = shortened.replace(longForm, shortForm)
    return shortened
|
|
102
|
|
103
|
|
def abbrevStmt(stmt: Statement) -> str:
    """Return '(a b c d)' built from the abbreviated first four terms of stmt."""
    terms = tuple(abbrevTerm(stmt[i]) for i in range(4))
    return '(%s %s %s %s)' % terms
|
|
106
|
|
107
|
|
class PatchSink(cyclone.sse.SSEHandler):
    """
    One SSE connection that receives patch events. It registers itself with
    the application's graphClients in bind() and unregisters in unbind().
    """
    # class-wide counter; each instance takes the next value as its serial
    _handlerSerial = 0

    def __init__(self, application: cyclone.web.Application, request):
        super().__init__(application, request)
        self.bound = False
        self.created = time.time()
        self.graphClients = self.settings.graphClients

        self._serial = PatchSink._handlerSerial
        PatchSink._handlerSerial += 1
        # time of the last patch event sent to this connection (0.0 = never)
        self.lastPatchSentTime: float = 0.0

    def __repr__(self) -> str:
        return '<Handler #%s>' % self._serial

    def state(self) -> Dict:
        """Describe this connection for the state report."""
        headers = self.request.headers
        ageSeconds = time.time() - self.created
        return {
            'created': round(self.created, 2),
            'ageHours': round(ageSeconds / 3600, 2),
            'streamId': self.streamId,
            'remoteIp': self.request.remote_ip,  # wrong, need some forwarded-for thing
            'foafAgent': headers.get('X-Foaf-Agent'),
            'userAgent': headers.get('user-agent'),
        }

    def bind(self, *args, **kwargs):
        """Take the stream id from the first positional argument and register."""
        self.streamId = args[0]

        self.graphClients.addSseHandler(self)
        # If something goes wrong with addSseHandler, I don't want to
        # try removeSseHandler.
        self.bound = True

    def unbind(self) -> None:
        """Unregister from graphClients, but only if bind() completed."""
        if not self.bound:
            return
        self.graphClients.removeSseHandler(self)
|
|
145
|
|
146
|
|
# Maps each statement to a pair of sets: (the sources asserting it, the
# PatchSink handlers that have been sent it).
StatementTable = Dict[Statement, Tuple[Set[SourceUri], Set[PatchSink]]]
|
|
148
|
|
149
|
|
class PostDeleter(object):
    """
    Context manager that queues table keys for deletion while the table is
    being iterated, and removes them once the `with` body has finished, so
    the table does not change size during the iteration.
    """

    def __init__(self, statements: 'StatementTable'):
        # the mapping that queued keys will be removed from
        self.statements = statements

    def __enter__(self):
        self._garbage: List['Statement'] = []
        return self

    def add(self, stmt: 'Statement'):
        """Queue stmt for removal when the `with` block exits."""
        self._garbage.append(stmt)

    def __exit__(self, exc_type, value, traceback):
        if exc_type is not None:
            # The body failed part-way. Leave the table untouched and let the
            # caller see the real exception, instead of replacing it with an
            # unrelated NotImplementedError.
            return False
        for stmt in self._garbage:
            # pop() rather than del: a key that was queued twice, or that is
            # already gone, must not raise here.
            self.statements.pop(stmt, None)
        return False
|
|
167
|
|
168
|
|
class ActiveStatements(object):
    """
    The table of statements that are currently asserted by some source
    and/or already known to some handler, with the operations that keep the
    two sets of each row up to date.
    """

    def __init__(self):
        # This table holds statements asserted by any of our sources
        # plus local statements that we introduce (source is
        # http://bigasterisk.com/sse_collector/).
        self.table: StatementTable = collections.defaultdict(lambda: (set(), set()))

    def state(self) -> Dict:
        """Summary for the state report: the number of rows in the table."""
        return {
            'len': len(self.table),
        }

    def postDeleteStatements(self) -> PostDeleter:
        """Return a context manager that deletes queued rows after iteration."""
        return PostDeleter(self.table)

    def pprintTable(self) -> None:
        """Print every row (statement, its sources, its handlers) to stdout."""
        for i, (stmt, (sources, handlers)) in enumerate(sorted(self.table.items())):
            print("%03d. %-80s from %s to %s" % (i, abbrevStmt(stmt), [abbrevTerm(s) for s in sources], handlers))

    @MAKE_SYNC_PATCH_CALLS.time()
    def makeSyncPatch(self, handler: PatchSink, sources: Set[SourceUri]):
        """
        Return the Patch that brings `handler` in line with the statements
        asserted by any of `sources`, and record in the table which
        statements the handler has after that patch.
        """
        # todo: this could run all handlers at once, which is how we
        # use it anyway
        adds = []
        dels = []

        with self.postDeleteStatements() as garbage:
            for stmt, (stmtSources, handlers) in self.table.items():
                belongsInHandler = not sources.isdisjoint(stmtSources)
                handlerHasIt = handler in handlers
                # log.debug("%s belong=%s has=%s",
                #           abbrevStmt(stmt), belongsInHandler, handlerHasIt)
                if belongsInHandler and not handlerHasIt:
                    adds.append(stmt)
                    handlers.add(handler)
                elif not belongsInHandler and handlerHasIt:
                    dels.append(stmt)
                    handlers.remove(handler)
                    # the row may now be unused by everyone
                    if not handlers and not stmtSources:
                        garbage.add(stmt)

        return Patch(addQuads=adds, delQuads=dels)

    def applySourcePatch(self, source: SourceUri, p: Patch):
        """
        Record that `source` now asserts p.addQuads and no longer asserts
        p.delQuads.

        Raises ValueError if the source adds a statement it already asserts
        or deletes one it does not assert.
        """
        for stmt in p.addQuads:
            # indexing the defaultdict creates the row for a new statement
            sourceUrls, handlers = self.table[stmt]
            if source in sourceUrls:
                raise ValueError("%s added stmt that it already had: %s" % (source, abbrevStmt(stmt)))
            sourceUrls.add(source)

        with self.postDeleteStatements() as garbage:
            for stmt in p.delQuads:
                # .get() does not run the default factory, so deleting an
                # unknown statement does not leave an empty row behind.
                row = self.table.get(stmt)
                if row is None or source not in row[0]:
                    raise ValueError("%s deleting stmt that it didn't have: %s" % (source, abbrevStmt(stmt)))
                sourceUrls, handlers = row
                sourceUrls.remove(source)
                # this is rare, since some handler probably still has
                # the stmt we're deleting, but it can happen e.g. when
                # a handler was just deleted
                if not sourceUrls and not handlers:
                    garbage.add(stmt)

    @REPLACE_SOURCE_STATEMENTS_CALLS.time()
    def replaceSourceStatements(self, source: SourceUri, stmts: Sequence[Statement]):
        """
        Make the set of statements asserted by `source` exactly `stmts`:
        existing rows are updated in place, and statements with no row yet
        are added through applySourcePatch.
        """
        log.debug('replaceSourceStatements with %s stmts', len(stmts))
        # Set membership keeps the per-row tests below O(1); `stmts` itself
        # may be a list.
        wanted = set(stmts)
        # what is left in here after the loop has no row in the table yet
        newStmts = set(wanted)

        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                if source in sources:
                    if stmt not in wanted:
                        sources.remove(source)
                        if not sources and not handlers:
                            garbage.add(stmt)
                elif stmt in wanted:
                    sources.add(source)
                # Every statement that already has a row is handled above, so
                # it must not be passed to applySourcePatch, which raises for
                # a statement the source already asserts.
                newStmts.discard(stmt)

        self.applySourcePatch(source, Patch(addQuads=newStmts, delQuads=[]))

    def discardHandler(self, handler: PatchSink):
        """Forget `handler` in every row, dropping rows that become unused."""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                handlers.discard(handler)
                if not sources and not handlers:
                    garbage.add(stmt)

    def discardSource(self, source: SourceUri):
        """Forget `source` in every row, dropping rows that become unused."""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                sources.discard(source)
                if not sources and not handlers:
                    garbage.add(stmt)
|
|
264
|
|
265
|
|
class GraphClients(object):
    """
    All the active PatchSources and SSEHandlers

    To handle all the overlapping-statement cases, we store a set of
    true statements along with the sources that are currently
    asserting them and the requesters who currently know them. As
    statements come and go, we make patches to send to requesters.
    """

    def __init__(self):
        self.clients: Dict[SourceUri, PatchSource] = {}  # (COLLECTOR is not listed)
        self.handlers: Set[PatchSink] = set()
        self.statements: ActiveStatements = ActiveStatements()

        # local statements are fed back through _onPatch like any other source
        self._localStatements = LocalStatements(self._onPatch)

    def state(self) -> Dict:
        """Return a report of the clients, the SSE handlers and the table size."""
        return {
            'clients': sorted([ps.state() for ps in self.clients.values()], key=lambda r: r['reconnectedPatchSource']['url']),
            'sseHandlers': sorted([h.state() for h in self.handlers], key=lambda r: (r['streamId'], r['created'])),
            'statements': self.statements.state(),
        }

    def _sourcesForHandler(self, handler: PatchSink) -> List[SourceUri]:
        """
        Return the sources configured for the handler's stream, plus
        COLLECTOR. Raises ValueError unless exactly one entry of
        config['streams'] has the handler's streamId.
        """
        streamId = handler.streamId
        matches = [s for s in config['streams'] if s['id'] == streamId]
        if len(matches) != 1:
            raise ValueError("%s matches for %r" % (len(matches), streamId))
        return [SourceUri(URIRef(s)) for s in matches[0]['sources']] + [COLLECTOR]

    @ON_PATCH_CALLS.time()
    def _onPatch(self, source: SourceUri, p: Patch, fullGraph: bool = False):
        """
        Apply a patch received from `source` to the statement table, update
        the handlers, and (for sources other than COLLECTOR) publish the
        source's new state.
        """
        if fullGraph:
            # a reconnect may need to resend the full graph even
            # though we've already sent some statements
            self.statements.replaceSourceStatements(source, p.addQuads)
        else:
            self.statements.applySourcePatch(source, p)

        self._sendUpdatePatch()

        if log.isEnabledFor(logging.DEBUG):
            self.statements.pprintTable()

        if source != COLLECTOR:
            # this comes back into _onPatch with source=COLLECTOR
            self._localStatements.setSourceState(source, ROOM['fullGraphReceived'] if fullGraph else ROOM['patchesReceived'])

    @SEND_UPDATE_PATCH_CALLS.time()
    def _sendUpdatePatch(self, handler: Optional[PatchSink] = None):
        """
        send a patch event out this handler to bring it up to date with
        self.statements

        With handler=None, every registered handler is updated. A handler
        that was sent a patch less than its minimum period ago is skipped
        in this call.
        """
        now = time.time()
        selected = self.handlers
        if handler is not None:
            if handler not in self.handlers:
                log.error("called _sendUpdatePatch on a handler that's gone")
                return
            selected = {handler}
        # reduce loops here- prepare all patches at once
        for h in selected:
            # minimum seconds between patches; longer for Raspbian user agents
            period = .9
            if 'Raspbian' in h.request.headers.get('user-agent', ''):
                period = 5
            if h.lastPatchSentTime > now - period:
                continue
            p = self.statements.makeSyncPatch(h, set(self._sourcesForHandler(h)))
            log.debug('makeSyncPatch for %r: %r', h, p.jsonRepr)
            if not p.isNoop():
                log.debug("send patch %s to %s", p.shortSummary(), h)
                # This can be a giant line, which was a problem
                # once. Might be nice for this service to try to break
                # it up into multiple sends, although there's no
                # guarantee at all since any single stmt could be any
                # length.
                h.sendEvent(message=jsonFromPatch(p).encode('utf8'), event=b'patch')
                h.lastPatchSentTime = now
            else:
                log.debug('nothing to send to %s', h)

    def addSseHandler(self, handler: PatchSink):
        """
        Register a new handler, connect to any of its sources that have no
        client yet, and send the handler its first patch.
        """
        log.info('addSseHandler %r %r', handler, handler.streamId)

        # fail early if id doesn't match
        sources = self._sourcesForHandler(handler)

        self.handlers.add(handler)

        for source in sources:
            if source not in self.clients and source != COLLECTOR:
                log.debug('connect to patch source %s', source)
                self._localStatements.setSourceState(source, ROOM['connect'])
                # source=source binds the current value into the callback
                self.clients[source] = ReconnectingPatchSource(source,
                                                               listener=lambda p, fullGraph, source=source: self._onPatch(source, p, fullGraph),
                                                               reconnectSecs=10)
        log.debug('bring new client up to date')

        self._sendUpdatePatch(handler)

    def removeSseHandler(self, handler: PatchSink):
        """
        Unregister a handler and stop the clients of the sources that no
        remaining handler uses.
        """
        log.info('removeSseHandler %r', handler)
        # Unregister first. _stopClient() below publishes a source-state
        # change, which runs _sendUpdatePatch() over self.handlers; a handler
        # still listed there would be synced again, i.e. put back into the
        # statement table and sent an event while it is going away.
        self.handlers.discard(handler)
        self.statements.discardHandler(handler)

        # sources that some remaining handler still needs, computed once
        stillInUse: Set[SourceUri] = set()
        for otherHandler in self.handlers:
            stillInUse.update(self._sourcesForHandler(otherHandler))

        for source in self._sourcesForHandler(handler):
            if source not in stillInUse:
                self._stopClient(source)

    def _stopClient(self, url: SourceUri):
        """
        Stop the client for `url` (if there is one), drop the source from
        the statement table and withdraw its published state. COLLECTOR is
        never stopped.
        """
        if url == COLLECTOR:
            return

        # tolerate a source that has no client, as the removal below does
        client = self.clients.get(url)
        if client is not None:
            client.stop()

        self.statements.discardSource(url)

        self._localStatements.setSourceState(url, None)
        self.clients.pop(url, None)

        self.cleanup()

    def cleanup(self):
        """
        despite the attempts above, we still get useless rows in the table
        sometimes
        """
        with self.statements.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.statements.table.items():
                # no source asserts it and none of its handlers is registered
                if not sources and not any(h in self.handlers for h in handlers):
                    garbage.add(stmt)
|
|
403
|
|
404
|
|
class State(cyclone.web.RequestHandler):
    """Serve the graphClients state report as indented JSON."""

    @GET_STATE_CALLS.time()
    def get(self) -> None:
        try:
            report = {'graphClients': self.settings.graphClients.state()}
            # values json can't encode are replaced by a placeholder string
            body = json.dumps(report, indent=2, default=lambda obj: '<unserializable>')
            self.write(body)
        except Exception:
            # print the traceback here, then let the error continue upward
            import traceback
            traceback.print_exc()
            raise
|
|
416
|
|
417
|
|
class GraphList(cyclone.web.RequestHandler):
    """Serve the configured stream definitions as JSON."""

    def get(self) -> None:
        streams = config['streams']
        self.write(json.dumps(streams))
|
|
422
|
|
423
|
|
if __name__ == '__main__':
    # docopt needs at least two spaces between an option and its
    # description; with a single space the description word is read as the
    # option's argument, and -v / -i would stop being plain flags.
    arg = docopt("""
    Usage: sse_collector.py [options]

    -v   Verbose
    -i   Info level only
    """)

    # -v gives DEBUG logging, -i alone gives INFO; neither leaves logging as is
    if arg['-v'] or arg['-i']:
        enableTwistedLog()
        log.setLevel(logging.DEBUG if arg['-v'] else logging.INFO)
        defer.setDebugging(True)

    # shared by all request handlers through the application settings
    graphClients = GraphClients()

    reactor.listenTCP(9072,
                      cyclone.web.Application(handlers=[
                          (r"/()", cyclone.web.StaticFileHandler, {
                              "path": ".",
                              "default_filename": "index.html"
                          }),
                          (r'/state', State),
                          (r'/graph/', GraphList),
                          (r'/graph/(.+)', PatchSink),
                          (r'/metrics', Metrics),
                      ],
                                              graphClients=graphClients),
                      interface='::')
    reactor.run()
|