Mercurial > code > home > repos > homeauto
annotate service/collector/collector.py @ 748:6d84cd3eb277
rename to s/sse//; use new browse link
Ignore-this: 912ae9ed9ffe9270445837f9a7308b5
author | drewp@bigasterisk.com |
---|---|
date | Thu, 13 Feb 2020 10:21:05 -0800 |
parents | service/collector/sse_collector.py@fe9cfc088a49 |
children | fafe86ae0b03 |
rev | line source |
---|---|
296 | 1 """ |
2 requesting /graph/foo returns an SSE patch stream that's the | |
3 result of fetching multiple other SSE patch streams. The result stream | |
4 may include new statements injected by this service. | |
5 | |
6 Future: | |
7 - filter out unneeded stmts from the sources | |
298
8d89da1915df
sse_collector now kind of gets concurrent requests right
drewp@bigasterisk.com
parents:
296
diff
changeset
|
8 - give a time resolution and concatenate any patches that come faster than that res |
296 | 9 """ |
10 from docopt import docopt | |
443
2f7bc2ecf6b5
more of the stats and logging patch for collector
drewp@bigasterisk.com
parents:
442
diff
changeset
|
11 from greplin import scales |
2f7bc2ecf6b5
more of the stats and logging patch for collector
drewp@bigasterisk.com
parents:
442
diff
changeset
|
12 from greplin.scales.cyclonehandler import StatsHandler |
470 | 13 from rdflib import Namespace, URIRef |
14 | |
15 from typing import TYPE_CHECKING | |
# Type checkers get StatementType from rdflib; at runtime an empty
# placeholder class is enough to keep the annotations below evaluable.
if not TYPE_CHECKING:
    class StatementType:  # type: ignore
        pass
else:
    from rdflib import StatementType
20 | |
692 | 21 |
451
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
22 from rdflib.term import Node |
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
23 from twisted.internet import reactor, defer |
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
24 from typing import Callable, Dict, NewType, Tuple, Union, Any, Sequence, Set, List, Optional |
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
25 import cyclone.web, cyclone.sse |
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
26 import logging, collections, json, time |
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
27 |
595 | 28 from standardservice.logsetup import log, enableTwistedLog |
302 | 29 from patchablegraph import jsonFromPatch |
351
7716b1810d6c
reasoning & collector move into docker images
drewp@bigasterisk.com
parents:
316
diff
changeset
|
30 from rdfdb.patch import Patch |
449
ef7eba0551f2
collector partial py3+types update. WIP
drewp@bigasterisk.com
parents:
446
diff
changeset
|
31 |
595 | 32 from patchablegraph.patchsource import ReconnectingPatchSource |
449
ef7eba0551f2
collector partial py3+types update. WIP
drewp@bigasterisk.com
parents:
446
diff
changeset
|
33 |
715
fe9cfc088a49
consolidate debug page into ./index.html for now
drewp@bigasterisk.com
parents:
693
diff
changeset
|
34 from collector_config import config |
302 | 35 |
451
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
# NewType('SourceUri', URIRef) was tried here and doesn't work, so source
# uris get a real (empty) URIRef subclass instead.
class SourceUri(URIRef):
    pass


# namespace for the room vocabulary used in the statements we generate
ROOM = Namespace("http://projects.bigasterisk.com/room/")

# the source (and graph context) used for statements that this service
# introduces itself
COLLECTOR = SourceUri(URIRef('http://bigasterisk.com/sse_collector/'))
300
371af6e92b5e
local state statements and self.statements rewrite
drewp@bigasterisk.com
parents:
299
diff
changeset
|
42 |
443
2f7bc2ecf6b5
more of the stats and logging patch for collector
drewp@bigasterisk.com
parents:
442
diff
changeset
|
# One PmfStat per instrumented operation, registered under '/root'. Each
# one is used elsewhere in this module as a `@STATS.<name>.time()` decorator.
STATS = scales.collection(
    '/root',
    *[scales.PmfStat(statName) for statName in (
        'getState',
        'localStatementsPatch',
        'makeSyncPatch',
        'onPatch',
        'sendUpdatePatch',
        'replaceSourceStatements',
    )])
351
7716b1810d6c
reasoning & collector move into docker images
drewp@bigasterisk.com
parents:
316
diff
changeset
|
51 |
300
371af6e92b5e
local state statements and self.statements rewrite
drewp@bigasterisk.com
parents:
299
diff
changeset
|
class LocalStatements(object):
    """
    makes the statements that sse_collector itself asserts (all in the
    COLLECTOR graph) and hands them to the applyPatch callback
    """
    def __init__(self, applyPatch: Callable[[URIRef, Patch], None]):
        # called as applyPatch(COLLECTOR, patch) for each change we make
        self.applyPatch = applyPatch
        # source: the state URIRef we last announced for it
        self._sourceState: Dict[SourceUri, URIRef] = {}

    @STATS.localStatementsPatch.time()
    def setSourceState(self, source: SourceUri, state: URIRef):
        """
        record `state` for `source` and emit the patch that takes the
        COLLECTOR graph from the previous state to the new one.

        state=None removes the source. Setting the state a source
        already has does nothing (no log line, no patch).
        """
        previous = self._sourceState.get(source, None)
        if state == previous:
            return
        log.info('source state %s -> %s', source, state)

        # (COLLECTOR room:source <source>) exists exactly while the
        # source has a state
        membership = (COLLECTOR, ROOM['source'], source, COLLECTOR)
        statePred = ROOM['state']

        if previous is None:
            # first state for this source: announce the source too
            self._sourceState[source] = state
            patch = Patch(addQuads=[
                membership,
                (source, statePred, state, COLLECTOR),
            ])
        elif state is None:
            # source is going away: retract everything we said about it
            del self._sourceState[source]
            patch = Patch(delQuads=[
                membership,
                (source, statePred, previous, COLLECTOR),
            ])
        else:
            # plain state change: swap the state statement
            self._sourceState[source] = state
            patch = Patch(
                addQuads=[
                    (source, statePred, state, COLLECTOR),
                ],
                delQuads=[
                    (source, statePred, previous, COLLECTOR),
                ])
        self.applyPatch(COLLECTOR, patch)
298
8d89da1915df
sse_collector now kind of gets concurrent requests right
drewp@bigasterisk.com
parents:
296
diff
changeset
|
91 |
451
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
def abbrevTerm(t: Union[URIRef, Node]) -> Union[str, Node]:
    """
    shorten a URIRef for display by replacing the namespaces we know
    with 'room:', 'dev:' and 'sc:'. Any other kind of term is returned
    unchanged.
    """
    if not isinstance(t, URIRef):
        return t

    # applied in this order; each replaces every occurrence in the uri
    replacements = (
        ('http://projects.bigasterisk.com/room/', 'room:'),
        ('http://projects.bigasterisk.com/device/', 'dev:'),
        ('http://bigasterisk.com/sse_collector/', 'sc:'),
    )
    short = t
    for namespace, tag in replacements:
        short = short.replace(namespace, tag)
    return short
98 | |
451
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
def abbrevStmt(stmt: StatementType) -> str:
    """format a quad as '(s p o g)' with every term run through abbrevTerm"""
    subj, pred, obj, graph = (abbrevTerm(stmt[i]) for i in range(4))
    return '(%s %s %s %s)' % (subj, pred, obj, graph)
451
17a556ddc5ac
add types to sse_collector.py. Surprisingly few bugs found.
drewp@bigasterisk.com
parents:
449
diff
changeset
|
102 |
class PatchSink(cyclone.sse.SSEHandler):
    """
    one SSE connection from a requester. bind() registers it with the
    shared GraphClients (taken from the application settings) and
    unbind() unregisters it again.
    """
    # class-wide counter; each handler takes the next value as its serial
    _handlerSerial = 0

    def __init__(self, application: cyclone.web.Application, request):
        super().__init__(application, request)

        self._serial = PatchSink._handlerSerial
        PatchSink._handlerSerial += 1

        self.created = time.time()
        self.graphClients = self.settings.graphClients
        # becomes True only once addSseHandler has succeeded
        self.bound = False
        # time.time() of the last patch sent to this handler (0 = never)
        self.lastPatchSentTime: float = 0.0

    def __repr__(self) -> str:
        return '<Handler #%s>' % self._serial

    def state(self) -> Dict:
        """
        json-friendly description of this connection for the state
        page. Uses self.streamId, so only valid after bind().
        """
        headers = self.request.headers
        created = self.created
        ageSecs = time.time() - created
        return {
            'created': round(created, 2),
            'ageHours': round(ageSecs / 3600, 2),
            'streamId': self.streamId,
            # wrong, need some forwarded-for thing
            'remoteIp': self.request.remote_ip,
            'foafAgent': headers.get('X-Foaf-Agent'),
            'userAgent': headers.get('user-agent'),
        }

    def bind(self, *args, **kwargs):
        """
        first positional arg is the stream id (the route's capture
        group); remember it and register with GraphClients.
        """
        self.streamId = args[0]

        self.graphClients.addSseHandler(self)
        # If something goes wrong with addSseHandler, I don't want to
        # try removeSseHandler.
        self.bound = True

    def unbind(self) -> None:
        """unregister from GraphClients, but only if bind() completed"""
        if not self.bound:
            return
        self.graphClients.removeSseHandler(self)
296 | 139 |
692 | 140 |
# stmt: (sources currently asserting it, handlers that currently know it)
StatementTable = Dict[StatementType, Tuple[Set[SourceUri], Set[PatchSink]]]


class PostDeleter(object):
    """
    context manager for deleting table rows *after* a loop over the
    table, since rows can't be deleted while iterating the dict.

    Inside the `with` block, call add(stmt) for each row to drop. On a
    clean exit those rows are deleted from `statements`. If the block
    raised, nothing is deleted and the exception propagates.
    """
    def __init__(self, statements: StatementTable):
        self.statements = statements
        # exists from construction so add() can't hit a missing attribute
        self._garbage: List[StatementType] = []

    def __enter__(self):
        # fresh list for every `with` block
        self._garbage = []
        return self

    def add(self, stmt: StatementType):
        """mark the row for `stmt` for deletion at the end of the block"""
        self._garbage.append(stmt)

    def __exit__(self, type, value, traceback):
        if type is not None:
            # Returning a false value lets the with statement re-raise
            # the original exception; __exit__ should not raise it
            # itself.
            return False
        for stmt in self._garbage:
            del self.statements[stmt]
        return False
160 | |
692 | 161 |
class ActiveStatements(object):
    """
    the table of every statement we currently know, with the sources
    asserting it and the handlers that have been told about it.

    Rows whose source set and handler set are both empty are garbage.
    Methods that loop over the table collect those rows with
    postDeleteStatements() and drop them after the loop.
    """
    def __init__(self):
        # This table holds statements asserted by any of our sources
        # plus local statements that we introduce (source is
        # http://bigasterisk.com/sse_collector/).
        #
        # It's a defaultdict: indexing an unknown stmt creates an empty
        # row. Use .get() wherever a row must not be created.
        self.table: StatementTable = collections.defaultdict(
            lambda: (set(), set()))

    def state(self) -> Dict:
        """summary for the state page"""
        return {
            'len': len(self.table),
        }

    def postDeleteStatements(self) -> PostDeleter:
        """context manager that deletes the rows add()ed to it on clean exit"""
        return PostDeleter(self.table)

    def pprintTable(self) -> None:
        """print the whole table, sorted by statement, for debugging"""
        for i, (stmt, (sources, handlers)) in enumerate(
                sorted(self.table.items())):
            print("%03d. %-80s from %s to %s" % (
                i,
                abbrevStmt(stmt),
                [abbrevTerm(s) for s in sources],
                handlers))

    @STATS.makeSyncPatch.time()
    def makeSyncPatch(self, handler: PatchSink, sources: Set[SourceUri]):
        """
        return the Patch that brings `handler` up to date with the
        statements asserted by any of `sources`, and record in the table
        that the handler now has (or no longer has) those statements.
        """
        # todo: this could run all handlers at once, which is how we
        # use it anyway
        adds = []
        dels = []

        with self.postDeleteStatements() as garbage:
            for stmt, (stmtSources, handlers) in self.table.items():
                belongsInHandler = not sources.isdisjoint(stmtSources)
                handlerHasIt = handler in handlers
                #log.debug("%s belong=%s has=%s",
                #          abbrevStmt(stmt), belongsInHandler, handlerHasIt)
                if belongsInHandler and not handlerHasIt:
                    adds.append(stmt)
                    handlers.add(handler)
                elif not belongsInHandler and handlerHasIt:
                    dels.append(stmt)
                    handlers.remove(handler)
                    if not handlers and not stmtSources:
                        garbage.add(stmt)

        return Patch(addQuads=adds, delQuads=dels)

    def applySourcePatch(self, source: SourceUri, p: Patch):
        """
        apply an incremental patch from `source` to the table.

        Raises ValueError if the source adds a statement it already
        asserts, or deletes one it doesn't assert. Statements processed
        before the failing one stay applied.
        """
        for stmt in p.addQuads:
            # indexing creates the row for a stmt we haven't seen yet
            sourceUrls, handlers = self.table[stmt]
            if source in sourceUrls:
                raise ValueError("%s added stmt that it already had: %s" %
                                 (source, abbrevStmt(stmt)))
            sourceUrls.add(source)

        with self.postDeleteStatements() as garbage:
            for stmt in p.delQuads:
                # .get() so that a delete of an unknown stmt doesn't
                # leave a new empty row behind before we raise
                row = self.table.get(stmt)
                if row is None or source not in row[0]:
                    raise ValueError("%s deleting stmt that it didn't have: %s" %
                                     (source, abbrevStmt(stmt)))
                sourceUrls, handlers = row
                sourceUrls.remove(source)
                # this is rare, since some handler probably still has
                # the stmt we're deleting, but it can happen e.g. when
                # a handler was just deleted
                if not sourceUrls and not handlers:
                    garbage.add(stmt)

    @STATS.replaceSourceStatements.time()
    def replaceSourceStatements(self, source: SourceUri,
                                stmts: Sequence[StatementType]):
        """
        make the set of statements asserted by `source` exactly `stmts`:
        rows it no longer asserts lose the source, existing rows it now
        asserts gain it, and stmts not in the table yet are added.
        """
        log.debug('replaceSourceStatements with %s stmts', len(stmts))
        # set membership, so the loop below isn't len(table) * len(stmts)
        wanted = set(stmts)
        # ends up as the stmts that have no row in the table at all
        newStmts = set(wanted)

        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                if source in sources:
                    if stmt not in wanted:
                        sources.remove(source)
                        if not sources and not handlers:
                            garbage.add(stmt)
                elif stmt in wanted:
                    sources.add(source)
                # anything with a row was handled above; leaving it in
                # newStmts would make applySourcePatch raise
                newStmts.discard(stmt)

        self.applySourcePatch(source, Patch(addQuads=newStmts, delQuads=[]))

    def discardHandler(self, handler: PatchSink):
        """forget that `handler` knows any statement"""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                handlers.discard(handler)
                if not sources and not handlers:
                    garbage.add(stmt)

    def discardSource(self, source: SourceUri):
        """forget every assertion made by `source`"""
        with self.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.table.items():
                sources.discard(source)
                if not sources and not handlers:
                    garbage.add(stmt)
265 | |
266 | |
692 | 267 |
class GraphClients(object):
    """
    All the active PatchSources and SSEHandlers

    To handle all the overlapping-statement cases, we store a set of
    true statements along with the sources that are currently
    asserting them and the requesters who currently know them. As
    statements come and go, we make patches to send to requesters.
    """
    def __init__(self):
        # (COLLECTOR is not listed)
        self.clients: Dict[SourceUri, ReconnectingPatchSource] = {}
        self.handlers: Set[PatchSink] = set()
        self.statements: ActiveStatements = ActiveStatements()

        # our own statements come back into _onPatch with source=COLLECTOR
        self._localStatements = LocalStatements(self._onPatch)

    def state(self) -> Dict:
        """json-friendly summary of clients, handlers and the table"""
        return {
            'clients': sorted([ps.state() for ps in self.clients.values()],
                              key=lambda r: r['reconnectedPatchSource']['url']),
            'sseHandlers': sorted([h.state() for h in self.handlers],
                                  key=lambda r: (r['streamId'], r['created'])),
            'statements': self.statements.state(),
        }

    def _sourcesForHandler(self, handler: PatchSink) -> List[SourceUri]:
        """
        the sources configured for the handler's stream, plus COLLECTOR.

        Raises ValueError unless exactly one stream in config['streams']
        has the handler's streamId.
        """
        streamId = handler.streamId
        matches = [s for s in config['streams'] if s['id'] == streamId]
        if len(matches) != 1:
            raise ValueError("%s matches for %r" % (len(matches), streamId))
        return [SourceUri(URIRef(s)) for s in matches[0]['sources']] + [
            COLLECTOR]

    @STATS.onPatch.time()
    def _onPatch(self, source: SourceUri, p: Patch, fullGraph: bool=False):
        """
        take a patch (or full graph) from `source`, update the table,
        push updates to the handlers, and note the source's new state.
        """
        if fullGraph:
            # a reconnect may need to resend the full graph even
            # though we've already sent some statements
            self.statements.replaceSourceStatements(source, p.addQuads)
        else:
            self.statements.applySourcePatch(source, p)

        self._sendUpdatePatch()

        if log.isEnabledFor(logging.DEBUG):
            self.statements.pprintTable()

        # setSourceState calls back into _onPatch with source=COLLECTOR,
        # so this check is what stops the recursion
        if source != COLLECTOR:
            self._localStatements.setSourceState(
                source,
                ROOM['fullGraphReceived'] if fullGraph else
                ROOM['patchesReceived'])

    @STATS.sendUpdatePatch.time()
    def _sendUpdatePatch(self, handler: Optional[PatchSink]=None):
        """
        send a patch event out this handler to bring it up to date with
        self.statements

        With no handler given, every current handler is updated.
        """
        now = time.time()
        selected = self.handlers
        if handler is not None:
            if handler not in self.handlers:
                log.error("called _sendUpdatePatch on a handler that's gone")
                return
            selected = {handler}
        debugLog = log.isEnabledFor(logging.DEBUG)
        # reduce loops here- prepare all patches at once
        for h in selected:
            period = .9
            if 'Raspbian' in h.request.headers.get('user-agent', ''):
                period = 5
            # NOTE: a handler skipped here stays out of date until the
            # next call; nothing in this class schedules a retry.
            if h.lastPatchSentTime > now - period:
                continue
            p = self.statements.makeSyncPatch(h, set(self._sourcesForHandler(h)))
            if debugLog:
                # guarded because p.jsonRepr would otherwise be
                # evaluated for every patch, even with debug off
                log.debug('makeSyncPatch for %r: %r', h, p.jsonRepr)
            if not p.isNoop():
                log.debug("send patch %s to %s", p.shortSummary(), h)
                # This can be a giant line, which was a problem
                # once. Might be nice for this service to try to break
                # it up into multiple sends, although there's no
                # guarantee at all since any single stmt could be any
                # length.
                h.sendEvent(message=jsonFromPatch(p).encode('utf8'),
                            event=b'patch')
                h.lastPatchSentTime = now
            else:
                log.debug('nothing to send to %s', h)

    def addSseHandler(self, handler: PatchSink):
        """
        register a new requester, connect to any of its sources we
        aren't already connected to, and send it what we know so far.

        Raises ValueError (before registering anything) if the
        handler's streamId isn't in the config.
        """
        log.info('addSseHandler %r %r', handler, handler.streamId)

        # fail early if id doesn't match
        sources = self._sourcesForHandler(handler)

        self.handlers.add(handler)

        for source in sources:
            if source not in self.clients and source != COLLECTOR:
                log.debug('connect to patch source %s', source)
                self._localStatements.setSourceState(source, ROOM['connect'])
                self.clients[source] = ReconnectingPatchSource(
                    source,
                    # source=source binds this iteration's value
                    listener=lambda p, fullGraph, source=source: self._onPatch(
                        source, p, fullGraph),
                    reconnectSecs=10)
        log.debug('bring new client up to date')

        self._sendUpdatePatch(handler)

    def removeSseHandler(self, handler: PatchSink):
        """
        unregister a requester and stop every source that no remaining
        handler uses.
        """
        log.info('removeSseHandler %r', handler)
        # Take the handler out of self.handlers first. Stopping a client
        # emits a COLLECTOR patch that is synced to every handler still
        # in that set; a departing handler left there would be sent the
        # patch and re-added to table rows.
        self.handlers.discard(handler)
        self.statements.discardHandler(handler)
        for source in self._sourcesForHandler(handler):
            stillInUse = any(source in self._sourcesForHandler(other)
                             for other in self.handlers)
            if not stillInUse:
                self._stopClient(source)

    def _stopClient(self, url: SourceUri):
        """
        stop the patch source for `url` and retract everything it (and
        we, about it) asserted. COLLECTOR is never stopped.
        """
        if url == COLLECTOR:
            return

        client = self.clients.pop(url, None)
        if client is None:
            # still clear out the source's statements below
            log.warning('_stopClient: no client for %s', url)
        else:
            client.stop()

        self.statements.discardSource(url)

        self._localStatements.setSourceState(url, None)

        self.cleanup()

    def cleanup(self):
        """
        despite the attempts above, we still get useless rows in the table
        sometimes
        """
        with self.statements.postDeleteStatements() as garbage:
            for stmt, (sources, handlers) in self.statements.table.items():
                # handlers that are no longer registered don't count
                if not sources and not any(h in self.handlers for h in handlers):
                    garbage.add(stmt)
692 | 414 |
470 | 415 |
439
124c921ad52d
stats->state to make room for greplin stats
drewp@bigasterisk.com
parents:
353
diff
changeset
|
class State(cyclone.web.RequestHandler):
    """serves the GraphClients state as indented json (routed at /state)"""
    @STATS.getState.time()
    def get(self) -> None:
        try:
            report = {'graphClients': self.settings.graphClients.state()}
            # anything json can't encode shows up as a placeholder
            # string instead of failing the whole page
            body = json.dumps(report, indent=2,
                              default=lambda obj: '<unserializable>')
            self.write(body)
        except Exception:
            # print the traceback ourselves, then let the error propagate
            import traceback
            traceback.print_exc()
            raise
693
be2fbdbdf549
collector: add /graphlist, plus logging updates
drewp@bigasterisk.com
parents:
692
diff
changeset
|
426 |
be2fbdbdf549
collector: add /graphlist, plus logging updates
drewp@bigasterisk.com
parents:
692
diff
changeset
|
class GraphList(cyclone.web.RequestHandler):
    """serves the configured stream list as json (routed at /graph/)"""
    def get(self) -> None:
        streams = config['streams']
        self.write(json.dumps(streams))
be2fbdbdf549
collector: add /graphlist, plus logging updates
drewp@bigasterisk.com
parents:
692
diff
changeset
|
430 |
if __name__ == '__main__':
    # docopt needs at least two spaces between an option and its
    # description, otherwise the description is read as the option's
    # argument.
    arg = docopt("""
    Usage: sse_collector.py [options]

    -v   Verbose
    -i   Info level only
    """)

    verbose = arg['-v']
    if verbose or arg['-i']:
        enableTwistedLog()
        level = logging.DEBUG if verbose else logging.INFO
        log.setLevel(level)
        defer.setDebugging(True)

    graphClients = GraphClients()
    #exporter = InfluxExporter(... to export some stats values

    routes = [
        # debug page
        (r"/()", cyclone.web.StaticFileHandler, {
            "path": ".", "default_filename": "index.html"}),
        (r'/state', State),
        (r'/graph/', GraphList),
        (r'/graph/(.+)', PatchSink),
        (r'/stats/(.*)', StatsHandler, {'serverName': 'collector'}),
    ]
    # graphClients becomes an application setting, which the handlers
    # read as self.settings.graphClients
    app = cyclone.web.Application(handlers=routes, graphClients=graphClients)

    reactor.listenTCP(9072, app, interface='::')
    reactor.run()