Mercurial > code > home > repos > homeauto
annotate service/wifi/scrape.py @ 1729:41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
author | drewp@bigasterisk.com |
---|---|
date | Fri, 30 Jun 2023 22:04:36 -0700 |
parents | f88ff1021ee0 |
children |
rev | line source |
---|---|
1679 | 1 import base64 |
2 import json | |
3 import logging | |
4 import re | |
5 import time | |
1729
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
6 from typing import Awaitable, Callable, Iterable, List, cast |
423
e0703c7824e9
very big rewrite. py3; orbi-only for now; n3 config file; delete or move out dead code
drewp@bigasterisk.com
parents:
422
diff
changeset
|
7 |
1729
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
8 import aiohttp |
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
9 from rdflib import RDF, RDFS, Graph, Literal, Namespace, URIRef |
0 | 10 |
# getLogger() with no name returns the root logger.
log = logging.getLogger()

# RDF namespaces used to build the URIs in this module.
ROOM = Namespace("http://projects.bigasterisk.com/room/")
AST = Namespace("http://bigasterisk.com/")
0 | 14 |
1679 | 15 |
def macUri(macAddress: str) -> URIRef:
    """Return the URIRef that identifies the device with this MAC address.

    The address is lowercased first, so differently-cased spellings of the
    same MAC produce the same URI.
    """
    normalized = macAddress.lower()
    return URIRef("http://bigasterisk.com/mac/%s" % normalized)
51
d2842eedd56d
rewrite tomatowifi from restkit to cyclone httpclient
drewp@bigasterisk.com
parents:
36
diff
changeset
|
18 |
1679 | 19 |
423
e0703c7824e9
very big rewrite. py3; orbi-only for now; n3 config file; delete or move out dead code
drewp@bigasterisk.com
parents:
422
diff
changeset
|
class SeenNode:
    """One device reported by a scrape, plus the statements made about it.

    Attributes:
        connected: always set to True by the constructor.
        uri: URIRef identifying the device.
        mac: the device's MAC address string.
        ip: the device's IP address string.
        stmts: iterable of statements (triples) describing the device.
    """

    def __init__(self, uri: URIRef, mac: str, ip: str, stmts: Iterable):
        # Every node is created as connected; nothing in this class clears it.
        self.connected = True
        self.uri = uri
        self.mac = mac
        self.ip = ip
        self.stmts = stmts

    def __repr__(self) -> str:
        # stmts is left out: it can be large and is not needed to tell nodes apart.
        return (f'{type(self).__name__}(uri={self.uri!r}, mac={self.mac!r}, '
                f'ip={self.ip!r}, connected={self.connected!r})')
1679 | 28 |
29 | |
class Wifi:
    """
    Report the devices currently seen on the network, using whichever scraper
    the config graph declares as the rdf:type of ROOM['wifiScraper'].
    """

    def __init__(self, config: Graph):
        self.config = config

    async def getPresentMacAddrs(self) -> List[SeenNode]:
        """Run the configured scraper against the config graph and return its nodes."""
        load = self._loader()
        return await load(self.config)

    def _loader(self) -> Callable[[Graph], Awaitable[List[SeenNode]]]:
        """Choose the scrape coroutine for the configured scraper type.

        Raises NotImplementedError (carrying the configured type) for anything
        other than ROOM['OrbiScraper'].
        """
        scraperType = self.config.value(ROOM['wifiScraper'], RDF.type)
        if scraperType != ROOM['OrbiScraper']:
            raise NotImplementedError(scraperType)
        return loadOrbiData
175
c81a451f9b26
rewrites for better graph export, removal of dhcp reader
drewp@bigasterisk.com
parents:
62
diff
changeset
|
47 |
c81a451f9b26
rewrites for better graph export, removal of dhcp reader
drewp@bigasterisk.com
parents:
62
diff
changeset
|
48 |
1729
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
async def fetch(url, user, passwd) -> str:
    """GET `url` with HTTP Basic credentials and return the response body as text.

    A new aiohttp ClientSession is opened (and closed) for each call.

    Raises:
        ValueError: the response status was not 200.
        UnicodeEncodeError: `user` or `passwd` contains non-ASCII characters.
    """
    credentials = f'{user}:{passwd}'.encode('ascii')
    # b64encode, not encodebytes: encodebytes inserts a newline after every 76
    # output characters and strip() would only remove the trailing one, so
    # long credentials would put a line break inside the header value.
    headers = {
        'Authorization': 'Basic %s' % base64.b64encode(credentials).decode('ascii'),
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            if response.status != 200:
                raise ValueError(f'{response.status=}')

            return await response.text()
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
60 |
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
61 |
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
async def loadConnectedMacsFromSatellites(satIp, user, passwd):
    """Fetch the device list from the satellite at `satIp`.

    Requests /refresh_dev.aspx from that host, parses the body as JSON, and
    returns one {'mac': ..., 'type': ...} dict for each entry under the
    response's 'device' key.
    """
    report = json.loads(await fetch(f'http://{satIp}/refresh_dev.aspx', user, passwd))
    return [{'mac': dev['mac'], 'type': dev['type']} for dev in report['device']]
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
69 |
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
70 |
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
def findSatellites(satMacs, jrows):
    """Look up the IP address of each configured satellite in the router's rows.

    Args:
        satMacs: iterable of (label, mac) pairs for the configured satellites.
        jrows: iterable of row dicts, each with at least 'mac' and 'ip' keys.

    Returns:
        A list of (label, ip) pairs, in `jrows` order, for every row whose MAC
        belongs to a configured satellite. MACs are compared case-insensitively.
    """
    # Index the configured satellites by normalised MAC. A list of labels per
    # MAC keeps every label if the same MAC is configured more than once.
    labelsForMac = {}
    for label, satMac in satMacs:
        labelsForMac.setdefault(satMac.lower(), []).append(label)

    satIps = []
    for row in jrows:
        for label in labelsForMac.get(row['mac'].lower(), ()):
            satIps.append((label, row['ip']))
    return satIps
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
79 |
1679 | 80 |
1729
41394bc1d1b0
scrape satellites too, since they reveal what devs are connected to them
drewp@bigasterisk.com
parents:
1679
diff
changeset
|
async def loadOrbiData(config: Graph) -> List[SeenNode]:
    """Scrape the Orbi router, and its configured satellites, for present devices.

    Reads 'user', 'password' and 'deviceInfoPage' from ROOM['wifiScraper'] in
    `config`, along with the rdfs:label and ROOM['mac'] of every
    ROOM['satellite'] listed there. The device-info page is fetched and parsed
    into rows; satellites found among those rows are then each asked for the
    MACs connected to them.

    Returns:
        One SeenNode per row in the router's report, carrying the statements
        generated for that device.

    Raises:
        ValueError: the router's body does not start with one of the expected
            'device=' prefixes, or a row's 'mac' does not look like a MAC
            address. Errors from fetch() and JSON parsing propagate unchanged.
    """

    def confStr(s, p) -> str:
        # NOTE(review): if the config graph has no value for (s, p), value()
        # returns None and this fails with AttributeError.
        return cast(Literal, config.value(s, p)).toPython()

    scraper = ROOM['wifiScraper']
    user = confStr(scraper, ROOM['user'])
    passwd = confStr(scraper, ROOM['password'])
    deviceInfoPage = confStr(scraper, ROOM['deviceInfoPage'])
    satelliteNamesAndMacs = [
        (confStr(s, RDFS.label), confStr(s, ROOM['mac']))  #
        for s in config.objects(scraper, ROOM['satellite'])
    ]

    # The changing ts query parameter is presumably a cache-buster.
    body = await fetch(f"{deviceInfoPage}?ts={time.time()}", user, passwd)

    if not body.startswith(('device=', 'device_changed=0\ndevice=', 'device_changed=1\ndevice=')):
        raise ValueError(body)

    # Everything after the first 'device=' is the JSON list of device rows.
    orbiReportRows = json.loads(body.split('device=', 1)[-1])
    satelliteNamesAndIps = findSatellites(satelliteNamesAndMacs, orbiReportRows)

    # Map each device MAC (lowercased) to the satellite that reports it.
    satNameForMac = {}
    for sat, satIp in satelliteNamesAndIps:
        for row in await loadConnectedMacsFromSatellites(satIp, user, passwd):
            satNameForMac[row['mac'].lower()] = ROOM[sat]

    macPattern = re.compile(r'\w\w:\w\w:\w\w:\w\w:\w\w:\w\w')
    outNodes = []
    for rowNum, row in enumerate(orbiReportRows):
        log.debug('response row [%d] %r', rowNum, row)
        if not macPattern.match(row['mac']):
            raise ValueError(f"corrupt response: mac was {row['mac']!r}")
        mac = row['mac'].lower()
        devUri = macUri(mac)
        triples = set()

        # Other contype values (e.g. 'wireless', 'wired', '-') add no
        # band or access-point statements.
        if row['contype'] in ['2.4G', '5G']:
            orbi = macUri(row['conn_orbi_mac'])
            ct = ROOM['wifiBand/%s' % row['contype']]
            # No connectedToAp statement is made from conn_orbi_mac: the
            # original author believed it always reports the RBR50. The
            # satellite scrape below supplies connectedToAp instead.
            triples.add((devUri, ROOM['wifiBand'], ct))
            triples.add((orbi, RDF.type, ROOM['AccessPoint']))
            triples.add((orbi, ROOM['wifiBand'], ct))
            triples.add((orbi, ROOM['macAddress'], Literal(row['conn_orbi_mac'].lower())))
            triples.add((orbi, RDFS.label, Literal(row['conn_orbi_name'])))

        triples.add((devUri, ROOM['connectedToNet'], ROOM['HouseNet']))
        if sat := satNameForMac.get(mac):
            triples.add((devUri, ROOM['connectedToAp'], sat))

        if row['model'] != 'Unknown':
            triples.add((devUri, ROOM['networkModel'], Literal(row['model'])))

        outNodes.append(SeenNode(uri=devUri, mac=mac, ip=row['ip'], stmts=triples))
    return outNodes