comparison service/wifi/scrape.py @ 1729:41394bc1d1b0

Scrape the satellites too, since they reveal which devices are connected to them
author drewp@bigasterisk.com
date Fri, 30 Jun 2023 22:04:36 -0700
parents f88ff1021ee0
children
comparison
equal deleted inserted replaced
1728:81aa0873b48d 1729:41394bc1d1b0
1 import base64 1 import base64
2 import json 2 import json
3 import logging 3 import logging
4 import re 4 import re
5 import time 5 import time
6 from typing import Awaitable, Callable, Iterable, List 6 from typing import Awaitable, Callable, Iterable, List, cast
7 7
8 from cyclone.httpclient import fetch 8 import aiohttp
9 from rdflib import Graph, Literal, Namespace, RDF, RDFS, URIRef 9 from rdflib import RDF, RDFS, Graph, Literal, Namespace, URIRef
10 10
11 log = logging.getLogger() 11 log = logging.getLogger()
12 ROOM = Namespace("http://projects.bigasterisk.com/room/") 12 ROOM = Namespace("http://projects.bigasterisk.com/room/")
13 AST = Namespace("http://bigasterisk.com/") 13 AST = Namespace("http://bigasterisk.com/")
14 14
44 if cls == ROOM['OrbiScraper']: 44 if cls == ROOM['OrbiScraper']:
45 return loadOrbiData 45 return loadOrbiData
46 raise NotImplementedError(cls) 46 raise NotImplementedError(cls)
47 47
48 48
async def fetch(url, user, passwd) -> str:
    """GET `url` with HTTP Basic authentication and return the response body as text.

    url: address to request.
    user, passwd: credentials, joined as 'user:passwd' for the Authorization header.

    Raises ValueError if the response status is not 200, and UnicodeEncodeError
    if the credentials contain non-ASCII characters.
    """
    basicAuth = '%s:%s' % (user, passwd)
    # b64encode, not encodebytes: encodebytes inserts a newline after every 76
    # output characters, which would leave a line break inside the header value
    # whenever the credentials are long.
    token = base64.b64encode(basicAuth.encode('ascii')).decode('ascii')
    headers = {
        'Authorization': 'Basic %s' % token,
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            if response.status != 200:
                raise ValueError(f'{response.status=}')

            return await response.text()
60 raise ValueError(resp.body)
61 60
62 61
async def loadConnectedMacsFromSatellites(satIp, user, passwd):
    """Fetch one satellite's device report and return its rows as {'mac', 'type'} dicts.

    Requests http://<satIp>/refresh_dev.aspx via fetch() with the given
    credentials, parses the body as JSON, and keeps only the 'mac' and 'type'
    fields of each entry under the 'device' key.
    """
    reportUrl = f'http://{satIp}/refresh_dev.aspx'
    report = json.loads(await fetch(reportUrl, user, passwd))
    return [{'mac': dev['mac'], 'type': dev['type']} for dev in report['device']]
69
70
def findSatellites(satMacs, jrows):
    """Return (label, ip) pairs for the report rows whose MAC belongs to a satellite.

    satMacs: iterable of (label, mac) pairs naming the known satellites.
    jrows: iterable of dicts with a 'mac' key; rows that match a satellite
        must also have an 'ip' key.

    MACs are compared case-insensitively on both sides. Results follow the
    order of jrows; if several labels share a MAC, they appear in satMacs order.
    Raises KeyError if a row lacks 'mac' or a matching row lacks 'ip'.
    """
    # Build the lookup once: this normalizes the configured MACs (the row MACs
    # are lower-cased below, so an upper-case configured MAC would otherwise
    # never match) and avoids rescanning satMacs for every row.
    labelsByMac = {}
    for label, satMac in satMacs:
        labelsByMac.setdefault(satMac.lower(), []).append(label)

    return [
        (label, row['ip'])
        for row in jrows
        for label in labelsByMac.get(row['mac'].lower(), ())
    ]
79
80
81 async def loadOrbiData(config: Graph) -> List[SeenNode]:
82
83 def confStr(s, p) -> str:
84 return cast(Literal, config.value(s, p)).toPython()
85
86 user = confStr(ROOM['wifiScraper'], ROOM['user'])
87 passwd = confStr(ROOM['wifiScraper'], ROOM['password'])
88 uri = confStr(ROOM['wifiScraper'], ROOM['deviceInfoPage'])
89 satelliteNamesAndMacs = [
90 (confStr(s, RDFS.label), confStr(s, ROOM['mac'])) #
91 for s in config.objects(ROOM['wifiScraper'], ROOM['satellite'])
92 ]
93 body = await fetch(f"{uri}?ts={time.time()}", user, passwd)
94
95 if not body.startswith(('device=', 'device_changed=0\ndevice=', 'device_changed=1\ndevice=')):
96 raise ValueError(body)
97
98 outNodes = []
99
100 orbiReportRows = json.loads(body.split('device=', 1)[-1])
101 satelliteNamesAndIps = findSatellites(satelliteNamesAndMacs, orbiReportRows)
102
103 satNameForMac = {}
104 for sat, satIp in satelliteNamesAndIps:
105 for row in await loadConnectedMacsFromSatellites(satIp, user, passwd):
106 satNameForMac[row['mac'].lower()] = ROOM[sat]
107
108 for rowNum, row in enumerate(orbiReportRows):
65 log.debug('response row [%d] %r', rowNum, row) 109 log.debug('response row [%d] %r', rowNum, row)
66 if not re.match(r'\w\w:\w\w:\w\w:\w\w:\w\w:\w\w', row['mac']): 110 if not re.match(r'\w\w:\w\w:\w\w:\w\w:\w\w:\w\w', row['mac']):
67 raise ValueError(f"corrupt response: mac was {row['mac']!r}") 111 raise ValueError(f"corrupt response: mac was {row['mac']!r}")
68 triples = set() 112 triples = set()
69 uri = macUri(row['mac'].lower()) 113 uri = macUri(row['mac'].lower())
70 114
71 if row['contype'] in ['2.4G', '5G']: 115 if row['contype'] in ['2.4G', '5G']:
72 orbi = macUri(row['conn_orbi_mac']) 116 orbi = macUri(row['conn_orbi_mac'])
73 ct = ROOM['wifiBand/%s' % row['contype']] 117 ct = ROOM['wifiBand/%s' % row['contype']]
74 triples.add((uri, ROOM['connectedToAp'], orbi)) 118 # triples.add((uri, ROOM['connectedToAp'], orbi)) # always reports the RBR50, i think
75 triples.add((uri, ROOM['wifiBand'], ct)) 119 triples.add((uri, ROOM['wifiBand'], ct))
76 triples.add((orbi, RDF.type, ROOM['AccessPoint'])) 120 triples.add((orbi, RDF.type, ROOM['AccessPoint']))
77 triples.add((orbi, ROOM['wifiBand'], ct)) 121 triples.add((orbi, ROOM['wifiBand'], ct))
78 triples.add((orbi, ROOM['macAddress'], Literal(row['conn_orbi_mac'].lower()))) 122 triples.add((orbi, ROOM['macAddress'], Literal(row['conn_orbi_mac'].lower())))
79 triples.add((orbi, RDFS.label, Literal(row['conn_orbi_name']))) 123 triples.add((orbi, RDFS.label, Literal(row['conn_orbi_name'])))
83 pass 127 pass
84 elif row['contype'] == '-': 128 elif row['contype'] == '-':
85 pass 129 pass
86 else: 130 else:
87 pass 131 pass
88 triples.add((uri, ROOM['connectedToNet'], ROOM['HouseOpenNet'])) 132 triples.add((uri, ROOM['connectedToNet'], ROOM['HouseNet']))
133 if sat := satNameForMac.get(row['mac'].lower()):
134 triples.add((uri, ROOM['connectedToAp'], sat))
89 135
90 if row['model'] != 'Unknown': 136 if row['model'] != 'Unknown':
91 triples.add((uri, ROOM['networkModel'], Literal(row['model']))) 137 triples.add((uri, ROOM['networkModel'], Literal(row['model'])))
92 138
93 rows.append(SeenNode(uri=uri, mac=row['mac'].lower(), ip=row['ip'], stmts=triples)) 139 outNodes.append(SeenNode(uri=uri, mac=row['mac'].lower(), ip=row['ip'], stmts=triples))
94 return rows 140 return outNodes