Mercurial > code > home > repos > homeauto
comparison service/wifi/scrape.py @ 423:e0703c7824e9
very big rewrite. py3; orbi-only for now; n3 config file; delete or move out dead code
Ignore-this: bfecea6f4990d34b36cc6d97cc6c6fa2
author | drewp@bigasterisk.com |
---|---|
date | Sat, 30 Mar 2019 23:38:47 -0700 |
parents | 19460b3f7baf |
children | db031d9ec28e |
comparison
equal
deleted
inserted
replaced
422:19460b3f7baf | 423:e0703c7824e9 |
---|---|
1 import re, ast, logging, socket, json, base64 | 1 import logging, json, base64 |
2 from typing import List | |
3 | |
4 from cyclone.httpclient import fetch | |
5 from rdflib import Literal, Graph, RDF, URIRef, Namespace | |
2 from twisted.internet.defer import inlineCallbacks, returnValue | 6 from twisted.internet.defer import inlineCallbacks, returnValue |
3 from cyclone.httpclient import fetch | |
4 from rdflib import Literal, Graph, RDFS, URIRef | |
5 | 7 |
6 log = logging.getLogger() | 8 log = logging.getLogger() |
9 ROOM = Namespace("http://projects.bigasterisk.com/room/") | |
10 AST = Namespace("http://bigasterisk.com/") | |
7 | 11 |
8 def macUri(macAddress: str) -> URIRef: | 12 def macUri(macAddress: str) -> URIRef: |
9 return URIRef("http://bigasterisk.com/mac/%s" % dev['mac'].lower()) | 13 return URIRef("http://bigasterisk.com/mac/%s" % macAddress.lower()) |
10 | 14 |
15 class SeenNode(object): | |
16 def __init__(self, uri: URIRef, mac: str, ip: str, pred_objs: List): | |
17 self.connected = True | |
18 self.uri = uri | |
19 self.mac = mac | |
20 self.ip = ip | |
21 self.stmts = [(uri, p, o) for p, o in pred_objs] | |
22 | |
11 class Wifi(object): | 23 class Wifi(object): |
12 """ | 24 """ |
13 gather the users of wifi from the tomato routers | 25 gather the users of wifi from the tomato routers |
14 """ | 26 """ |
15 def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"): | 27 def __init__(self, config: Graph): |
16 self.rereadConfig() | 28 self.config = config |
17 #self._loadRouters(accessN3, tomatoUrl) | 29 |
30 @inlineCallbacks | |
31 def getPresentMacAddrs(self): # returnValue List[SeenNode] | |
32 rows = yield self._loader()(self.config) | |
33 returnValue(rows) | |
18 | 34 |
19 def rereadConfig(self): | 35 def _loader(self): |
20 self.graph = Graph() | 36 cls = self.config.value(ROOM['wifiScraper'], RDF.type) |
21 self.graph.parse('config.n3', format='n3') | 37 if cls == ROOM['OrbiScraper']: |
22 | 38 return loadOrbiData |
23 | 39 raise NotImplementedError(cls) |
24 def _loadRouters(self, accessN3, tomatoUrl): | |
25 g = Graph() | |
26 g.parse(accessN3, format="n3") | |
27 repl = { | |
28 '/wifiRouter1/' : None, | |
29 #'/tomato2/' : None | |
30 } | |
31 for k in repl: | |
32 rows = list(g.query(''' | |
33 PREFIX p: <http://bigasterisk.com/openid_proxy#> | |
34 SELECT ?prefix WHERE { | |
35 ?site | |
36 p:requestPrefix ?public; | |
37 p:proxyUrlPrefix ?prefix | |
38 . | |
39 }''', initBindings={"public" : Literal(k)})) | |
40 repl[k] = str(rows[0][0]) | |
41 log.debug('repl %r', repl) | |
42 | |
43 self.routers = [] | |
44 for url in tomatoUrl: | |
45 name = url | |
46 for k, v in repl.items(): | |
47 url = url.replace(k, v) | |
48 | |
49 r = Router() | |
50 http, tail = url.split('//', 1) | |
51 userPass, tail = tail.split("@", 1) | |
52 r.url = http + '//' + tail | |
53 r.headers = {'Authorization': ['Basic %s' % userPass.encode('base64').strip()]} | |
54 r.name = {'wifiRouter1' : 'bigasterisk5', | |
55 'tomato2' : 'bigasterisk4'}[name.split('/')[1]] | |
56 self.routers.append(r) | |
57 | |
58 @inlineCallbacks | |
59 def getPresentMacAddrs(self): | |
60 self.rereadConfig() | |
61 rows = yield loadOrbiData() | |
62 for row in rows: | |
63 if 'clientHostname' in row: | |
64 row['name'] = row['clientHostname'] | |
65 mac = macUri(row['mac'].lower()) | |
66 label = self.graph.value(mac, RDFS.label) | |
67 if label: | |
68 row['name'] = label | |
69 returnValue(rows) | |
70 | |
71 @inlineCallbacks | |
72 def getPresentMacAddrs_multirouter(self): | |
73 rows = [] | |
74 | |
75 for router in self.routers: | |
76 log.debug("GET %s", router) | |
77 try: | |
78 resp = yield fetch(router.url, headers=router.headers, | |
79 timeout=2) | |
80 except socket.error: | |
81 log.warn("get on %s failed" % router) | |
82 continue | |
83 data = resp.body | |
84 if 'Wireless -- Authenticated Stations' in data: | |
85 # zyxel 'Station Info' page | |
86 rows.extend(self._parseZyxel(data, router.name)) | |
87 else: | |
88 # tomato page | |
89 rows.extend(self._parseTomato(data, router.name)) | |
90 | |
91 for r in rows: | |
92 try: | |
93 r['name'] = self.knownMacAddr[r['mac']] | |
94 except KeyError: | |
95 pass | |
96 | |
97 returnValue(rows) | |
98 | |
99 def _parseZyxel(self, data, routerName): | |
100 import lxml.html.soupparser | |
101 | |
102 root = lxml.html.soupparser.fromstring(data) | |
103 for tr in root.cssselect('tr'): | |
104 mac, assoc, uth, ssid, iface = [td.text_content().strip() for td in tr.getchildren()] | |
105 if mac == "MAC": | |
106 continue | |
107 assoc = assoc.lower() == 'yes' | |
108 yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc) | |
109 | |
110 def _parseTomato(self, data, routerName): | |
111 for iface, mac, signal in jsValue(data, 'wldev'): | |
112 yield dict(router=routerName, mac=mac, signal=signal, connected=bool(signal)) | |
113 | 40 |
114 | 41 |
115 @inlineCallbacks | 42 @inlineCallbacks |
116 def loadUvaData(): | 43 def loadOrbiData(config): |
117 import lxml.html.soupparser | 44 user = config.value(ROOM['wifiScraper'], ROOM['user']) |
45 passwd = config.value(ROOM['wifiScraper'], ROOM['password']) | |
46 basicAuth = '%s:%s' % (user, passwd) | |
47 headers = { | |
48 b'Authorization': [ | |
49 b'Basic %s' % base64.encodebytes(basicAuth.encode('utf8')).strip()], | |
50 } | |
51 uri = config.value(ROOM['wifiScraper'], ROOM['deviceInfoPage']) | |
52 resp = yield fetch(uri.encode('utf8'), method=b'GET', headers=headers) | |
118 | 53 |
119 config = json.load(open("priv-uva.json")) | 54 if not resp.body.startswith((b'device=', |
120 headers = {'Authorization': ['Basic %s' % config['userPass'].encode('base64').strip()]} | 55 b'device_changed=0\ndevice=', |
121 resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers) | 56 b'device_changed=1\ndevice=')): |
122 root = lxml.html.soupparser.fromstring(resp.body) | |
123 byMac = {} | |
124 for tr in root.cssselect('tr'): | |
125 mac, connected, auth, ssid, iface = [td.text_content().strip() for td in tr.getchildren()] | |
126 if mac == "MAC": | |
127 continue | |
128 connected = connected.lower() == 'yes' | |
129 byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes', ssid=ssid, iface=iface) | |
130 | |
131 resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers) | |
132 for row in re.findall(r'new AAA\((.*)\)', resp.body): | |
133 clientHostname, ipaddr, mac, expires, iface = [s.strip("'") for s in row.rsplit(',', 4)] | |
134 if clientHostname == 'wlanadv.none': | |
135 continue | |
136 byMac.setdefault(mac, {}).update(dict( | |
137 clientHostname=clientHostname, connection=iface, ipaddr=ipaddr, dhcpExpires=expires)) | |
138 | |
139 returnValue(sorted(byMac.values())) | |
140 | |
141 @inlineCallbacks | |
142 def loadCiscoData(): | |
143 config = json.load(open("priv-uva.json")) | |
144 headers = {'Authorization': ['Basic %s' % config['userPass'].encode('base64').strip()]} | |
145 print(headers) | |
146 resp = yield fetch('http://10.2.0.2/', headers=headers) | |
147 print(resp.body) | |
148 returnValue([]) | |
149 | |
150 @inlineCallbacks | |
151 def loadOrbiData(): | |
152 config = json.load(open("priv-uva.json")) | |
153 headers = {b'Authorization': [ | |
154 b'Basic %s' % base64.encodebytes(config['userPass'].encode('utf8')).strip()]} | |
155 resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm', method=b'GET', headers=headers) | |
156 print('back from fetch') | |
157 | |
158 if not resp.body.startswith((b'device=', b'device_changed=0\ndevice=', b'device_changed=1\ndevice=')): | |
159 raise ValueError(resp.body) | 57 raise ValueError(resp.body) |
160 | 58 |
161 ret = [] | 59 log.debug(resp.body) |
60 rows = [] | |
162 for row in json.loads(resp.body.split(b'device=', 1)[-1]): | 61 for row in json.loads(resp.body.split(b'device=', 1)[-1]): |
163 ret.append(dict( | 62 extra = [] |
164 connected=True, | 63 extra.append((ROOM['connected'], { |
165 ipaddr=row['ip'], | 64 'wireless': AST['wifiAccessPoints'], |
65 '2.4G': AST['wifiAccessPoints'], | |
66 '5G': AST['wifiAccessPoints'], | |
67 '-': AST['wifiUnknownConnectionType'], | |
68 'Unknown': AST['wifiUnknownConnectionType'], | |
69 'wired': AST['houseOpenNet']}[row['contype']])) | |
70 if row['model'] != 'Unknown': | |
71 extra.append((ROOM['networkModel'], Literal(row['model']))) | |
72 | |
73 rows.append(SeenNode( | |
74 uri=macUri(row['mac'].lower()), | |
166 mac=row['mac'].lower(), | 75 mac=row['mac'].lower(), |
167 contype=row['contype'], | 76 ip=row['ip'], |
168 model=row['model'], | 77 pred_objs=extra)) |
169 clientHostname=row['name'] if row['name'] != 'Unknown' else None)) | 78 returnValue(rows) |
170 returnValue(ret) | |
171 | |
172 | |
173 def jsValue(js, variableName): | |
174 # using literal_eval instead of json parser to handle the trailing commas | |
175 val = re.search(variableName + r'\s*=\s*(.*?);', js, re.DOTALL).group(1) | |
176 return ast.literal_eval(val) |