comparison service/wifi/scrape.py @ 423:e0703c7824e9

very big rewrite. py3; orbi-only for now; n3 config file; delete or move out dead code Ignore-this: bfecea6f4990d34b36cc6d97cc6c6fa2
author drewp@bigasterisk.com
date Sat, 30 Mar 2019 23:38:47 -0700
parents 19460b3f7baf
children db031d9ec28e
comparison
equal deleted inserted replaced
422:19460b3f7baf 423:e0703c7824e9
1 import re, ast, logging, socket, json, base64 1 import logging, json, base64
2 from typing import List
3
4 from cyclone.httpclient import fetch
5 from rdflib import Literal, Graph, RDF, URIRef, Namespace
2 from twisted.internet.defer import inlineCallbacks, returnValue 6 from twisted.internet.defer import inlineCallbacks, returnValue
3 from cyclone.httpclient import fetch
4 from rdflib import Literal, Graph, RDFS, URIRef
5 7
6 log = logging.getLogger() 8 log = logging.getLogger()
9 ROOM = Namespace("http://projects.bigasterisk.com/room/")
10 AST = Namespace("http://bigasterisk.com/")
7 11
8 def macUri(macAddress: str) -> URIRef: 12 def macUri(macAddress: str) -> URIRef:
9 return URIRef("http://bigasterisk.com/mac/%s" % dev['mac'].lower()) 13 return URIRef("http://bigasterisk.com/mac/%s" % macAddress.lower())
10 14
15 class SeenNode(object):
16 def __init__(self, uri: URIRef, mac: str, ip: str, pred_objs: List):
17 self.connected = True
18 self.uri = uri
19 self.mac = mac
20 self.ip = ip
21 self.stmts = [(uri, p, o) for p, o in pred_objs]
22
11 class Wifi(object): 23 class Wifi(object):
12 """ 24 """
13 gather the users of wifi from the tomato routers 25 gather the users of wifi from the tomato routers
14 """ 26 """
15 def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"): 27 def __init__(self, config: Graph):
16 self.rereadConfig() 28 self.config = config
17 #self._loadRouters(accessN3, tomatoUrl) 29
30 @inlineCallbacks
31 def getPresentMacAddrs(self): # returnValue List[SeenNode]
32 rows = yield self._loader()(self.config)
33 returnValue(rows)
18 34
19 def rereadConfig(self): 35 def _loader(self):
20 self.graph = Graph() 36 cls = self.config.value(ROOM['wifiScraper'], RDF.type)
21 self.graph.parse('config.n3', format='n3') 37 if cls == ROOM['OrbiScraper']:
22 38 return loadOrbiData
23 39 raise NotImplementedError(cls)
24 def _loadRouters(self, accessN3, tomatoUrl):
25 g = Graph()
26 g.parse(accessN3, format="n3")
27 repl = {
28 '/wifiRouter1/' : None,
29 #'/tomato2/' : None
30 }
31 for k in repl:
32 rows = list(g.query('''
33 PREFIX p: <http://bigasterisk.com/openid_proxy#>
34 SELECT ?prefix WHERE {
35 ?site
36 p:requestPrefix ?public;
37 p:proxyUrlPrefix ?prefix
38 .
39 }''', initBindings={"public" : Literal(k)}))
40 repl[k] = str(rows[0][0])
41 log.debug('repl %r', repl)
42
43 self.routers = []
44 for url in tomatoUrl:
45 name = url
46 for k, v in repl.items():
47 url = url.replace(k, v)
48
49 r = Router()
50 http, tail = url.split('//', 1)
51 userPass, tail = tail.split("@", 1)
52 r.url = http + '//' + tail
53 r.headers = {'Authorization': ['Basic %s' % userPass.encode('base64').strip()]}
54 r.name = {'wifiRouter1' : 'bigasterisk5',
55 'tomato2' : 'bigasterisk4'}[name.split('/')[1]]
56 self.routers.append(r)
57
58 @inlineCallbacks
59 def getPresentMacAddrs(self):
60 self.rereadConfig()
61 rows = yield loadOrbiData()
62 for row in rows:
63 if 'clientHostname' in row:
64 row['name'] = row['clientHostname']
65 mac = macUri(row['mac'].lower())
66 label = self.graph.value(mac, RDFS.label)
67 if label:
68 row['name'] = label
69 returnValue(rows)
70
71 @inlineCallbacks
72 def getPresentMacAddrs_multirouter(self):
73 rows = []
74
75 for router in self.routers:
76 log.debug("GET %s", router)
77 try:
78 resp = yield fetch(router.url, headers=router.headers,
79 timeout=2)
80 except socket.error:
81 log.warn("get on %s failed" % router)
82 continue
83 data = resp.body
84 if 'Wireless -- Authenticated Stations' in data:
85 # zyxel 'Station Info' page
86 rows.extend(self._parseZyxel(data, router.name))
87 else:
88 # tomato page
89 rows.extend(self._parseTomato(data, router.name))
90
91 for r in rows:
92 try:
93 r['name'] = self.knownMacAddr[r['mac']]
94 except KeyError:
95 pass
96
97 returnValue(rows)
98
99 def _parseZyxel(self, data, routerName):
100 import lxml.html.soupparser
101
102 root = lxml.html.soupparser.fromstring(data)
103 for tr in root.cssselect('tr'):
104 mac, assoc, uth, ssid, iface = [td.text_content().strip() for td in tr.getchildren()]
105 if mac == "MAC":
106 continue
107 assoc = assoc.lower() == 'yes'
108 yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc)
109
110 def _parseTomato(self, data, routerName):
111 for iface, mac, signal in jsValue(data, 'wldev'):
112 yield dict(router=routerName, mac=mac, signal=signal, connected=bool(signal))
113 40
114 41
115 @inlineCallbacks 42 @inlineCallbacks
116 def loadUvaData(): 43 def loadOrbiData(config):
117 import lxml.html.soupparser 44 user = config.value(ROOM['wifiScraper'], ROOM['user'])
45 passwd = config.value(ROOM['wifiScraper'], ROOM['password'])
46 basicAuth = '%s:%s' % (user, passwd)
47 headers = {
48 b'Authorization': [
49 b'Basic %s' % base64.encodebytes(basicAuth.encode('utf8')).strip()],
50 }
51 uri = config.value(ROOM['wifiScraper'], ROOM['deviceInfoPage'])
52 resp = yield fetch(uri.encode('utf8'), method=b'GET', headers=headers)
118 53
119 config = json.load(open("priv-uva.json")) 54 if not resp.body.startswith((b'device=',
120 headers = {'Authorization': ['Basic %s' % config['userPass'].encode('base64').strip()]} 55 b'device_changed=0\ndevice=',
121 resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers) 56 b'device_changed=1\ndevice=')):
122 root = lxml.html.soupparser.fromstring(resp.body)
123 byMac = {}
124 for tr in root.cssselect('tr'):
125 mac, connected, auth, ssid, iface = [td.text_content().strip() for td in tr.getchildren()]
126 if mac == "MAC":
127 continue
128 connected = connected.lower() == 'yes'
129 byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes', ssid=ssid, iface=iface)
130
131 resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers)
132 for row in re.findall(r'new AAA\((.*)\)', resp.body):
133 clientHostname, ipaddr, mac, expires, iface = [s.strip("'") for s in row.rsplit(',', 4)]
134 if clientHostname == 'wlanadv.none':
135 continue
136 byMac.setdefault(mac, {}).update(dict(
137 clientHostname=clientHostname, connection=iface, ipaddr=ipaddr, dhcpExpires=expires))
138
139 returnValue(sorted(byMac.values()))
140
141 @inlineCallbacks
142 def loadCiscoData():
143 config = json.load(open("priv-uva.json"))
144 headers = {'Authorization': ['Basic %s' % config['userPass'].encode('base64').strip()]}
145 print(headers)
146 resp = yield fetch('http://10.2.0.2/', headers=headers)
147 print(resp.body)
148 returnValue([])
149
150 @inlineCallbacks
151 def loadOrbiData():
152 config = json.load(open("priv-uva.json"))
153 headers = {b'Authorization': [
154 b'Basic %s' % base64.encodebytes(config['userPass'].encode('utf8')).strip()]}
155 resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm', method=b'GET', headers=headers)
156 print('back from fetch')
157
158 if not resp.body.startswith((b'device=', b'device_changed=0\ndevice=', b'device_changed=1\ndevice=')):
159 raise ValueError(resp.body) 57 raise ValueError(resp.body)
160 58
161 ret = [] 59 log.debug(resp.body)
60 rows = []
162 for row in json.loads(resp.body.split(b'device=', 1)[-1]): 61 for row in json.loads(resp.body.split(b'device=', 1)[-1]):
163 ret.append(dict( 62 extra = []
164 connected=True, 63 extra.append((ROOM['connected'], {
165 ipaddr=row['ip'], 64 'wireless': AST['wifiAccessPoints'],
65 '2.4G': AST['wifiAccessPoints'],
66 '5G': AST['wifiAccessPoints'],
67 '-': AST['wifiUnknownConnectionType'],
68 'Unknown': AST['wifiUnknownConnectionType'],
69 'wired': AST['houseOpenNet']}[row['contype']]))
70 if row['model'] != 'Unknown':
71 extra.append((ROOM['networkModel'], Literal(row['model'])))
72
73 rows.append(SeenNode(
74 uri=macUri(row['mac'].lower()),
166 mac=row['mac'].lower(), 75 mac=row['mac'].lower(),
167 contype=row['contype'], 76 ip=row['ip'],
168 model=row['model'], 77 pred_objs=extra))
169 clientHostname=row['name'] if row['name'] != 'Unknown' else None)) 78 returnValue(rows)
170 returnValue(ret)
171
172
173 def jsValue(js, variableName):
174 # using literal_eval instead of json parser to handle the trailing commas
175 val = re.search(variableName + r'\s*=\s*(.*?);', js, re.DOTALL).group(1)
176 return ast.literal_eval(val)