comparison service/wifi/scrape.py @ 421:47d7dd31bb2c

port to py3 Ignore-this: e1a2e6bb730111e76f5a5dd2366d498a
author drewp@bigasterisk.com
date Sat, 30 Mar 2019 18:27:17 -0700
parents service/wifi/wifi.py@a530d9c5b280
children 19460b3f7baf
comparison
equal deleted inserted replaced
420:a530d9c5b280 421:47d7dd31bb2c
1 import re, ast, logging, socket, json, base64
2 from twisted.internet.defer import inlineCallbacks, returnValue
3 from cyclone.httpclient import fetch
4 from rdflib import Literal, Graph, RDFS, URIRef
5
6 log = logging.getLogger()
7
class Router:
    """Plain attribute holder describing one router.

    Attributes are assigned ad hoc by whoever builds the instance; the
    repr simply shows every attribute currently set.
    """

    def __repr__(self):
        # vars(self) is the instance's attribute dict
        attributes = vars(self)
        return repr(attributes)
11
class Wifi(object):
    """
    Gather the users of wifi from the routers.

    The active code path is getPresentMacAddrs, which reads the device
    list via loadOrbiData and labels rows from config.n3. The
    *_multirouter / _loadRouters / _parse* methods are the older
    per-router scraping path and are not wired up by __init__.
    """
    def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"):
        # Start with empty state so getPresentMacAddrs_multirouter returns
        # an empty result instead of raising AttributeError when
        # _loadRouters was never run.
        self.routers = []
        self.knownMacAddr = {}
        self.rereadConfig()
        # Router loading is currently disabled; accessN3 is unused:
        #self._loadRouters(accessN3, tomatoUrl)

    def rereadConfig(self):
        """Replace self.graph with a fresh parse of config.n3 (cwd-relative)."""
        self.graph = Graph()
        self.graph.parse('config.n3', format='n3')

    def _loadRouters(self, accessN3, tomatoUrl):
        """Build self.routers from a list of router urls.

        accessN3: path of an n3 file mapping public request prefixes to
            proxy url prefixes.
        tomatoUrl: iterable of urls of the form scheme://user:pass@rest,
            written with the public prefix, e.g. starting '/wifiRouter1/'
            -- presumably; confirm against the caller's config.

        Raises IndexError if the access graph has no row for a prefix and
        KeyError if a url names an unknown router.
        """
        g = Graph()
        g.parse(accessN3, format="n3")
        repl = {
            '/wifiRouter1/': None,
            #'/tomato2/' : None
        }
        for k in repl:
            rows = list(g.query('''
                PREFIX p: <http://bigasterisk.com/openid_proxy#>
                SELECT ?prefix WHERE {
                  ?site
                    p:requestPrefix ?public;
                    p:proxyUrlPrefix ?prefix
                    .
                }''', initBindings={"public": Literal(k)}))
            repl[k] = str(rows[0][0])
        log.debug('repl %r', repl)

        self.routers = []
        for url in tomatoUrl:
            name = url
            for k, v in repl.items():
                url = url.replace(k, v)

            r = Router()
            http, tail = url.split('//', 1)
            userPass, tail = tail.split("@", 1)
            r.url = http + '//' + tail
            # str.encode('base64') only existed on python2; b64encode also
            # never inserts line breaks into the header value.
            token = base64.b64encode(userPass.encode('utf8')).decode('ascii')
            r.headers = {'Authorization': ['Basic %s' % token]}
            r.name = {'wifiRouter1': 'bigasterisk5',
                      'tomato2': 'bigasterisk4'}[name.split('/')[1]]
            self.routers.append(r)

    @inlineCallbacks
    def getPresentMacAddrs(self):
        """Return (via deferred) the device rows from loadOrbiData.

        Each row gets a 'name': the row's clientHostname when that key is
        present, overridden by the rdfs:label of
        <http://bigasterisk.com/mac/MAC> in the freshly reread config
        graph when such a label exists.
        """
        self.rereadConfig()
        rows = yield loadOrbiData()
        for row in rows:
            if 'clientHostname' in row:
                row['name'] = row['clientHostname']
            mac = URIRef('http://bigasterisk.com/mac/%s' % row['mac'].lower())
            label = self.graph.value(mac, RDFS.label)
            if label:
                row['name'] = label
        returnValue(rows)

    @inlineCallbacks
    def getPresentMacAddrs_multirouter(self):
        """Return (via deferred) station rows scraped from self.routers.

        Routers whose fetch raises socket.error are logged and skipped.
        Rows whose mac is a key of self.knownMacAddr get a 'name'.
        """
        rows = []

        for router in self.routers:
            log.debug("GET %s", router)
            try:
                resp = yield fetch(router.url, headers=router.headers,
                                   timeout=2)
            except socket.error:
                log.warning("get on %s failed", router)
                continue
            data = resp.body
            if isinstance(data, bytes):
                # the str substring test and str regex below need text
                data = data.decode('utf8', 'replace')
            if 'Wireless -- Authenticated Stations' in data:
                # zyxel 'Station Info' page
                rows.extend(self._parseZyxel(data, router.name))
            else:
                # tomato page
                rows.extend(self._parseTomato(data, router.name))

        for r in rows:
            try:
                r['name'] = self.knownMacAddr[r['mac']]
            except KeyError:
                pass

        returnValue(rows)

    def _parseZyxel(self, data, routerName):
        """Yield one dict per station row of a zyxel 'Station Info' page.

        Every <tr> must have exactly five cells (otherwise the unpacking
        raises ValueError); the header row, whose first cell is "MAC", is
        skipped.
        """
        import lxml.html.soupparser

        root = lxml.html.soupparser.fromstring(data)
        for tr in root.cssselect('tr'):
            mac, assoc, _auth, _ssid, _iface = [
                td.text_content().strip() for td in tr.getchildren()]
            if mac == "MAC":
                continue
            assoc = assoc.lower() == 'yes'
            yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc)

    def _parseTomato(self, data, routerName):
        """Yield one dict per (iface, mac, signal) entry of the page's
        'wldev' javascript variable; connected is the truthiness of signal.
        """
        for _iface, mac, signal in jsValue(data, 'wldev'):
            yield dict(router=routerName, mac=mac, signal=signal,
                       connected=bool(signal))
114
115
@inlineCallbacks
def loadUvaData():
    """Return (via deferred) a list of per-device dicts from the router at
    10.2.0.2, merging its wireless station list with its DHCP table.

    Credentials come from the 'userPass' entry of priv-uva.json
    (cwd-relative). Rows are ordered by mac address. A device that only
    appears in the DHCP table has no 'mac'/'connected' keys in its dict.
    """
    import lxml.html.soupparser

    with open("priv-uva.json") as f:
        config = json.load(f)
    # str.encode('base64') only existed on python2
    token = base64.b64encode(config['userPass'].encode('utf8')).decode('ascii')
    headers = {'Authorization': ['Basic %s' % token]}
    resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers)
    root = lxml.html.soupparser.fromstring(resp.body)
    byMac = {}
    for tr in root.cssselect('tr'):
        # every row is expected to have exactly five cells
        mac, connected, auth, ssid, iface = [
            td.text_content().strip() for td in tr.getchildren()]
        if mac == "MAC":
            # header row
            continue
        connected = connected.lower() == 'yes'
        byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes',
                          ssid=ssid, iface=iface)

    resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers)
    body = resp.body
    if isinstance(body, bytes):
        # the str regex below cannot be applied to bytes
        body = body.decode('utf8', 'replace')
    for row in re.findall(r'new AAA\((.*)\)', body):
        # rsplit from the right so commas inside the hostname survive
        clientHostname, ipaddr, mac, expires, iface = [
            s.strip("'") for s in row.rsplit(',', 4)]
        if clientHostname == 'wlanadv.none':
            continue
        byMac.setdefault(mac, {}).update(dict(
            clientHostname=clientHostname, connection=iface, ipaddr=ipaddr,
            dhcpExpires=expires))

    # dicts are not orderable on python3, so order the rows by mac instead
    returnValue([byMac[mac] for mac in sorted(byMac)])
141
@inlineCallbacks
def loadCiscoData():
    """Debugging stub: fetch the root page of 10.2.0.2 with basic auth,
    print it, and return (via deferred) an empty list.

    Credentials come from the 'userPass' entry of priv-uva.json
    (cwd-relative).
    """
    with open("priv-uva.json") as f:
        config = json.load(f)
    # str.encode('base64') only existed on python2
    token = base64.b64encode(config['userPass'].encode('utf8')).decode('ascii')
    headers = {'Authorization': ['Basic %s' % token]}
    # NOTE(review): this writes the encoded credentials to stdout; kept
    # because the function is a debugging aid, but do not ship it enabled.
    print(headers)
    resp = yield fetch('http://10.2.0.2/', headers=headers)
    print(resp.body)
    returnValue([])
150
@inlineCallbacks
def loadOrbiData():
    """Return (via deferred) one dict per device listed by the orbi router.

    Credentials come from the 'userPass' entry of priv-uva.json
    (cwd-relative). Each dict has connected=True, ipaddr, mac (lowercased),
    contype, model and clientHostname (None when the router reports the
    name 'Unknown').

    Raises ValueError carrying the response body when the body does not
    start with one of the expected 'device=' prefixes.
    """
    with open("priv-uva.json") as f:
        config = json.load(f)
    # b64encode rather than encodebytes: encodebytes inserts a newline
    # every 76 output characters, which would corrupt the header for long
    # credentials.
    token = base64.b64encode(config['userPass'].encode('utf8'))
    headers = {b'Authorization': [b'Basic ' + token]}
    resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm',
                       method=b'GET', headers=headers)
    log.debug('back from fetch')

    expectedPrefixes = (b'device=',
                        b'device_changed=0\ndevice=',
                        b'device_changed=1\ndevice=')
    if not resp.body.startswith(expectedPrefixes):
        raise ValueError(resp.body)

    # everything after the first 'device=' is a json list of device records
    devices = json.loads(resp.body.split(b'device=', 1)[-1])
    ret = [
        dict(
            connected=True,
            ipaddr=row['ip'],
            mac=row['mac'].lower(),
            contype=row['contype'],
            model=row['model'],
            clientHostname=row['name'] if row['name'] != 'Unknown' else None)
        for row in devices
    ]
    returnValue(ret)
172
173
def jsValue(js, variableName):
    """Return the value assigned to variableName in a javascript snippet.

    js: javascript source as str, or bytes (decoded as utf8).
    variableName: literal name to look for; it is not treated as a regex.

    The text between the first 'variableName =' and the next ';' is
    evaluated as a python literal, so the value must not itself contain
    a ';'.

    Raises ValueError when no assignment to variableName is found (or when
    the value is not a valid literal).
    """
    if isinstance(js, bytes):
        # response bodies arrive as bytes on python3
        js = js.decode('utf8', 'replace')
    match = re.search(re.escape(variableName) + r'\s*=\s*(.*?);', js,
                      re.DOTALL)
    if match is None:
        raise ValueError('no assignment to %r found' % (variableName,))
    # using literal_eval instead of json parser to handle the trailing commas
    return ast.literal_eval(match.group(1))