Mercurial > code > home > repos > homeauto
diff service/wifi/scrape.py @ 1224:e1202af42d4d
port to py3
Ignore-this: e1a2e6bb730111e76f5a5dd2366d498a
darcs-hash:491edbe1604e4024b3c61145a8022caaec25fbd5
author | drewp <drewp@bigasterisk.com> |
---|---|
date | Sat, 30 Mar 2019 18:27:17 -0700 |
parents | |
children | b8c0daabe5a5 |
line wrap: on
line diff
# New file in this changeset: service/wifi/scrape.py
# (diff against /dev/null, Sat Mar 30 18:27:17 2019 -0700, 177 lines added)
"""Scrape wifi routers for the list of currently known/connected clients.

Every loader returns (via a twisted Deferred) a list of dicts, one per
client, keyed by things like 'mac', 'connected', 'ipaddr'.
"""
import ast
import base64
import json
import logging
import re
import socket

from twisted.internet.defer import inlineCallbacks, returnValue
from cyclone.httpclient import fetch
from rdflib import Literal, Graph, RDFS, URIRef

log = logging.getLogger()


def _basicAuthHeaders(userPass):
    """Return request headers carrying HTTP Basic auth for 'user:password'.

    b64encode is used instead of str.encode('base64') (which does not exist
    on python 3) and instead of base64.encodebytes (which inserts a newline
    every 76 output characters and would corrupt long credentials).
    Keys and values are bytes, matching what loadOrbiData already sent.
    """
    token = base64.b64encode(userPass.encode('utf8'))
    return {b'Authorization': [b'Basic ' + token]}


def _bodyText(body):
    """Return an HTTP response body as str; bytes are decoded as utf8."""
    if isinstance(body, bytes):
        return body.decode('utf8', errors='replace')
    return body


def _loadPrivConfig(path="priv-uva.json"):
    """Read the private json config file and close it again."""
    with open(path) as f:
        return json.load(f)


def _stationRows(root):
    """Yield the 5 stripped cell texts of each station row under `root`.

    The header row (first cell "MAC") is skipped, as are rows that do not
    have exactly five cells and so cannot be a station entry.
    """
    for tr in root.cssselect('tr'):
        cells = [td.text_content().strip() for td in tr]
        if len(cells) != 5:
            log.debug('skipping table row with %d cells', len(cells))
            continue
        if cells[0] == "MAC":
            continue
        yield cells


class Router(object):
    """Plain attribute bag; _loadRouters sets url, headers and name."""

    def __repr__(self):
        return repr(self.__dict__)


class Wifi(object):
    """
    gather the users of wifi from the routers

    getPresentMacAddrs reads the orbi router; the *_multirouter variant
    walks self.routers, which is only filled in by _loadRouters.
    """
    def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"):
        # Defaults so getPresentMacAddrs_multirouter works (on zero routers)
        # even though _loadRouters is not currently called.
        self.routers = []
        self.knownMacAddr = {}
        self.rereadConfig()
        # NOTE: router loading is disabled; it used to be
        # self._loadRouters(accessN3, tomatoUrl)

    def rereadConfig(self):
        """Replace self.graph with a fresh parse of config.n3."""
        self.graph = Graph()
        self.graph.parse('config.n3', format='n3')

    def _loadRouters(self, accessN3, tomatoUrl):
        """Fill self.routers from the urls in `tomatoUrl`.

        Public path prefixes in each url are replaced with the proxy url
        prefix found in the `accessN3` graph. A 'user:pass@' part in the
        resulting url is moved into a Basic auth header.

        Raises IndexError if the graph has no row for a prefix and KeyError
        for a router path that has no known name.
        """
        g = Graph()
        g.parse(accessN3, format="n3")
        repl = {
            '/wifiRouter1/': None,
            #'/tomato2/' : None
        }
        for k in repl:
            rows = list(g.query('''
            PREFIX p: <http://bigasterisk.com/openid_proxy#>
            SELECT ?prefix WHERE {
              ?site
                p:requestPrefix ?public;
                p:proxyUrlPrefix ?prefix
                .
            }''', initBindings={"public": Literal(k)}))
            repl[k] = str(rows[0][0])
        log.debug('repl %r', repl)

        routerNames = {'wifiRouter1': 'bigasterisk5',
                       'tomato2': 'bigasterisk4'}

        self.routers = []
        for url in tomatoUrl:
            name = url
            for k, v in repl.items():
                url = url.replace(k, v)

            r = Router()
            http, tail = url.split('//', 1)
            userPass, tail = tail.split("@", 1)
            r.url = http + '//' + tail
            r.headers = _basicAuthHeaders(userPass)
            r.name = routerNames[name.split('/')[1]]
            self.routers.append(r)

    @inlineCallbacks
    def getPresentMacAddrs(self):
        """Return the orbi client rows, each given a 'name'.

        'name' starts as the row's clientHostname and is overridden by the
        rdfs:label of http://bigasterisk.com/mac/<mac> when config.n3 has
        one. The config is re-read on every call.
        """
        self.rereadConfig()
        rows = yield loadOrbiData()
        for row in rows:
            if 'clientHostname' in row:
                row['name'] = row['clientHostname']
            mac = URIRef('http://bigasterisk.com/mac/%s' % row['mac'].lower())
            label = self.graph.value(mac, RDFS.label)
            if label:
                row['name'] = label
        returnValue(rows)

    @inlineCallbacks
    def getPresentMacAddrs_multirouter(self):
        """Return client rows gathered from every router in self.routers.

        Routers whose fetch fails with a socket error are skipped. Rows whose
        mac is in self.knownMacAddr get that value as 'name'.
        """
        rows = []

        for router in self.routers:
            # Log the url rather than the Router: its repr includes the
            # Authorization header.
            log.debug("GET %s", router.url)
            try:
                resp = yield fetch(router.url, headers=router.headers,
                                   timeout=2)
            except socket.error:
                log.warning("get on %s failed", router.url)
                continue
            # The body arrives as bytes; the checks below need text.
            data = _bodyText(resp.body)
            if 'Wireless -- Authenticated Stations' in data:
                # zyxel 'Station Info' page
                rows.extend(self._parseZyxel(data, router.name))
            else:
                # tomato page
                rows.extend(self._parseTomato(data, router.name))

        for r in rows:
            try:
                r['name'] = self.knownMacAddr[r['mac']]
            except KeyError:
                pass

        returnValue(rows)

    def _parseZyxel(self, data, routerName):
        """Yield one dict per station row of a zyxel 'Station Info' page."""
        import lxml.html.soupparser

        root = lxml.html.soupparser.fromstring(data)
        for mac, assoc, _auth, _ssid, _iface in _stationRows(root):
            assoc = assoc.lower() == 'yes'
            yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc)

    def _parseTomato(self, data, routerName):
        """Yield one dict per entry of the page's `wldev` js variable."""
        for _iface, mac, signal in jsValue(data, 'wldev'):
            yield dict(router=routerName, mac=mac, signal=signal,
                       connected=bool(signal))


@inlineCallbacks
def loadUvaData():
    """Return station rows from 10.2.0.2, merged by mac with its DHCP table.

    The result is ordered by mac address. Entries that only appear in the
    DHCP table carry just the DHCP fields.
    """
    import lxml.html.soupparser

    headers = _basicAuthHeaders(_loadPrivConfig()['userPass'])
    resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers)
    root = lxml.html.soupparser.fromstring(resp.body)
    byMac = {}
    for mac, connected, auth, ssid, iface in _stationRows(root):
        connected = connected.lower() == 'yes'
        byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes',
                          ssid=ssid, iface=iface)

    resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers)
    # The body arrives as bytes; a str pattern can only search text.
    for row in re.findall(r'new AAA\((.*)\)', _bodyText(resp.body)):
        fields = [s.strip("'") for s in row.rsplit(',', 4)]
        if len(fields) != 5:
            log.debug('skipping unparseable DHCP row %r', row)
            continue
        clientHostname, ipaddr, mac, expires, iface = fields
        if clientHostname == 'wlanadv.none':
            continue
        byMac.setdefault(mac, {}).update(dict(
            clientHostname=clientHostname, connection=iface, ipaddr=ipaddr,
            dhcpExpires=expires))

    # dicts are not orderable on python 3, so order by the mac key instead.
    returnValue([byMac[mac] for mac in sorted(byMac)])


@inlineCallbacks
def loadCiscoData():
    """Fetch the front page of 10.2.0.2 and return an empty list.

    Nothing is parsed; the response body is only logged at debug level.
    """
    headers = _basicAuthHeaders(_loadPrivConfig()['userPass'])
    # The headers hold credentials, so they are deliberately not logged.
    resp = yield fetch('http://10.2.0.2/', headers=headers)
    log.debug('cisco response body: %r', resp.body)
    returnValue([])


@inlineCallbacks
def loadOrbiData():
    """Return one dict per device listed by the orbi router.

    Each dict has connected (always True), ipaddr, mac (lowercased),
    contype, model and clientHostname (None when the router reports the
    name 'Unknown').

    Raises ValueError carrying the body when the response does not start
    with the expected 'device=' payload.
    """
    headers = _basicAuthHeaders(_loadPrivConfig()['userPass'])
    resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm',
                       method=b'GET', headers=headers)
    log.debug('back from fetch')

    expectedStarts = (b'device=',
                      b'device_changed=0\ndevice=',
                      b'device_changed=1\ndevice=')
    if not resp.body.startswith(expectedStarts):
        raise ValueError(resp.body)

    ret = []
    for row in json.loads(resp.body.split(b'device=', 1)[-1]):
        ret.append(dict(
            connected=True,
            ipaddr=row['ip'],
            mac=row['mac'].lower(),
            contype=row['contype'],
            model=row['model'],
            clientHostname=row['name'] if row['name'] != 'Unknown' else None))
    returnValue(ret)


def jsValue(js, variableName):
    """Return the python value of `variableName = <literal>;` found in `js`.

    `js` may be str or bytes. Raises ValueError when no such assignment is
    present.
    """
    if isinstance(js, bytes):
        js = js.decode('utf8', errors='replace')
    # re.escape so names containing regex metacharacters (e.g. '$') match
    # literally.
    match = re.search(re.escape(variableName) + r'\s*=\s*(.*?);', js, re.DOTALL)
    if match is None:
        raise ValueError('no assignment to %r found' % variableName)
    # using literal_eval instead of json parser to handle the trailing commas
    return ast.literal_eval(match.group(1))