Mercurial > code > home > repos > homeauto
view service/wifi/scrape.py @ 421:47d7dd31bb2c
port to py3
Ignore-this: e1a2e6bb730111e76f5a5dd2366d498a
author | drewp@bigasterisk.com |
---|---|
date | Sat, 30 Mar 2019 18:27:17 -0700 |
parents | service/wifi/wifi.py@a530d9c5b280 |
children | 19460b3f7baf |
line wrap: on
line source
import re
import ast
import logging
import socket
import json
import base64

from twisted.internet.defer import inlineCallbacks, returnValue
from cyclone.httpclient import fetch
from rdflib import Literal, Graph, RDFS, URIRef

log = logging.getLogger()


def _loadPrivConfig(path="priv-uva.json"):
    """Read the private JSON config (holds 'userPass') and close the file."""
    with open(path) as f:
        return json.load(f)


def _basicAuthValue(userPass):
    """
    Return the 'Basic <b64>' Authorization header value for a
    'user:password' string.

    Python 3 has no 'base64' text codec, so str.encode('base64') raises
    LookupError; the base64 module is used instead. b64encode never
    inserts line breaks, which would be invalid inside a header value.
    """
    return 'Basic %s' % base64.b64encode(userPass.encode('utf8')).decode('ascii')


def _asText(body):
    """
    Decode an HTTP response body to str. str input is returned unchanged.

    loadOrbiData treats resp.body as bytes; the str literals and str
    regexes used by the other parsers need text to match against.
    """
    if isinstance(body, bytes):
        return body.decode('utf8', 'replace')
    return body


class Router(object):
    """Plain attribute holder; _loadRouters sets url, headers and name."""

    def __repr__(self):
        return repr(self.__dict__)


class Wifi(object):
    """
    gather the users of wifi from the tomato routers
    """
    def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"):
        # accessN3 is only consumed by the disabled _loadRouters call below.
        self.rereadConfig()
        #self._loadRouters(accessN3, tomatoUrl)

    def rereadConfig(self):
        """Replace self.graph with a fresh parse of config.n3."""
        self.graph = Graph()
        self.graph.parse('config.n3', format='n3')

    def _loadRouters(self, accessN3, tomatoUrl):
        """
        Build self.routers from the router urls in tomatoUrl.

        Public request prefixes in each url are replaced with the proxy
        prefix found in the accessN3 graph. Credentials embedded in the
        url ('http://user:pass@host/...') are moved into a Basic auth
        header.
        """
        g = Graph()
        g.parse(accessN3, format="n3")
        repl = {
            '/wifiRouter1/': None,
            #'/tomato2/' : None
        }
        for k in repl:
            rows = list(g.query('''
            PREFIX p: <http://bigasterisk.com/openid_proxy#>
            SELECT ?prefix WHERE {
              ?site
                p:requestPrefix ?public;
                p:proxyUrlPrefix ?prefix .
            }''', initBindings={"public": Literal(k)}))
            repl[k] = str(rows[0][0])
        log.debug('repl %r', repl)

        self.routers = []
        for url in tomatoUrl:
            # keep the unexpanded url; the router's name is derived from it
            name = url
            for k, v in repl.items():
                url = url.replace(k, v)

            r = Router()
            http, tail = url.split('//', 1)
            userPass, tail = tail.split("@", 1)
            r.url = http + '//' + tail
            r.headers = {'Authorization': [_basicAuthValue(userPass)]}
            r.name = {'wifiRouter1': 'bigasterisk5',
                      'tomato2': 'bigasterisk4'}[name.split('/')[1]]
            self.routers.append(r)

    @inlineCallbacks
    def getPresentMacAddrs(self):
        """
        Return (via Deferred) the rows from loadOrbiData, each with a
        'name' added: the client hostname, overridden by the rdfs:label
        of the mac address in config.n3 when one exists.
        """
        self.rereadConfig()
        rows = yield loadOrbiData()
        for row in rows:
            if 'clientHostname' in row:
                row['name'] = row['clientHostname']
            mac = URIRef('http://bigasterisk.com/mac/%s' % row['mac'].lower())
            label = self.graph.value(mac, RDFS.label)
            if label:
                row['name'] = label
        returnValue(rows)

    @inlineCallbacks
    def getPresentMacAddrs_multirouter(self):
        """
        Return (via Deferred) station rows scraped from every router in
        self.routers. Routers whose fetch fails with socket.error are
        skipped.

        NOTE(review): self.routers is only set by _loadRouters, which
        __init__ no longer calls, and self.knownMacAddr is not assigned
        anywhere in this file -- confirm before re-enabling this path.
        """
        rows = []

        for router in self.routers:
            log.debug("GET %s", router)
            try:
                resp = yield fetch(router.url, headers=router.headers, timeout=2)
            except socket.error:
                log.warning("get on %s failed", router)
                continue
            # body arrives as bytes; the parsers below match str patterns
            data = _asText(resp.body)
            if 'Wireless -- Authenticated Stations' in data:
                # zyxel 'Station Info' page
                rows.extend(self._parseZyxel(data, router.name))
            else:
                # tomato page
                rows.extend(self._parseTomato(data, router.name))

        for r in rows:
            try:
                r['name'] = self.knownMacAddr[r['mac']]
            except KeyError:
                # unknown mac: leave the row without a name
                pass

        returnValue(rows)

    def _parseZyxel(self, data, routerName):
        """
        Yield one dict per station row of a zyxel 'Station Info' page.
        Every <tr> must have exactly five cells; the header row (first
        cell "MAC") is skipped.
        """
        import lxml.html.soupparser

        root = lxml.html.soupparser.fromstring(data)
        for tr in root.cssselect('tr'):
            mac, assoc, uth, ssid, iface = [td.text_content().strip()
                                            for td in tr.getchildren()]
            if mac == "MAC":
                continue
            assoc = assoc.lower() == 'yes'
            yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc)

    def _parseTomato(self, data, routerName):
        """Yield one dict per (iface, mac, signal) entry of the page's 'wldev' js variable."""
        for iface, mac, signal in jsValue(data, 'wldev'):
            yield dict(router=routerName, mac=mac, signal=signal,
                       connected=bool(signal))


@inlineCallbacks
def loadUvaData():
    """
    Return (via Deferred) a list of dicts, ordered by mac, merging the
    wireless station list and the DHCP table served at 10.2.0.2.

    Entries seen only in the DHCP table carry no 'mac' or 'connected'
    keys (same as before); the mac is only the merge key for them.
    """
    import lxml.html.soupparser

    config = _loadPrivConfig()
    headers = {'Authorization': [_basicAuthValue(config['userPass'])]}
    resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers)
    root = lxml.html.soupparser.fromstring(resp.body)
    byMac = {}
    for tr in root.cssselect('tr'):
        mac, connected, auth, ssid, iface = [td.text_content().strip()
                                             for td in tr.getchildren()]
        if mac == "MAC":
            continue
        connected = connected.lower() == 'yes'
        byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes',
                          ssid=ssid, iface=iface)

    resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers)
    # decode first: a str pattern cannot be applied to a bytes body
    for row in re.findall(r'new AAA\((.*)\)', _asText(resp.body)):
        clientHostname, ipaddr, mac, expires, iface = [
            s.strip("'") for s in row.rsplit(',', 4)]
        if clientHostname == 'wlanadv.none':
            continue
        byMac.setdefault(mac, {}).update(dict(
            clientHostname=clientHostname,
            connection=iface,
            ipaddr=ipaddr,
            dhcpExpires=expires))

    # dicts are not orderable on python 3, so order by the mac key instead
    returnValue([byMac[mac] for mac in sorted(byMac)])


@inlineCallbacks
def loadCiscoData():
    """
    Debug stub: fetch http://10.2.0.2/ with basic auth, print the
    request headers and the response body, and return an empty list.
    """
    config = _loadPrivConfig()
    headers = {'Authorization': [_basicAuthValue(config['userPass'])]}
    print(headers)
    resp = yield fetch('http://10.2.0.2/', headers=headers)
    print(resp.body)
    returnValue([])


@inlineCallbacks
def loadOrbiData():
    """
    Return (via Deferred) one dict per device listed by the orbi's
    DEV_device_info.htm page.

    Raises ValueError (carrying the body) when the response does not
    start with one of the expected 'device=' prefixes.
    """
    config = _loadPrivConfig()
    headers = {b'Authorization': [
        b'Basic %s' % base64.encodebytes(config['userPass'].encode('utf8')).strip()]}
    resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm',
                       method=b'GET', headers=headers)
    print('back from fetch')

    if not resp.body.startswith((b'device=',
                                 b'device_changed=0\ndevice=',
                                 b'device_changed=1\ndevice=')):
        raise ValueError(resp.body)

    ret = []
    # everything after the first 'device=' is the JSON device list
    for row in json.loads(resp.body.split(b'device=', 1)[-1]):
        ret.append(dict(
            connected=True,
            ipaddr=row['ip'],
            mac=row['mac'].lower(),
            contype=row['contype'],
            model=row['model'],
            clientHostname=row['name'] if row['name'] != 'Unknown' else None))
    returnValue(ret)


def jsValue(js, variableName):
    """
    Return the python value of the first 'variableName = ...;'
    assignment found in the javascript text js.

    Raises AttributeError when no such assignment is present, because
    re.search then returns None.
    """
    # using literal_eval instead of json parser to handle the trailing commas
    val = re.search(variableName + r'\s*=\s*(.*?);', js, re.DOTALL).group(1)
    return ast.literal_eval(val)