annotate service/wifi/scrape.py @ 1225:b8c0daabe5a5

factor out some URI generation Ignore-this: 3c982a1fdbadcc3154278fbae3d2ce0 darcs-hash:22be14eeb34ca3e87842f4619af6277851033561
author drewp <drewp@bigasterisk.com>
date Sat, 30 Mar 2019 18:59:19 -0700
parents e1202af42d4d
children 7de8f0cd3392
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1224
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
1 import re, ast, logging, socket, json, base64
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
2 from twisted.internet.defer import inlineCallbacks, returnValue
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
3 from cyclone.httpclient import fetch
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
4 from rdflib import Literal, Graph, RDFS, URIRef
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
5
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
6 log = logging.getLogger()
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
7
1225
b8c0daabe5a5 factor out some URI generation
drewp <drewp@bigasterisk.com>
parents: 1224
diff changeset
def macUri(macAddress: str) -> URIRef:
    """Return the bigasterisk.com URI that identifies a device by MAC address.

    The address is lowercased so differently-cased spellings of the same
    MAC produce the same URI.
    """
    # Use the argument: the earlier body read an undefined name `dev`, so
    # every call raised NameError.
    return URIRef("http://bigasterisk.com/mac/%s" % macAddress.lower())
1224
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
10
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
class Wifi(object):
    """
    gather the users of wifi from the tomato routers

    getPresentMacAddrs() is the entry point that works right after
    construction: it asks loadOrbiData() for the device rows and then
    names them from the RDF config graph. The *_multirouter / _loadRouters /
    _parse* methods poll a list of routers over HTTP, but __init__ does not
    populate that list (see the note there).
    """
    def __init__(self, accessN3="/my/proj/openid_proxy/access.n3"):
        self.rereadConfig()
        # NOTE: router loading is disabled, so self.routers is not set by
        # the constructor. accessN3 is kept for interface compatibility.
        #self._loadRouters(accessN3, tomatoUrl)

    def rereadConfig(self):
        """Replace self.graph with a fresh parse of 'config.n3' (cwd-relative)."""
        self.graph = Graph()
        self.graph.parse('config.n3', format='n3')

    def _loadRouters(self, accessN3, tomatoUrl):
        """Build self.routers from the proxy config and a list of router URLs.

        accessN3 is an n3 file mapping public request prefixes to proxy URL
        prefixes; tomatoUrl is an iterable of URLs of the form
        '/<routerKey>/...' whose rewritten form must look like
        'scheme://user:pass@host/...'.

        Raises IndexError if a request prefix has no mapping in accessN3,
        ValueError if a rewritten URL lacks '//' or '@', and KeyError if the
        router key is not one of the known names.
        """
        g = Graph()
        g.parse(accessN3, format="n3")
        repl = {
            '/wifiRouter1/': None,
            #'/tomato2/' : None
            }
        for k in list(repl):
            rows = list(g.query('''
              PREFIX p: <http://bigasterisk.com/openid_proxy#>
              SELECT ?prefix WHERE {
                ?site
                  p:requestPrefix ?public;
                  p:proxyUrlPrefix ?prefix
                  .
              }''', initBindings={"public": Literal(k)}))
            repl[k] = str(rows[0][0])
        log.debug('repl %r', repl)

        self.routers = []
        for url in tomatoUrl:
            # keep the un-rewritten URL: its first path segment picks the name
            name = url
            for k, v in repl.items():
                url = url.replace(k, v)

            # NOTE(review): Router is not defined in the visible part of
            # this file -- confirm it is available before re-enabling this.
            r = Router()
            http, tail = url.split('//', 1)
            userPass, tail = tail.split("@", 1)
            # the credentials move out of the URL and into a Basic auth header
            r.url = http + '//' + tail
            # str.encode('base64') is not a text encoding on Python 3; use
            # the base64 module instead.
            token = base64.b64encode(userPass.encode('utf8')).decode('ascii')
            r.headers = {'Authorization': ['Basic %s' % token]}
            r.name = {'wifiRouter1': 'bigasterisk5',
                      'tomato2': 'bigasterisk4'}[name.split('/')[1]]
            self.routers.append(r)

    @inlineCallbacks
    def getPresentMacAddrs(self):
        """Return (via Deferred) the rows from loadOrbiData(), with names.

        Each row's 'name' is set to its 'clientHostname' when that key is
        present, then overridden by the rdfs:label of the device's MAC URI
        when the freshly re-read config graph has one.
        """
        self.rereadConfig()
        rows = yield loadOrbiData()
        for row in rows:
            if 'clientHostname' in row:
                row['name'] = row['clientHostname']
            mac = macUri(row['mac'].lower())
            label = self.graph.value(mac, RDFS.label)
            if label:
                row['name'] = label
        returnValue(rows)

    @inlineCallbacks
    def getPresentMacAddrs_multirouter(self):
        """Fetch every router in self.routers and return (via Deferred) the
        combined station rows.

        Routers whose fetch raises socket.error are logged and skipped.
        Requires self.routers to have been set (by _loadRouters).
        """
        rows = []

        for router in self.routers:
            log.debug("GET %s", router)
            try:
                # NOTE(review): loadOrbiData passes a bytes URL and
                # method=b'GET' to fetch; this call may need the same.
                resp = yield fetch(router.url, headers=router.headers,
                                   timeout=2)
            except socket.error:
                log.warning("get on %s failed", router)
                continue
            data = resp.body
            # loadOrbiData treats resp.body as bytes; the substring test and
            # the regex in jsValue both need text.
            if isinstance(data, bytes):
                data = data.decode('utf8', 'replace')
            if 'Wireless -- Authenticated Stations' in data:
                # zyxel 'Station Info' page
                rows.extend(self._parseZyxel(data, router.name))
            else:
                # tomato page
                rows.extend(self._parseTomato(data, router.name))

        # Nothing in this class assigns knownMacAddr; fall back to an empty
        # mapping so a missing attribute means "no known names".
        known = getattr(self, 'knownMacAddr', {})
        for r in rows:
            try:
                r['name'] = known[r['mac']]
            except KeyError:
                pass

        returnValue(rows)

    def _parseZyxel(self, data, routerName):
        """Yield one dict per station row of a zyxel 'Station Info' page.

        Every <tr> must have exactly five cells (ValueError otherwise); the
        row whose first cell is "MAC" is treated as the header and skipped.
        """
        import lxml.html.soupparser

        root = lxml.html.soupparser.fromstring(data)
        for tr in root.cssselect('tr'):
            mac, assoc, _auth, _ssid, _iface = [
                td.text_content().strip() for td in tr.getchildren()]
            if mac == "MAC":
                continue
            assoc = assoc.lower() == 'yes'
            yield dict(router=routerName, mac=mac, assoc=assoc, connected=assoc)

    def _parseTomato(self, data, routerName):
        """Yield one dict per (iface, mac, signal) entry of the page's
        `wldev` javascript variable; 'connected' is the truthiness of signal.
        """
        for _iface, mac, signal in jsValue(data, 'wldev'):
            yield dict(router=routerName, mac=mac, signal=signal, connected=bool(signal))
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
113
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
114
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
@inlineCallbacks
def loadUvaData():
    """Return (via Deferred) a list of per-device dicts scraped from the
    router at 10.2.0.2.

    Credentials come from the 'userPass' entry of priv-uva.json. Two pages
    are fetched: the wireless station list (mac, connected, auth, ssid,
    iface) and the DHCP table (clientHostname, connection, ipaddr,
    dhcpExpires). Rows are merged by MAC string as it appears on each page
    and returned ordered by that MAC. A device seen only in the DHCP table
    has no 'mac' key in its dict.

    Raises ValueError if a table row or DHCP entry does not have exactly
    five fields.
    """
    import lxml.html.soupparser

    with open("priv-uva.json") as f:
        config = json.load(f)
    # str.encode('base64') is not a text encoding on Python 3
    token = base64.b64encode(config['userPass'].encode('utf8')).decode('ascii')
    headers = {'Authorization': ['Basic %s' % token]}
    # NOTE(review): loadOrbiData passes a bytes URL and method=b'GET' to
    # fetch; these calls may need the same treatment.
    resp = yield fetch('http://10.2.0.2/wlstationlist.cmd', headers=headers)
    root = lxml.html.soupparser.fromstring(resp.body)
    byMac = {}
    for tr in root.cssselect('tr'):
        mac, connected, auth, ssid, iface = [
            td.text_content().strip() for td in tr.getchildren()]
        if mac == "MAC":
            # header row
            continue
        connected = connected.lower() == 'yes'
        byMac[mac] = dict(mac=mac, connected=connected, auth=auth == 'Yes',
                          ssid=ssid, iface=iface)

    resp = yield fetch('http://10.2.0.2/DHCPTable.asp', headers=headers)
    body = resp.body
    # the str regex below cannot be applied to a bytes body
    if isinstance(body, bytes):
        body = body.decode('utf8', 'replace')
    for row in re.findall(r'new AAA\((.*)\)', body):
        # rsplit from the right so commas inside the hostname stay intact
        clientHostname, ipaddr, mac, expires, iface = [
            s.strip("'") for s in row.rsplit(',', 4)]
        if clientHostname == 'wlanadv.none':
            continue
        byMac.setdefault(mac, {}).update(dict(
            clientHostname=clientHostname, connection=iface, ipaddr=ipaddr,
            dhcpExpires=expires))

    # dicts are not orderable on Python 3, so order by the MAC key instead
    # of sorting the dict values themselves.
    returnValue([byMac[mac] for mac in sorted(byMac)])
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
140
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
@inlineCallbacks
def loadCiscoData():
    """Fetch the root page of the router at 10.2.0.2 and print its body.

    This is an exploratory stub: it authenticates with the 'userPass' entry
    of priv-uva.json, dumps the response body to stdout, and always returns
    (via Deferred) an empty list.
    """
    with open("priv-uva.json") as f:
        config = json.load(f)
    # str.encode('base64') is not a text encoding on Python 3
    token = base64.b64encode(config['userPass'].encode('utf8')).decode('ascii')
    headers = {'Authorization': ['Basic %s' % token]}
    # The headers are deliberately not printed: they carry the credentials.
    # NOTE(review): loadOrbiData passes a bytes URL and method=b'GET' to
    # fetch; this call may need the same treatment.
    resp = yield fetch('http://10.2.0.2/', headers=headers)
    print(resp.body)
    returnValue([])
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
149
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
@inlineCallbacks
def loadOrbiData():
    """Return (via Deferred) one dict per device reported by the orbi router.

    Authenticates with the 'userPass' entry of priv-uva.json and fetches
    DEV_device_info.htm, whose body is expected to be a JSON list after a
    'device=' marker (optionally preceded by a 'device_changed=0/1' line).
    Each result has connected=True, ipaddr, mac (lowercased), contype, model
    and clientHostname (None when the router reports 'Unknown').

    Raises ValueError carrying the response body when it does not start with
    one of the expected prefixes.
    """
    with open("priv-uva.json") as f:
        config = json.load(f)
    # b64encode never inserts line breaks, unlike encodebytes, which wraps
    # long input and would put a newline inside the header value.
    credentials = base64.b64encode(config['userPass'].encode('utf8'))
    headers = {b'Authorization': [b'Basic ' + credentials]}
    resp = yield fetch(b'http://orbi.bigasterisk.com/DEV_device_info.htm', method=b'GET', headers=headers)
    log.debug('back from fetch')

    if not resp.body.startswith((b'device=', b'device_changed=0\ndevice=', b'device_changed=1\ndevice=')):
        raise ValueError(resp.body)

    # everything after the first 'device=' marker is the JSON payload
    payload = resp.body.split(b'device=', 1)[-1]
    ret = [
        dict(
            connected=True,
            ipaddr=row['ip'],
            mac=row['mac'].lower(),
            contype=row['contype'],
            model=row['model'],
            clientHostname=row['name'] if row['name'] != 'Unknown' else None)
        for row in json.loads(payload)
    ]
    returnValue(ret)
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
171
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
172
e1202af42d4d port to py3
drewp <drewp@bigasterisk.com>
parents:
diff changeset
def jsValue(js, variableName):
    """Return the value assigned to variableName in the javascript text js.

    Finds the first `variableName = ...;` assignment (the value runs up to
    the first ';' and may span lines) and evaluates it as a Python literal.

    Raises ValueError if no such assignment is found; ast.literal_eval
    raises ValueError/SyntaxError if the value is not a valid literal.
    """
    # variableName is a literal identifier, not a pattern, so escape it;
    # otherwise e.g. 'a.b' would also match 'axb'.
    pattern = re.escape(variableName) + r'\s*=\s*(.*?);'
    found = re.search(pattern, js, re.DOTALL)
    if found is None:
        raise ValueError("no assignment to %r found" % (variableName,))
    # using literal_eval instead of json parser to handle the trailing commas
    return ast.literal_eval(found.group(1))