# HG changeset patch # User drewp@bigasterisk.com # Date 1594586034 25200 # Node ID e86642cf739312829f7ef71ad1457e87e75c33b0 # Parent 890584020372e58887aefe880feda4a7721911e8 style and requirements.txt cleanup diff -r 890584020372 -r e86642cf7393 .flake8 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.flake8 Sun Jul 12 13:33:54 2020 -0700 @@ -0,0 +1,3 @@ +[flake8] +ignore= +max-line-length=160 \ No newline at end of file diff -r 890584020372 -r e86642cf7393 Dockerfile --- a/Dockerfile Sun Jul 12 13:17:13 2020 -0700 +++ b/Dockerfile Sun Jul 12 13:33:54 2020 -0700 @@ -6,24 +6,7 @@ COPY requirements.txt ./ -RUN pip3 install 'BeautifulSoup4' -RUN pip3 install 'CherryPy==3.2.2' -RUN pip3 install 'argparse==1.2.1' -RUN pip3 install 'bottle==0.11.6' -RUN pip3 install 'cssselect==0.7.1' -RUN pip3 install 'gunicorn==0.17.2' -RUN pip3 install 'http-parser==0.8.1' -RUN pip3 install 'pymongo==2.4.2' -RUN pip3 install 'pystache==0.5.3' -RUN pip3 install 'python-dateutil' -RUN pip3 install 'six' -RUN pip3 install 'pyjade==4.0.0' -RUN pip3 install 'lxml==4.5.2' -RUN pip3 install 'requests==2.24.0' - - RUN pip3 install --index-url https://projects.bigasterisk.com/ --extra-index-url https://pypi.org/simple -r requirements.txt -RUN pip3 install -U 'https://github.com/drewp/cyclone/archive/python3.zip?v3' COPY *.py run ./ COPY static static/ diff -r 890584020372 -r e86642cf7393 jadestache.py --- a/jadestache.py Sun Jul 12 13:17:13 2020 -0700 +++ b/jadestache.py Sun Jul 12 13:33:54 2020 -0700 @@ -1,4 +1,6 @@ -import pyjade, pyjade.exceptions, pystache +import pyjade +import pyjade.exceptions +import pystache class _JadeLoader(pystache.loader.Loader): @@ -8,12 +10,12 @@ """ def __init__(self, *args, **kw): pystache.renderer.Loader.__init__(self, *args, **kw) - self.seen = {} # path : expanded jade - + self.seen = {} # path : expanded jade + def read(self, path, encoding=None): if path in self.seen: return self.seen[path] - + b = pystache.common.read(path) if encoding is None: @@ -25,6 +27,7 @@ self.seen[path] = expanded return expanded + class Renderer(pystache.renderer.Renderer): """ pystache renderer that expands base jade syntax on its input @@ -41,12 +44,13 @@ del kw['debug'] pystache.renderer.Renderer.__init__(self, *args, **kw) self._loader = None if debug else self._new_loader() - + def _new_loader(self): - return _JadeLoader( - file_encoding=self.file_encoding, extension=self.file_extension, - to_unicode=self.str, search_dirs=self.search_dirs) - + return _JadeLoader(file_encoding=self.file_encoding, + extension=self.file_extension, + to_unicode=self.str, + search_dirs=self.search_dirs) + def _make_loader(self): if self._loader is not None: return self._loader diff -r 890584020372 -r e86642cf7393 link.py --- a/link.py Sun Jul 12 13:17:13 2020 -0700 +++ b/link.py Sun Jul 12 13:33:54 2020 -0700 @@ -1,18 +1,23 @@ -import urllib.parse, urllib.request, urllib.parse, urllib.error +import urllib.parse +import urllib.request +import urllib.parse +import urllib.error from dateutil.tz import tzlocal + class NotFound(ValueError): pass + class Links(object): def __init__(self, db): self.coll = db['links'] - + def insertOrUpdate(self, doc): if not doc['href']: raise ValueError("no link") self.extract(doc) - self.coll.update({'href':doc['href']}, doc, upsert=True, safe=True) + self.coll.update({'href': doc['href']}, doc, upsert=True, safe=True) def extract(self, doc): forUsers = [] @@ -25,7 +30,7 @@ doc['extracted'] = dict(tags=tags, forUsers=forUsers) def find(self, uri, user): - docs = list(self.coll.find({'href': uri, 'user' : user})) + docs = list(self.coll.find({'href': uri, 'user': user})) if len(docs) == 0: raise NotFound("not found") elif len(docs) > 1: @@ -34,8 +39,8 @@ return docs[0] def filter(self, user, startTime): - return self.coll.find({'user' : user, 't': {'$gte': startTime}}) - + return self.coll.find({'user': user, 't': {'$gte': startTime}}) + def forDisplay(self, doc): """return a mustache-ready dict for this db doc""" out = doc.copy() @@ -46,10 +51,11 @@ else: out['displayDescription'] = out['description'] - out['tagWords'] = [{'word' : w} for w in out['tag'].split(None)] + out['tagWords'] = [{'word': w} for w in out['tag'].split(None)] out['domain'] = urllib.parse.urlparse(out['href']).netloc - out['editLink'] = 'addLink?' + urllib.parse.urlencode([('url', out['href'])]) - out['shareWith'] = [{'label' : uri} for uri in doc.get('shareWith', [])] + out['editLink'] = 'addLink?' + urllib.parse.urlencode( + [('url', out['href'])]) + out['shareWith'] = [{'label': uri} for uri in doc.get('shareWith', [])] return out def fromPostdata(self, data, user, t): @@ -67,10 +73,11 @@ ) def asDeliciousAddParams(self): - return dict(url=self['href'], - description=self['description'], - extended=self['extended'], - tags=','.join(self['tag'].split(' ')), - dt=self['t'], - replace='yes', + return dict( + url=self['href'], + description=self['description'], + extended=self['extended'], + tags=','.join(self['tag'].split(' ')), + dt=self['t'], + replace='yes', ) diff -r 890584020372 -r e86642cf7393 lookup.py --- a/lookup.py Sun Jul 12 13:17:13 2020 -0700 +++ b/lookup.py Sun Jul 12 13:33:54 2020 -0700 @@ -1,4 +1,3 @@ -#!bin/python """ serve some queries over bookmarks: @@ -6,100 +5,121 @@ /user/tag+tag+tag and the add-bookmark stuff - """ -import pymongo, bottle, time, urllib.request, urllib.parse, urllib.error, datetime, json, logging -import requests from collections import defaultdict -from urllib.parse import urlparse +import datetime +import json +import logging +import time +import urllib.error +import urllib.parse +import urllib.request + +from bottle import static_file from dateutil.tz import tzlocal -from bottle import static_file +import bottle +import pymongo +import requests + from jadestache import Renderer +from link import Links, NotFound from pagetitle import PageTitle -from link import Links, NotFound -db = pymongo.Connection('mongodb.default.svc.cluster.local', tz_aware=True)['href'] + +db = pymongo.Connection('mongodb.default.svc.cluster.local', + tz_aware=True)['href'] pageTitle = PageTitle(db) links = Links(db) renderer = Renderer(search_dirs=['template'], debug=bottle.DEBUG) log = logging.getLogger() + def getLoginBar(): - return requests.get("http://openid-proxy.default.svc.cluster.local:9023/_loginBar", - headers={ - "Cookie" : bottle.request.headers.get('cookie'), - 'x-site': 'http://bigasterisk.com/openidProxySite/href', - }).text + return requests.get( + "http://openid-proxy.default.svc.cluster.local:9023/_loginBar", + headers={ + "Cookie": bottle.request.headers.get('cookie'), + 'x-site': 'http://bigasterisk.com/openidProxySite/href', + }).text + def getUser(): agent = bottle.request.headers.get('x-foaf-agent', None) - username = db['user'].find_one({'_id':agent})['username'] if agent else None + username = db['user'].find_one({'_id': agent + })['username'] if agent else None return username, agent + def siteRoot(): try: return bottle.request.headers['x-site-root'].rstrip('/') except KeyError: log.warn(repr(bottle.request.__dict__)) raise - + + @bottle.route('/static/') def server_static(path): return static_file(path, root='static') + def recentLinks(user, tags, allowEdit): - out = {'links':[]} + out = {'links': []} t1 = time.time() - spec = {'user':user} + spec = {'user': user} if tags: - spec['extracted.tags'] = {'$all' : tags} + spec['extracted.tags'] = {'$all': tags} for doc in db['links'].find(spec, sort=[('t', -1)], limit=50): link = links.forDisplay(doc) link['allowEdit'] = allowEdit out['links'].append(link) - out['stats'] = {'queryTimeMs' : round((time.time() - t1) * 1000, 2)} + out['stats'] = {'queryTimeMs': round((time.time() - t1) * 1000, 2)} return out + def allTags(user, withTags=[]): """withTags limits results to other tags that have been used with those tags""" withTags = set(withTags) - count = defaultdict(lambda: 0) # tag : count - for doc in db['links'].find({'user':user}, fields=['extracted.tags']): + count = defaultdict(lambda: 0) # tag : count + for doc in db['links'].find({'user': user}, fields=['extracted.tags']): docTags = set(doc.get('extracted', {}).get('tags', [])) if withTags and not withTags.issubset(docTags): continue for t in docTags.difference(withTags): count[t] = count[t] + 1 - byFreq = [(n, t) for t,n in count.items()] + byFreq = [(n, t) for t, n in count.items()] byFreq.sort(key=lambda n_t: (-n_t[0], n_t[1])) return [{'label': t, 'count': n} for n, t in byFreq] - + + def renderWithTime(name, data): t1 = time.time() rendered = renderer.render_name(name, data) dt = (time.time() - t1) * 1000 rendered = rendered.replace('TEMPLATETIME', "%.02f ms" % dt) return rendered - + + @bottle.route('/addLink') def addLink(): out = { 'toRoot': siteRoot(), 'absRoot': siteRoot(), 'user': getUser()[0], - 'withKnockout': True, - 'fillHrefJson': json.dumps(bottle.request.params.get('url', '')), - 'loginBar': getLoginBar(), + 'withKnockout': True, + 'fillHrefJson': json.dumps(bottle.request.params.get('url', '')), + 'loginBar': getLoginBar(), } return renderWithTime('add.jade', out) + @bottle.route('/addOverlay') def addOverlay(): p = bottle.request.params return "" - + @bottle.route('/addLink/proposedUri') def proposedUri(): uri = bottle.request.params.uri @@ -109,18 +129,23 @@ prevDoc = links.find(uri, user) except NotFound: prevDoc = None - + return { - 'description': prevDoc['description'] if prevDoc else pageTitle.pageTitle(uri), - 'tag' : prevDoc['tag'] if prevDoc else '', - 'extended' : prevDoc['extended'] if prevDoc else '', - 'shareWith' : prevDoc.get('shareWith', []) if prevDoc else [], + 'description': + prevDoc['description'] if prevDoc else pageTitle.pageTitle(uri), + 'tag': + prevDoc['tag'] if prevDoc else '', + 'extended': + prevDoc['extended'] if prevDoc else '', + 'shareWith': + prevDoc.get('shareWith', []) if prevDoc else [], 'suggestedTags': ['tag1', 'tag2'], - 'existed': prevDoc is not None, - } + 'existed': + prevDoc is not None, + } -if 0: - pass#proposal check existing links, get page title (stuff that in db), get tags from us and other serviecs. maybe the deferred ones ater + +# proposal check existing links, get page title (stuff that in db), get tags from us and other serviecs. maybe the deferred ones ater @bottle.route('/tags') @@ -136,46 +161,52 @@ for t in allTags(params.user, withTags=haveTags): if partialTerm and partialTerm not in t['label']: continue - out.append({'id': t['label'], - 'text': "%s (%s%s)" % (t['label'], - t['count'], - " left" if haveTags else "")}) - - return {'tags' : out} - + out.append({ + 'id': + t['label'], + 'text': + "%s (%s%s)" % (t['label'], t['count'], " left" if haveTags else "") + }) + + return {'tags': out} + + @bottle.route('//') def userSlash(user): bottle.redirect(siteRoot() + "/%s" % urllib.parse.quote(user)) + @bottle.route('/.json', method='GET') def userAllJson(user): data = recentLinks(user, [], allowEdit=getUser()[0] == user) data['toRoot'] = siteRoot() return json.dumps(data) - + + @bottle.route('/', method='GET') def userAll(user): return userLinks(user, "") - - + + @bottle.route('/', method='POST') def userAddLink(user): if getUser()[0] != user: raise ValueError("not logged in as %s" % user) print(repr(bottle.request.params.__dict__)) - doc = links.fromPostdata(bottle.request.params, - user, + doc = links.fromPostdata(bottle.request.params, user, datetime.datetime.now(tzlocal())) links.insertOrUpdate(doc) print("notify about sharing to", repr(doc['shareWith'])) - + bottle.redirect(siteRoot() + '/' + user) + def parseTags(tagComponent): # the %20 is coming from davis.js, not me :( return [_f for _f in tagComponent.replace("%20", "+").split('+') if _f] - + + @bottle.route('//.json') def userLinksJson(user, tags): tags = parseTags(tags) @@ -183,37 +214,45 @@ data['toRoot'] = siteRoot() return json.dumps(data) - + @bottle.route('//') def userLinks(user, tags): tags = parseTags(tags) log.info('userLinks user=%r tags=%r', user, tags) data = recentLinks(user, tags, allowEdit=getUser()[0] == user) data['loginBar'] = getLoginBar() - data['desc'] = ("%s's recent links" % user) + (" tagged %s" % (tags,) if tags else "") + data['desc'] = ("%s's recent links" % user) + (" tagged %s" % + (tags, ) if tags else "") data['toRoot'] = siteRoot() data['allTags'] = allTags(user) data['user'] = user data['showPrivateData'] = (user == getUser()[0]) - data['pageTags'] = [{"word":t} for t in tags] + data['pageTags'] = [{"word": t} for t in tags] data['stats']['template'] = 'TEMPLATETIME' return renderWithTime('links.jade', data) + @bottle.route('/templates') def templates(): return json.dumps({'linklist': renderer.load_template("linklist.jade")}) - + + @bottle.route('/') def root(): data = { 'loginBar': getLoginBar(), 'toRoot': siteRoot(), - 'stats': {'template': 'TEMPLATETIME'}, - 'users': [{'user':doc['username']} for doc in db['user'].find()], - } + 'stats': { + 'template': 'TEMPLATETIME' + }, + 'users': [{ + 'user': doc['username'] + } for doc in db['user'].find()], + } return renderWithTime('index.jade', data) - + + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) bottle.run(server='gunicorn', host='0.0.0.0', port=10002, workers=4) diff -r 890584020372 -r e86642cf7393 pagetitle.py --- a/pagetitle.py Sun Jul 12 13:17:13 2020 -0700 +++ b/pagetitle.py Sun Jul 12 13:33:54 2020 -0700 @@ -1,43 +1,49 @@ import lxml.html.soupparser -import datetime, socket +import datetime from dateutil.tz import tzlocal import requests import traceback + class CantGetTitle(ValueError): pass + class PageTitle(object): def __init__(self, db): self.coll = db['pageTitle'] def getPageTitleNow(self, uri): try: - response = requests.get(uri, timeout=1, allow_redirects=True, - headers={ - 'user-agent': - 'link title checker - drewp@bigasterisk.com' - }) + response = requests.get( + uri, + timeout=1, + allow_redirects=True, + headers={ + 'user-agent': 'link title checker - drewp@bigasterisk.com' + }) if not str(response.status_code).startswith('2'): raise CantGetTitle("(got %s)" % response.status_code) - root = lxml.html.soupparser.fromstring( - response.text) + root = lxml.html.soupparser.fromstring(response.text) for title in root.cssselect("title"): return title.text except Exception: traceback.print_exc() raise CantGetTitle("(error requesting title from site)") - + def pageTitle(self, uri): """page title from our db or by getting a new load from the page""" - doc = self.coll.find_one({'_id' : uri}) + doc = self.coll.find_one({'_id': uri}) if doc is None: try: title = self.getPageTitleNow(uri) except CantGetTitle as e: return str(e) - doc = {'_id': uri, 'title' : title, - 'getTime':datetime.datetime.now(tzlocal())} + doc = { + '_id': uri, + 'title': title, + 'getTime': datetime.datetime.now(tzlocal()) + } self.coll.insert(doc, safe=True) return doc['title'] diff -r 890584020372 -r e86642cf7393 requirements.txt --- a/requirements.txt Sun Jul 12 13:17:13 2020 -0700 +++ b/requirements.txt Sun Jul 12 13:33:54 2020 -0700 @@ -0,0 +1,14 @@ +BeautifulSoup4 +CherryPy==3.2.2 +argparse==1.2.1 +bottle==0.11.6 +cssselect==0.7.1 +gunicorn==0.17.2 +http-parser==0.8.1 +pymongo==2.4.2 +pystache==0.5.3 +python-dateutil +six +pyjade==4.0.0 +lxml==4.5.2 +requests==2.24.0 \ No newline at end of file