Mercurial > code > home > repos > href
diff pagetitle.py @ 41:293a694304b8
reformat
author | drewp@bigasterisk.com |
---|---|
date | Sat, 19 Nov 2022 17:18:55 -0800 |
parents | 94181d521d6d |
children |
line wrap: on
line diff
--- a/pagetitle.py	Sat Nov 19 17:07:10 2022 -0800
+++ b/pagetitle.py	Sat Nov 19 17:18:55 2022 -0800
@@ -1,8 +1,9 @@
+import datetime
+import traceback
+
 import lxml.html.soupparser
-import datetime
+import requests
 from dateutil.tz import tzlocal
-import requests
-import traceback
 
 
 class CantGetTitle(ValueError):
@@ -10,18 +11,16 @@
 
 
 class PageTitle(object):
+
     def __init__(self, db):
         self.coll = db['pageTitle']
 
     def getPageTitleNow(self, uri):
         try:
-            response = requests.get(
-                uri,
-                timeout=3,
-                allow_redirects=True,
-                headers={
-                    'user-agent': 'link title checker - drewp@bigasterisk.com'
-                })
+            response = requests.get(uri,
+                                    timeout=3,
+                                    allow_redirects=True,
+                                    headers={'user-agent': 'link title checker - drewp@bigasterisk.com'})
             if not str(response.status_code).startswith('2'):
                 raise CantGetTitle("(got %s)" % response.status_code)
             root = lxml.html.soupparser.fromstring(response.text)
@@ -40,10 +39,6 @@
             title = self.getPageTitleNow(uri)
         except CantGetTitle as e:
             return str(e)
-        doc = {
-            '_id': uri,
-            'title': title,
-            'getTime': datetime.datetime.now(tzlocal())
-        }
+        doc = {'_id': uri, 'title': title, 'getTime': datetime.datetime.now(tzlocal())}
         self.coll.insert(doc)
         return doc['title']