Mercurial > code > home > repos > href
view delicious2mongo @ 8:be339aa223bf
tag/filter autocomplete
Ignore-this: f6235de879c1ce62a8fc41e890edd0d2
author | Drew Perttula <drewp@bigasterisk.com> |
---|---|
date | Wed, 06 Mar 2013 23:22:24 -0800 |
parents | 7cecda055fae |
children | 8a5feb8d383d |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Python script to read from delicious.com into mongodb.

Usage:
    python delicious2mongo --mongo localhost --login user:password
"""
import datetime
import optparse

import lxml.objectify
import pymongo
import requests
from dateutil.parser import parse
from dateutil.tz import tzutc
from requests.auth import HTTPBasicAuth

# Endpoint queried page by page; see
# https://delicious.com/developers/fetchall
POSTS_ALL_URL = "https://api.delicious.com/v1/posts/all"


def _splitTags(tagString):
    """Split a raw tag attribute into ``(tags, forUsers)``.

    Words starting with ``for:`` go to ``forUsers`` with the prefix
    removed; every other word goes to ``tags``. A missing (None) or empty
    attribute gives two empty lists.
    """
    tags = []
    forUsers = []
    # split() without an argument discards empty pieces, so an empty
    # attribute or a doubled space never produces a '' tag.
    for word in (tagString or '').split():
        if word.startswith('for:'):
            forUsers.append(word[len('for:'):])
        else:
            tags.append(word)
    return tags, forUsers


def addPost(db, user, xmlNode):
    """Upsert one post element into the ``links`` collection of ``db``.

    db: mapping-like database handle; ``db['links']`` must offer
        ``update(spec, document, upsert=True)``.
    user: value stored in the document's ``user`` field.
    xmlNode: element whose attributes are read with ``.get(name)``
        (description, extended, href, private, shared, tag, time).

    The stored document carries the raw attributes plus an ``extracted``
    sub-document holding the tag list and the ``for:`` recipients.
    """
    out = dict(
        user=user,
        description=xmlNode.get('description'),
        extended=xmlNode.get('extended'),
        href=xmlNode.get('href'),
        private=xmlNode.get('private') == 'yes',
        shared=xmlNode.get('shared') == 'yes',
        tag=xmlNode.get('tag'),
        t=parse(xmlNode.get('time')),
    )

    tags, forUsers = _splitTags(out['tag'])
    out['extracted'] = dict(tags=tags, forUsers=forUsers)

    # NOTE(review): the upsert is keyed on href alone, so a later post with
    # the same href (even from a different user) replaces the stored
    # document -- confirm this is intended.
    db['links'].update(dict(href=out['href']), out, upsert=True)


parser = optparse.OptionParser()
parser.add_option("--mongo", help="host")
parser.add_option("--login", help="user:passwd")


def main():
    """Fetch every post page by page and store each one via addPost."""
    opts, args = parser.parse_args()

    if not opts.login or ':' not in opts.login:
        parser.error("--login is required and must look like user:passwd")
    # Split on the first colon only so the password may contain ':'.
    username, password = opts.login.split(':', 1)
    auth = HTTPBasicAuth(username, password)

    # NOTE(review): pymongo.Connection is the legacy client class; newer
    # pymongo releases provide MongoClient instead -- confirm against the
    # installed version before upgrading.
    conn = pymongo.Connection(opts.mongo)
    db = conn['href']

    offset = 0
    knownTotal = '?'
    while True:
        # https://delicious.com/developers/fetchall
        print("get [%s/%s]" % (offset, knownTotal))
        resp = requests.get(POSTS_ALL_URL,
                            params={'start': offset},
                            auth=auth)
        # Fail loudly on an HTTP error instead of handing the error body
        # to the XML parser.
        resp.raise_for_status()

        posts = lxml.objectify.fromstring(resp.content)
        user = posts.get('user')
        knownTotal = posts.get('total')

        children = posts.getchildren()
        if not children:
            # An empty page means every post has been fetched.
            break

        for post in children:
            addPost(db, user, post)

        offset += len(children)


if __name__ == '__main__':
    main()