Mercurial > code > home > repos > gcalendarwatch
annotate ingest.py @ 45:e53a1bc87f99
cleanup and some fixes to starred event graph
author | drewp@bigasterisk.com |
---|---|
date | Thu, 06 Jun 2024 15:57:26 -0700 |
parents | 7d9609edcf9c |
children |
rev | line source |
---|---|
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
1 import logging |
38 | 2 import os |
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
3 import re |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
4 from typing import Any, Dict, Iterable, List, Sequence, cast |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
5 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
6 import pymongo.collection |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
7 from dateutil.tz import tzlocal |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
8 from googleapiclient.discovery import Resource |
38 | 9 from googleapiclient.errors import HttpError |
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
10 from patchablegraph import PatchableGraph |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
11 from rdflib import Namespace |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
12 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
13 from calendar_connection import getCalendarService |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
14 from datetimemath import dayRange, limitDays, parse |
38 | 15 from graphconvert import asGraph |
42
7d9609edcf9c
track calendar feed summary/description text and emit them in graphs
drewp@bigasterisk.com
parents:
38
diff
changeset
|
16 from localtypes import Conf, Record, feedFromCalId |
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
17 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
# getLogger() is called without a name, so this is the root logger and all
# messages from this module go through the root logger's handlers/level.
log = logging.getLogger()
# RDF namespace for event terms; used below as EV['CountdownEvent'].
EV = Namespace("http://bigasterisk.com/event#")
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
20 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
21 |
45
e53a1bc87f99
cleanup and some fixes to starred event graph
drewp@bigasterisk.com
parents:
42
diff
changeset
|
def _getFirstPageOfCalendars(service: Resource) -> Iterable[tuple[str, str | None, str | None]]:
    """Yield (id, summary, description) for each calendar in one calendarList response.

    Exactly one calendarList().list() request is executed; no further pages
    are requested. 'summary' and 'description' come back as None when the row
    has no such key.

    This is a generator, so the request is not sent (and cannot fail) until
    the caller starts iterating.
    """
    response = service.calendarList().list().execute()
    for entry in response['items']:
        calId = entry['id']
        yield calId, entry.get('summary'), entry.get('description')
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
25 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
26 |
45
e53a1bc87f99
cleanup and some fixes to starred event graph
drewp@bigasterisk.com
parents:
42
diff
changeset
|
def _recordFromEv(conf: Conf, calId: str, ev: Dict) -> Record:
    """Build a Record dict from one event dict returned by the calendar API.

    conf: must provide 'event_uri_ns', which is prefixed to the event id to
        form the record's 'uri'.
    calId: calendar id the event came from; turned into the 'feed' value by
        feedFromCalId.
    ev: event dict. 'id', 'start' and 'end' are required (KeyError otherwise);
        'summary', 'endTimeUnspecified', 'htmlLink' and 'creator' are optional.

    'start'/'end' hold the raw string from the event. For each of those fields
    the record also gets '<field>Time' (a parsed datetime) and '<field>Date'
    (an ISO date string).
    """

    def dateOrTime(d):
        # Prefer the bare 'date' string when present, else the 'dateTime' one.
        if 'date' in d:
            return d['date']
        return d['dateTime']

    rec = {
        'uri': conf['event_uri_ns'] + ev['id'],
        'feed': feedFromCalId(conf, calId),
        'title': ev.get('summary', '?'),
        'start': dateOrTime(ev['start']),
        'end': dateOrTime(ev['end']),
        'endTimeUnspecified': ev.get('endTimeUnspecified', False),
        'htmlLink': ev.get('htmlLink', ''),
        'creatorEmail': ev.get('creator', {}).get('email', ''),
    }

    for field in ('start', 'end'):
        val = ev[field]
        if 'date' in val:
            # Date-only value (presumably an all-day event): the parsed result
            # is stamped with the local timezone.
            when = parse(val['date']).replace(tzinfo=tzlocal())
            day = val['date']
        else:
            # Parse once and derive the date from the same datetime instead of
            # parsing the identical string a second time.
            when = parse(val['dateTime'])
            day = when.date().isoformat()
        rec['%sTime' % field] = when
        rec['%sDate' % field] = day
    return cast(Record, rec)
28
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
53 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
54 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
def filterStarred(recs: Sequence["Record"], maxCount=15) -> List["Record"]:
    """Return up to maxCount records whose title ends in '*', star removed.

    Records are considered in ascending order of their 'start' string. A title
    counts as starred when its last non-whitespace character is '*'. Each
    returned record is a shallow copy whose 'title' has that star and any
    surrounding trailing whitespace removed; the input records are not
    modified.

    maxCount <= 0 yields an empty list (previously one record slipped through
    because the limit was only checked after appending).
    """
    if maxCount <= 0:
        return []

    out = []
    for rec in sorted(recs, key=lambda r: r['start']):
        trimmed = rec['title'].rstrip()
        if not trimmed.endswith('*'):
            continue
        starred = rec.copy()
        # Plain string slicing keeps multi-line titles intact and drops the
        # whitespace that used to be left in front of the removed star.
        starred['title'] = trimmed[:-1].rstrip()
        out.append(starred)
        if len(out) >= maxCount:
            break
    return out
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
66 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
67 |
e2209226b001
rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff
changeset
|
class SyncToMongo(object):
    """reads gcal, writes to mongodb and graphs"""
    collection: pymongo.collection.Collection

    def __init__(self, conf: Conf, collection: pymongo.collection.Collection, agendaGraph: PatchableGraph, countdownGraph: PatchableGraph):
        """Store the config, collections and graphs, and open the calendar service.

        collection: mongo collection holding event records (keyed by '_id').
        The calendar metadata collection 'gcalendar_cals' is taken from the
        same database as `collection`.
        """
        self.conf = conf
        self.service = getCalendarService()
        self.collection = collection
        self.calendarsCollection = collection.database.get_collection('gcalendar_cals')
        self.agendaGraph = agendaGraph
        self.countdownGraph = countdownGraph

    def update(self, days=10, cal=None) -> int:
        """Re-read events in the dayRange(days) window and refresh both graphs.

        Stored records whose startTime is in the window are deleted first and
        then re-read from the calendar service. If `cal` is given, only that
        calendar id is cleared and re-read.

        Returns the number of events read whose uri was not among the records
        deleted from the window.
        """
        start, end = dayRange(days)
        idsFormerlyInRange = self._clearByStartTime(cal, start, end)

        totalNew, currentRecords = self._gatherNewEventsInRange(cal, start, end, idsFormerlyInRange)

        self.updateGraphs(currentRecords)
        return totalNew

    def _gatherNewEventsInRange(self, cal, start, end, idsFormerlyInRange):
        """Read events for every listed calendar (or only `cal`).

        Also upserts each listed calendar's summary/description into
        calendarsCollection, including calendars skipped by the `cal` filter.

        Returns (totalNew, currentRecords): the count of events whose uri is
        not in idsFormerlyInRange, and the list of all records read.
        """
        totalNew = 0
        currentRecords = []
        try:
            # _getFirstPageOfCalendars is a generator: the HTTP request only
            # runs once it is iterated, so it must be consumed inside the try
            # for the HttpError handler to be reachable.
            cals = list(_getFirstPageOfCalendars(self.service))
        except HttpError:
            log.exception('on getFirstPageOfCalendars')
            os.abort()

        # Set membership instead of scanning a list for every event.
        formerIds = set(idsFormerlyInRange)
        for calId, summary, description in cals:
            self.calendarsCollection.update_one({'_id': calId}, {'$set': {
                'summary': summary,
                'description': description,
            }}, upsert=True)
            if cal and calId != cal:
                continue
            try:
                # ints are passed by value, so the running count has to come
                # back as the return value.
                totalNew = self._updateOneCal(start, end, formerIds, totalNew, currentRecords, calId)
            except HttpError:
                # One failing calendar is logged and skipped; the others are
                # still read.
                log.exception(f"on cal {calId}")
        return totalNew, currentRecords

    def _clearByStartTime(self, cal, start, end):
        """Delete stored records with start <= startTime <= end.

        If `cal` is not None, only records of that calendar's feed are
        deleted. Returns the list of '_id' values that matched just before
        the delete.
        """
        spec: Dict[str, Any] = {"startTime": {"$gte": start, "$lte": end}}
        if cal is not None:
            spec['feed'] = feedFromCalId(self.conf, cal)
        idsFormerlyInRange = [doc['_id'] for doc in self.collection.find(spec)]
        result = self.collection.delete_many(spec)
        # delete_many returns a result object; log the count, not its repr.
        log.info(f'cleared {result.deleted_count} records before reread')
        return idsFormerlyInRange

    def _updateOneCal(self, start, end, idsFormerlyInRange, totalNew, currentRecords, calId):
        """Read one calendar's events in [start, end] and store them.

        Every record read is appended to `currentRecords` (mutated in place).

        totalNew: running count of new events so far.
        Returns totalNew plus the number of events from this calendar whose
        uri is not in idsFormerlyInRange.

        Only one events().list() request (maxResults=1000) is made; any
        further result pages are not requested.
        """
        print('read %s' % calId)
        events = self.service.events().list(
            calendarId=calId,
            singleEvents=True,
            timeMin=start.isoformat(),
            timeMax=end.isoformat(),
            showDeleted=False,
            maxResults=1000,
        ).execute()

        # Tolerate a response with no 'items' key instead of raising KeyError.
        for ev in events.get('items', []):
            rec = _recordFromEv(self.conf, calId, ev)
            self._upsertMongo(rec)
            if rec['uri'] not in idsFormerlyInRange:
                totalNew += 1
            currentRecords.append(rec)
        return totalNew

    def _upsertMongo(self, rec: Record) -> List[Record]:
        """Insert `rec` unless a document with its uri as '_id' already exists.

        Returns [rec] when a document was inserted, [] when one already
        existed (the existing document is left unchanged).
        """
        if self.collection.find_one({"_id": rec['uri']}) is not None:
            log.debug("existing record %s", rec['uri'])
            # this is not yet noticing updates
            return []

        log.debug("add record %s", rec)
        d = cast(Dict[str, Any], rec.copy())
        # The stored document is keyed by the event uri.
        d['_id'] = d.pop('uri')
        self.collection.insert_one(d)
        return [rec]

    def updateGraphs(self, currentRecords: Iterable[Record]):
        """Replace the contents of the agenda and countdown graphs.

        The agenda graph gets limitDays(currentRecords, days=2); the countdown
        graph gets up to 15 starred records, typed as EV['CountdownEvent'].
        Both graphs also receive every document from calendarsCollection.
        """
        # Materialize once: the records are consumed twice below.
        currentRecords = list(currentRecords)
        cals = list(self.calendarsCollection.find())
        self.agendaGraph.setToGraph(asGraph(self.conf, cals, limitDays(currentRecords, days=2)))
        self.countdownGraph.setToGraph(asGraph(self.conf, cals, filterStarred(currentRecords, maxCount=15), extraClasses=[EV['CountdownEvent']]))