annotate ingest.py @ 42:7d9609edcf9c

track calendar feed summary/description text and emit them in graphs
author drewp@bigasterisk.com
date Sun, 18 Feb 2024 12:34:53 -0800
parents d686e4a5b892
children e53a1bc87f99
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
1 import logging
38
d686e4a5b892 refactor; attempt clearer errors
drewp@bigasterisk.com
parents: 28
diff changeset
2 import os
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
3 import re
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
4 from typing import Any, Dict, Iterable, List, Sequence, cast
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
5
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
6 import pymongo.collection
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
7 from dateutil.tz import tzlocal
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
8 from googleapiclient.discovery import Resource
38
d686e4a5b892 refactor; attempt clearer errors
drewp@bigasterisk.com
parents: 28
diff changeset
9 from googleapiclient.errors import HttpError
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
10 from patchablegraph import PatchableGraph
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
11 from rdflib import Namespace
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
12
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
13 from calendar_connection import getCalendarService
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
14 from datetimemath import dayRange, limitDays, parse
38
d686e4a5b892 refactor; attempt clearer errors
drewp@bigasterisk.com
parents: 28
diff changeset
15 from graphconvert import asGraph
42
7d9609edcf9c track calendar feed summary/description text and emit them in graphs
drewp@bigasterisk.com
parents: 38
diff changeset
16 from localtypes import Conf, Record, feedFromCalId
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
17
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
18 log = logging.getLogger()
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
19 EV = Namespace("http://bigasterisk.com/event#")
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
20
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
21
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
22
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
23
42
7d9609edcf9c track calendar feed summary/description text and emit them in graphs
drewp@bigasterisk.com
parents: 38
diff changeset
def getFirstPageOfCalendars(service: Resource) -> Iterable[tuple[str, str | None, str | None]]:
    """Return (id, summary, description) for each calendar in the first
    page of the service's calendar list.

    The request is executed immediately (not lazily on first iteration), so
    any error raised by `execute()` surfaces at the call site, where callers
    can wrap this call in try/except.

    summary and description are None for rows that lack those keys. Only
    the first response page is read; no page token is followed.
    """
    response = service.calendarList().list().execute()
    return [(row['id'], row.get('summary'), row.get('description')) for row in response['items']]
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
27
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
28
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
def recordFromEv(conf: Conf, calId: str, ev: Dict) -> Record:
    """Build our record dict for one calendar event dict `ev` from feed `calId`.

    The record's 'uri' is conf['event_uri_ns'] + ev['id'] and its 'feed' comes
    from feedFromCalId(conf, calId). Optional event fields get defaults
    ('?' title, empty htmlLink/creatorEmail, endTimeUnspecified False).

    For each of 'start' and 'end' the record carries:
      - '<field>': the raw 'date' or 'dateTime' value from the event
      - '<field>Time': the parsed value; date-only values get tzlocal() attached
      - '<field>Date': the date as a string (the raw 'date', or the ISO date
        of the parsed 'dateTime')

    Raises KeyError if `ev` lacks 'id', 'start' or 'end', or if a start/end
    entry has neither 'date' nor 'dateTime'.
    """

    def dateOrTime(d):
        # Date-only entries carry 'date'; otherwise 'dateTime' is required.
        if 'date' in d:
            return d['date']
        return d['dateTime']

    rec = {
        'uri': conf['event_uri_ns'] + ev['id'],
        'feed': feedFromCalId(conf, calId),
        'title': ev.get('summary', '?'),
        'start': dateOrTime(ev['start']),
        'end': dateOrTime(ev['end']),
        'endTimeUnspecified': ev.get('endTimeUnspecified', False),
        'htmlLink': ev.get('htmlLink', ''),
        'creatorEmail': ev.get('creator', {}).get('email', ''),
    }

    for field in ('start', 'end'):
        val = ev[field]
        if 'date' in val:
            rec[f'{field}Time'] = parse(val['date']).replace(tzinfo=tzlocal())
            rec[f'{field}Date'] = val['date']
        else:
            # Parse once and derive both the time and the date from it.
            parsed = parse(val['dateTime'])
            rec[f'{field}Time'] = parsed
            rec[f'{field}Date'] = parsed.date().isoformat()
    return rec
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
55
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
56
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
def filterStarred(recs: Sequence[Record], maxCount=15) -> List[Record]:
    """Return at most `maxCount` records whose title ends with '*'
    (optionally followed by whitespace), ordered by their 'start' value.

    `recs` itself is not modified. A `maxCount` of zero or less yields an
    empty list.
    """
    if maxCount <= 0:
        # Without this guard the first starred record would be appended
        # before the limit is checked.
        return []
    out = []
    for rec in sorted(recs, key=lambda r: r['start']):
        if re.search(r'(.*)\*\s*$', rec['title']):
            out.append(rec)
            if len(out) >= maxCount:
                break
    return out
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
66
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
67
e2209226b001 rewrite with starlette and background_loop
drewp@bigasterisk.com
parents:
diff changeset
class SyncToMongo(object):
    """reads gcal, writes to mongodb and graphs"""
    collection: pymongo.collection.Collection

    def __init__(self, conf: Conf, collection: pymongo.collection.Collection, agendaGraph: PatchableGraph, countdownGraph: PatchableGraph):
        """Store the config, event collection and output graphs.

        Also obtains the calendar service via getCalendarService() and opens
        the 'gcalendar_cals' collection in the same database as `collection`.
        """
        self.conf = conf
        self.service = getCalendarService()
        self.collection = collection
        self.calendarsCollection = collection.database.get_collection('gcalendar_cals')
        self.agendaGraph = agendaGraph
        self.countdownGraph = countdownGraph

    def update(self, days=10, cal=None) -> int:
        """Re-read events in the dayRange(days) window and refresh the graphs.

        If `cal` is given, only that calendar's events are cleared and re-read.
        Returns the number of re-read events whose uri was not stored in the
        window before this call.
        """
        start, end = dayRange(days)
        idsFormerlyInRange = self.clearByStartTime(cal, start, end)

        totalNew, currentRecords = self.gatherNewEventsInRange(cal, start, end, idsFormerlyInRange)

        self.updateGraphs(currentRecords)
        return totalNew

    def gatherNewEventsInRange(self, cal, start, end, idsFormerlyInRange):
        """Read events for every listed calendar (or only `cal`).

        Each listed calendar's summary/description is upserted into
        calendarsCollection, including calendars skipped by the `cal` filter.
        An HttpError while listing calendars aborts the process; an HttpError
        on a single calendar is logged and that calendar is skipped.

        Returns (totalNew, currentRecords).
        """
        totalNew = 0
        currentRecords = []
        try:
            # Materialize inside the try so that a lazily-evaluated result
            # raises its HttpError here rather than in the loop below.
            cals = list(getFirstPageOfCalendars(self.service))
        except HttpError:
            log.exception('on getFirstPageOfCalendars')
            os.abort()
        # Built once so the per-event membership test is not a list scan.
        formerIds = set(idsFormerlyInRange)
        for calId, summary, description in cals:
            self.calendarsCollection.update_one({'_id': calId}, {'$set': {
                'summary': summary,
                'description': description,
            }}, upsert=True)
            if cal and calId != cal:
                continue
            try:
                # updateOneCal returns the running total; an int argument
                # cannot be updated in place by the callee.
                totalNew = self.updateOneCal(start, end, formerIds, totalNew, currentRecords, calId)
            except HttpError:
                log.exception(f"on cal {calId}")
        return totalNew, currentRecords

    def clearByStartTime(self, cal, start, end):
        """Delete stored events with start <= startTime <= end.

        If `cal` is not None, only events of that calendar's feed are matched.
        Returns the _id values of the matching documents, collected just
        before the delete.
        """
        spec: Dict[str, Any] = {"startTime": {"$gte": start, "$lte": end}}
        if cal is not None:
            spec['feed'] = feedFromCalId(self.conf, cal)
        idsFormerlyInRange = [doc['_id'] for doc in self.collection.find(spec)]
        n = self.collection.delete_many(spec)
        log.info(f'cleared {n.deleted_count} records before reread')
        return idsFormerlyInRange

    def updateOneCal(self, start, end, idsFormerlyInRange, totalNew, currentRecords, calId):
        """Fetch the events of calendar `calId` between start and end and store them.

        Every fetched event is converted with recordFromEv, passed to
        upsertMongo and appended to `currentRecords` (mutated in place).

        Returns `totalNew` plus the number of fetched events whose uri is not
        in `idsFormerlyInRange`. HttpError from the request propagates.
        """
        log.info('read %s', calId)
        # NOTE(review): only one response page (maxResults=1000) is read;
        # no nextPageToken is followed.
        events = self.service.events().list(
            calendarId=calId,
            singleEvents=True,
            timeMin=start.isoformat(),
            timeMax=end.isoformat(),
            showDeleted=False,
            maxResults=1000,
        ).execute()

        for ev in events.get('items', []):
            rec = recordFromEv(self.conf, calId, ev)
            self.upsertMongo(rec)
            if rec['uri'] not in idsFormerlyInRange:
                totalNew += 1
            currentRecords.append(rec)
        return totalNew

    def upsertMongo(self, rec: Record) -> List[Record]:
        """Insert `rec` (keyed by its uri as _id) unless that _id already exists.

        Returns [rec] if a document was inserted, [] if one was already there.
        An existing document is left untouched.
        """
        if self.collection.find_one({"_id": rec['uri']}) is not None:
            log.debug("existing record %s", rec['uri'])
            # this is not yet noticing updates
            return []
        else:
            log.debug("add record %s", rec)
            # Copy so the caller's record keeps its 'uri' key.
            d = cast(Dict[str, Any], rec.copy())
            d['_id'] = d.pop('uri')
            self.collection.insert_one(d)
            return [rec]

    def updateGraphs(self, currentRecords: Iterable[Record]):
        """Replace the contents of both graphs from `currentRecords`.

        The agenda graph gets the records kept by limitDays(..., days=2); the
        countdown graph gets up to 15 starred records, tagged with the extra
        class EV['CountdownEvent']. Both graphs also receive every document
        in calendarsCollection.
        """
        # Materialize once: the records are traversed for each graph.
        currentRecords = list(currentRecords)
        cals = list(self.calendarsCollection.find())
        self.agendaGraph.setToGraph(asGraph(self.conf, cals, limitDays(currentRecords, days=2)))
        self.countdownGraph.setToGraph(asGraph(self.conf, cals, filterStarred(currentRecords, maxCount=15), extraClasses=[EV['CountdownEvent']]))