comparison repo_github_status.py @ 19:5751ef191454

read from github into local graph
author drewp@bigasterisk.com
date Sun, 09 Jan 2022 16:02:08 -0800
parents 6f38aa08408d
children b59912649fc4
comparison
equal deleted inserted replaced
18:6f38aa08408d 19:5751ef191454
"""
repos from config.yaml that are at github -> rdf data
"""
4 import datetime
5 from pathlib import Path
6 from typing import Set, Tuple
7
8 import cyclone.web
9 import docopt
10 import treq
11 from background_loop import loop_forever_async
12 from dateutil.parser import parse
13 from dateutil.tz import tzlocal
14 from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph)
15 from prometheus_client import Counter, Gauge
16 from prometheus_client.exposition import generate_latest
17 from prometheus_client.registry import REGISTRY
18 from rdfdb.currentstategraphapi import CurrentStateGraphApi
19 from rdfdb.patch import Patch
20 from rdflib import RDF, Literal, Namespace, URIRef
21 from rdflib.term import Identifier
22 from ruamel.yaml import YAML
23 from standardservice.logsetup import log, verboseLogging
24 from twisted.internet import reactor
25 from twisted.internet.defer import inlineCallbacks
26
# A statement together with its context: (subject, predicate, object, graph).
Quad = Tuple[Identifier, Identifier, Identifier, Identifier]
# A statement without context: (subject, predicate, object).
Triple = Tuple[Identifier, Identifier, Identifier]

# GitHub account name inserted into the commits API URL built by updateOne.
githubOwner = 'drewp'

# Placeholder vocabulary namespace for every predicate/class this module emits.
EX = Namespace('http://example.com/')  # todo
# Handed to loop_forever_async in main() alongside the update callback.
GITHUB_SYNC = Gauge('github_sync', 'syncs to github')
# Incremented once before each HTTP request to the GitHub API.
GITHUB_CALLS = Counter('github_calls', 'http calls to github')
35
36
37 # merge this into setToGraph
def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]):
    """Make the statements stored in context `ctx` of `pg` equal to `newTriples`.

    The quads currently in `ctx` are compared with the wanted ones and a
    single Patch is applied that deletes the stale quads and adds the
    missing ones; quads present on both sides are left alone.

    Args:
        pg: graph to patch (its private `_graph` is read for the current quads).
        ctx: context (named graph) whose contents are replaced.
        newTriples: the complete set of triples that `ctx` should hold.
    """
    wanted: Set[Quad] = {triple + (ctx,) for triple in newTriples}
    existing = {(subj, pred, obj, graph.identifier)
                for subj, pred, obj, graph in pg._graph.quads((None, None, None, ctx))}

    pg.patch(Patch(delQuads=existing - wanted, addQuads=wanted - existing))
48
49
class Metrics(cyclone.web.RequestHandler):
    """HTTP handler that writes generate_latest(REGISTRY) as text/plain."""

    def get(self):
        """Respond with the current contents of the prometheus REGISTRY."""
        self.add_header('content-type', 'text/plain')
        exposition = generate_latest(REGISTRY)
        self.write(exposition)
55
56
class Index(cyclone.web.RequestHandler):
    """HTTP handler for the landing page, which only links to the graph."""

    def get(self, *args):
        """Write a static HTML page; any captured URL groups in `args` are ignored."""
        page = '''<!DOCTYPE html>
<html>
<head>
<title>repo_github_status</title>
</head>
<body>
<a href="graph/localRepos">graph</a>
</body>
</html>'''
        self.add_header('content-type', 'text/html')
        self.write(page)
70
71
@inlineCallbacks
def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str):
    """Fetch the newest commit of one GitHub repo and write it into `graph`.

    Writes the repo -> github-repo link first, then requests one commit
    (`per_page=1`) from the GitHub commits API and stores its author and
    commit statements.

    Args:
        graph: graph that receives the statements.
        repo: URI of the local repo.
        ghrepo: URI standing for the repo's GitHub copy.
        shortRepoName: repo name as it appears under `githubOwner` on GitHub.

    Raises:
        ValueError: if GitHub answers with a non-200 status, with a body that
            is not a JSON list, or with an empty list of commits.
    """
    log.info(f'getting update from github for {repo}')

    writeGhRepo(graph, repo, ghrepo)

    commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1'
    GITHUB_CALLS.inc()
    resp = yield treq.get(commitsUrl,
                          timeout=5,
                          headers={
                              'User-agent': 'reposync by github.com/drewp',
                              'Accept': 'application/vnd.github.v3+json'
                          })

    # Error replies (rate limit, missing repo, ...) carry a JSON *object*, which
    # would pass a bare len() check and then fail on ret[0] with KeyError: 0.
    # Report the status and the start of the body instead.
    if resp.code != 200:
        errBody = yield treq.text_content(resp)
        raise ValueError(f"github returned HTTP {resp.code} for {commitsUrl}: {errBody[:200]}")

    ret = yield treq.json_content(resp)

    if not isinstance(ret, list) or len(ret) < 1:
        raise ValueError(f"no commits on {commitsUrl}")
    log.info(f'{repo=} {ret[0]=}')

    # NOTE(review): GitHub's API docs list the top-level 'author' as nullable;
    # writeAuthor would fail on None -- confirm how that case should be stored.
    author = writeAuthor(graph, ret[0]['author'])
    writeCommit(graph, ghrepo, ret[0], author)
94
95
def writeGhRepo(graph, repo, ghrepo):
    """Store the link between a local repo and its GitHub repo node.

    The two statements live in their own context, `<ghrepo>/config`, whose
    previous contents are replaced.
    """
    configCtx = URIRef(ghrepo + '/config')
    stmts = {
        (ghrepo, RDF.type, EX['GithubRepo']),
        (repo, EX['githubRepo'], ghrepo),
    }
    replaceContext(graph, configCtx, stmts)
101
102
def writeCommit(graph, ghrepo, row, author):
    """Replace the `ghrepo` context with statements describing one commit.

    Args:
        graph: graph passed through to replaceContext.
        ghrepo: URI of the GitHub repo node; subject of `latestCommit` and
            also the context whose contents get replaced.
        row: one element of the GitHub commits JSON list. Reads
            row['commit'], row['sha'] and row['parents'].
        author: URI of the commit author's node; becomes the commit's
            `creator` and receives the `foafMail` statement.
    """
    commit = row['commit']
    latest = URIRef(commit['url'])
    # The creation time is stored as an ISO-8601 string in the local timezone.
    created = parse(commit['author']['date']).astimezone(tzlocal()).isoformat()

    new: Set[Triple] = {
        (ghrepo, EX['latestCommit'], latest),
        (latest, RDF.type, EX['GithubCommit']),
        (latest, EX['created'], Literal(created)),
        (latest, EX['creator'], author),
        # The mail address is attached to the *author* node, so it must come from
        # the commit's author record; the committer can be someone else.
        (author, EX['foafMail'], Literal(commit['author']['email'])),
        (latest, EX['commitMessage'], Literal(commit['message'])),
        (latest, EX['sha'], Literal(row['sha'])),
    }
    new.update((latest, EX['parent'], Literal(parent['url'])) for parent in row['parents'])

    replaceContext(graph, ghrepo, new)
123
124
def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef:
    """Store a GitHub author in a context named after the author's URI.

    Args:
        graph: graph passed through to replaceContext.
        author: mapping with the keys 'url', 'login' and 'avatar_url'.

    Returns:
        The author's URI (built from author['url']), which is also the
        context that was replaced.
    """
    authorUri = URIRef(author['url'])
    login = Literal(author['login'])
    avatar = URIRef(author['avatar_url'])

    stmts = {
        (authorUri, RDF.type, EX['GithubAuthor']),
        (authorUri, EX['login'], login),
        (authorUri, EX['avatar'], avatar),
    }
    replaceContext(graph, authorUri, stmts)
    return authorUri
134
135
@inlineCallbacks
def update(graph, repos):
    """Refresh every repo in `repos` whose GitHub data is older than 24 hours.

    Repos are handled one after another. After a successful updateOne the
    repo's `lastRead` is set to the time taken just before the check.

    Args:
        graph: graph that holds both the repo data and the lastRead times.
        repos: iterable of short repo names.
    """
    maxAge = datetime.timedelta(hours=24)

    for shortRepoName in repos:
        repoUri = URIRef(f'http://bigasterisk.com/repo/{shortRepoName}')
        ghrepo = URIRef(repoUri + '/github')

        now = datetime.datetime.now(tzlocal())
        if not lastReadBefore(graph, ghrepo, now, maxAge):
            # Read recently enough; leave this repo alone.
            continue

        yield updateOne(graph, repoUri, ghrepo, shortRepoName)
        graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now))
146
147
def lastReadBefore(graph, ghrepo, now, ago):
    """Tell whether `ghrepo` needs a fresh read.

    Returns:
        True when `ghrepo` has no `lastRead` value in the graph, or when
        that value is earlier than `now - ago`; False otherwise.
    """
    with graph.currentState() as g:
        lastRead = g.value(ghrepo, EX['lastRead'])
        if lastRead is None:
            return True
        readAt = lastRead.toPython()
        return readAt < now - ago
152
153
def githubRepoForPath(p: Path) -> str:
    """Return the GitHub repo name for a local repo dir: its final path component."""
    repoName = p.name
    return repoName
156
157
def main():
    """Run the service: sync GitHub data into a graph and serve it over HTTP.

    Reads `config.yaml` from the current directory, keeps the `hg_repos`
    rows whose 'github' value is truthy, starts the periodic update loop and
    then serves the graph, its event stream and the metrics on port 8000
    until the reactor stops.
    """
    # docopt needs at least two spaces between an option and its description.
    args = docopt.docopt('''
Usage:
  repo_github_status.py [options]

Options:
  -v, --verbose  more logging
''')
    verboseLogging(args['--verbose'])

    yaml = YAML(typ='safe')
    # Close the config file as soon as it is parsed instead of leaking the handle.
    with open('config.yaml') as configFile:
        config = yaml.load(configFile)
    repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row['github']]

    log.info(f'{repos=}')

    class PG2(PatchableGraph, CurrentStateGraphApi):
        """PatchableGraph plus the currentState() API that lastReadBefore uses."""
        pass

    graph = PG2()

    loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC)

    class Application(cyclone.web.Application):
        """Web application exposing the index page, the graph and the metrics."""

        def __init__(self):
            handlers = [
                (r"/()", Index),
                (r'/graph/localRepos', CycloneGraphHandler, {
                    'masterGraph': graph
                }),
                (r'/graph/localRepos/events', CycloneGraphEventsHandler, {
                    'masterGraph': graph,
                }),
                (r'/metrics', Metrics),
            ]
            cyclone.web.Application.__init__(
                self,
                handlers,
                debug=args['--verbose'],
            )

    reactor.listenTCP(8000, Application(), interface='::')
    reactor.run()
202
203
# Start the service only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()