Mercurial > code > home > repos > reposync
annotate repo_github_status.py @ 20:b59912649fc4
rewrite local hg scanner
author | drewp@bigasterisk.com |
---|---|
date | Sun, 09 Jan 2022 20:47:57 -0800 |
parents | 5751ef191454 |
children | cb71722bb75c |
rev | line source |
---|---|
18
6f38aa08408d
starting over: make a web page that draws a streamed graph from collector, with plans for services to scrape the data that collector will subscribe to
drewp@bigasterisk.com
parents:
diff
changeset
|
1 """ |
6f38aa08408d
starting over: make a web page that draws a streamed graph from collector, with plans for services to scrape the data that collector will subscribe to
drewp@bigasterisk.com
parents:
diff
changeset
|
2 repos from config.yaml that are at github -> rdf data |
19 | 3 """ |
4 import datetime | |
5 from pathlib import Path | |
6 from typing import Set, Tuple | |
7 | |
8 import cyclone.web | |
9 import docopt | |
10 import treq | |
11 from background_loop import loop_forever_async | |
12 from dateutil.parser import parse | |
13 from dateutil.tz import tzlocal | |
14 from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph) | |
15 from prometheus_client import Counter, Gauge | |
16 from prometheus_client.exposition import generate_latest | |
17 from prometheus_client.registry import REGISTRY | |
18 from rdfdb.currentstategraphapi import CurrentStateGraphApi | |
19 from rdfdb.patch import Patch | |
20 from rdflib import RDF, Literal, Namespace, URIRef | |
21 from rdflib.term import Identifier | |
22 from ruamel.yaml import YAML | |
23 from standardservice.logsetup import log, verboseLogging | |
24 from twisted.internet import reactor | |
25 from twisted.internet.defer import inlineCallbacks | |
26 | |
# Shapes of the RDF statements handled in this module: a Quad is a Triple
# plus the identifier of the context (named graph) that holds it.
Quad = Tuple[Identifier, Identifier, Identifier, Identifier]
Triple = Tuple[Identifier, Identifier, Identifier]

# GitHub account name interpolated into every commits URL built by updateOne.
githubOwner = 'drewp'

EX = Namespace('http://example.com/')  # todo
# GITHUB_SYNC is handed to loop_forever_async in main(); GITHUB_CALLS is
# incremented once per GitHub HTTP request in updateOne.
GITHUB_SYNC = Gauge('github_sync', 'syncs to github')
GITHUB_CALLS = Counter('github_calls', 'http calls to github')
35 | |
36 | |
37 # merge this into setToGraph | |
def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]):
    """Make context `ctx` of `pg` contain exactly `newTriples`.

    The quads currently stored under `ctx` are compared with the requested
    ones, and the difference is sent to `pg` as a single Patch: stale quads
    are deleted, missing quads are added.
    """
    wanted: Set[Quad] = {(*triple, ctx) for triple in newTriples}
    existing = {(s, p, o, g.identifier) for s, p, o, g in pg._graph.quads((None, None, None, ctx))}

    pg.patch(Patch(delQuads=existing - wanted, addQuads=wanted - existing))
48 | |
49 | |
class Metrics(cyclone.web.RequestHandler):
    """Serves the metrics held in REGISTRY in the Prometheus text format."""

    def get(self):
        """Write the latest metrics snapshot as a plain-text response."""
        # set_header replaces the handler's default Content-Type; add_header
        # would append a second, conflicting Content-Type to the response.
        self.set_header('Content-Type', 'text/plain')
        self.write(generate_latest(REGISTRY))
55 | |
56 | |
class Index(cyclone.web.RequestHandler):
    """Minimal landing page that links to this service's graph endpoint."""

    def get(self, *args):
        """Write a static HTML page.

        `args` receives the (empty) capture group of the route pattern and is
        not used.
        """
        # set_header replaces the default Content-Type instead of appending a
        # duplicate header the way add_header does.
        self.set_header('Content-Type', 'text/html')
        # The link must match the '/graph/githubRepos' route registered in
        # main(); the previous 'graph/localRepos' target is not served here.
        self.write('''<!DOCTYPE html>
<html>
  <head>
    <title>repo_github_status</title>
  </head>
  <body>
    <a href="graph/githubRepos">graph</a>
  </body>
</html>''')
70 | |
71 | |
@inlineCallbacks
def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str):
    """Fetch the newest commit of one GitHub repo and store it in `graph`.

    Args:
        graph: graph that receives the repo, author and commit statements.
        repo: URI of the local repo.
        ghrepo: URI standing for the repo's GitHub mirror.
        shortRepoName: repo name as it appears under `githubOwner` on GitHub.

    Raises:
        ValueError: GitHub answered with a non-200 status, or the response
            did not contain at least one commit.
    """
    log.info(f'getting update from github for {repo}')

    writeGhRepo(graph, repo, ghrepo)

    # per_page=1 because only the most recent commit is recorded.
    commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1'
    GITHUB_CALLS.inc()
    resp = yield treq.get(commitsUrl,
                          timeout=5,
                          headers={
                              'User-agent': 'reposync by github.com/drewp',
                              'Accept': 'application/vnd.github.v3+json'
                          })
    if resp.code != 200:
        # Error responses carry a JSON object, not a commit list; report the
        # status and body here instead of failing later on ret[0].
        body = yield treq.text_content(resp)
        raise ValueError(f"status {resp.code} from {commitsUrl}: {body[:200]}")
    ret = yield treq.json_content(resp)

    if not isinstance(ret, list) or len(ret) < 1:
        raise ValueError(f"no commits on {commitsUrl}")
    log.info(f'{repo=} {ret[0]=}')

    # NOTE(review): the top-level 'author' may be null when GitHub cannot link
    # the commit to an account -- confirm; writeAuthor would fail on it.
    author = writeAuthor(graph, ret[0]['author'])
    writeCommit(graph, ghrepo, ret[0], author)
94 | |
95 | |
def writeGhRepo(graph, repo, ghrepo):
    """Record, in the '<ghrepo>/config' context, that `repo` has the GitHub repo `ghrepo`."""
    configCtx = URIRef(ghrepo + '/config')
    stmts = {
        (ghrepo, RDF.type, EX['GithubRepo']),
        (repo, EX['githubRepo'], ghrepo),
    }
    replaceContext(graph, configCtx, stmts)
101 | |
102 | |
def writeCommit(graph, ghrepo, row, author):
    """Replace the `ghrepo` context with statements describing its latest commit.

    Args:
        graph: graph to patch.
        ghrepo: URI of the GitHub repo; also used as the context to replace.
        row: one element of the GitHub commits listing. This function reads
            row['commit'], row['sha'] and row['parents'].
        author: URI of the commit author, as returned by writeAuthor.
    """
    commit = row['commit']
    latest = URIRef(commit['url'])
    # Store the author date converted to the local timezone, as ISO text.
    created = parse(commit['author']['date']).astimezone(tzlocal()).isoformat()

    new: Set[Triple] = {
        (ghrepo, EX['latestCommit'], latest),
        (latest, RDF.type, EX['GithubCommit']),
        (latest, EX['created'], Literal(created)),
        (latest, EX['creator'], author),
        # The mail is attached to the author URI, so it is read from the
        # commit's author record; the committer can be a different identity.
        (author, EX['foafMail'], Literal(commit['author']['email'])),
        (latest, EX['commitMessage'], Literal(commit['message'])),
        (latest, EX['sha'], Literal(row['sha'])),
    }
    new.update((latest, EX['parent'], Literal(parent['url'])) for parent in row['parents'])

    replaceContext(graph, ghrepo, new)
123 | |
124 | |
def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef:
    """Store a GitHub author in a context named after the author's URI.

    Reads the 'url', 'login' and 'avatar_url' keys of `author` and returns
    the author's URI (built from 'url').
    """
    uri = URIRef(author['url'])
    stmts = {
        (uri, RDF.type, EX['GithubAuthor']),
        (uri, EX['login'], Literal(author['login'])),
        (uri, EX['avatar'], URIRef(author['avatar_url'])),
    }
    replaceContext(graph, uri, stmts)
    return uri
134 | |
135 | |
@inlineCallbacks
def update(graph, repos):
    """Refresh every repo in `repos` whose GitHub data is over 24 hours old.

    Repos are handled one after another. After a successful refresh the
    repo's lastRead time is patched into the EX['githubUpdates'] context.
    """
    maxAge = datetime.timedelta(hours=24)
    for shortRepoName in repos:
        uri = URIRef(f'http://bigasterisk.com/repo/{shortRepoName}')
        ghrepo = URIRef(uri + '/github')

        now = datetime.datetime.now(tzlocal())
        if not lastReadBefore(graph, ghrepo, now, maxAge):
            # Read recently enough; skip this repo.
            continue
        yield updateOne(graph, uri, ghrepo, shortRepoName)
        graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now))
146 | |
147 | |
def lastReadBefore(graph, ghrepo, now, ago):
    """Return True if `ghrepo` has no lastRead value or it is older than `now - ago`."""
    with graph.currentState() as g:
        lastRead = g.value(ghrepo, EX['lastRead'])
    if lastRead is None:
        # Never read before.
        return True
    return lastRead.toPython() < now - ago
152 | |
153 | |
def githubRepoForPath(p: Path) -> str:
    """Return the GitHub repo name for a local repo dir: the final component of `p`."""
    repoName = p.name
    return repoName
156 | |
157 | |
def main():
    """Start the service.

    Parses the command line, reads config.yaml, starts the periodic GitHub
    update loop and serves the index page, the graph (plus its events
    stream) and /metrics on TCP port 8000.
    """
    args = docopt.docopt('''
Usage:
  repo_github_status.py [options]

Options:
  -v, --verbose  more logging
''')
    verboseLogging(args['--verbose'])

    yaml = YAML(typ='safe')
    # Close the config file once it is parsed instead of leaking the handle.
    with open('config.yaml') as configFile:
        config = yaml.load(configFile)
    # Only rows flagged with a truthy 'github' value are polled; rows that
    # lack the key are treated as not being on GitHub.
    repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row.get('github')]

    log.info(f'{repos=}')

    # Graph class that combines PatchableGraph with the CurrentStateGraphApi
    # mixin; lastReadBefore calls currentState() on the resulting graph.
    class PG2(PatchableGraph, CurrentStateGraphApi):
        pass

    graph = PG2()

    # NOTE(review): 10 is presumably the loop period in seconds -- confirm
    # against background_loop.
    loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC)

    class Application(cyclone.web.Application):
        """Web application exposing the index, graph, events and metrics routes."""

        def __init__(self):
            handlers = [
                (r"/()", Index),
                (r'/graph/githubRepos', CycloneGraphHandler, {
                    'masterGraph': graph
                }),
                (r'/graph/githubRepos/events', CycloneGraphEventsHandler, {
                    'masterGraph': graph,
                }),
                (r'/metrics', Metrics),
            ]
            cyclone.web.Application.__init__(
                self,
                handlers,
                debug=args['--verbose'],
            )

    reactor.listenTCP(8000, Application(), interface='::')
    reactor.run()
202 | |
203 | |
# Start the service only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()