Mercurial > code > home > repos > reposync
changeset 19:5751ef191454
read from github into local graph
author | drewp@bigasterisk.com |
---|---|
date | Sun, 09 Jan 2022 16:02:08 -0800 |
parents | 6f38aa08408d |
children | b59912649fc4 |
files | deploy.yaml repo_github_status.py requirements.txt view/index.ts |
diffstat | 4 files changed, 218 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/deploy.yaml Sun Jan 09 00:21:41 2022 -0800 +++ b/deploy.yaml Sun Jan 09 16:02:08 2022 -0800 @@ -20,13 +20,15 @@ - vite - --mode=dev ports: - - containerPort: 3000 - # - name: reposync - # image: bang5:5000/reposync_image - # imagePullPolicy: "Always" - # securityContext: {capabilities: {add: [SYS_PTRACE]}} - # ports: - # - containerPort: 10001 + - containerPort: 3000 + - name: github-status + image: bang5:5000/reposync_image + ports: + - containerPort: 8000 + command: + - python3 + - repo_github_status.py + - "-v" # volumeMounts: # - {name: my, mountPath: /my} @@ -48,5 +50,6 @@ spec: ports: - { port: 80, targetPort: 3000, name: http } + - { port: 8000, targetPort: 8000, name: localrepos } selector: app: reposync
--- a/repo_github_status.py Sun Jan 09 00:21:41 2022 -0800 +++ b/repo_github_status.py Sun Jan 09 16:02:08 2022 -0800 @@ -1,3 +1,205 @@ """ repos from config.yaml that are at github -> rdf data -""" \ No newline at end of file +""" +import datetime +from pathlib import Path +from typing import Set, Tuple + +import cyclone.web +import docopt +import treq +from background_loop import loop_forever_async +from dateutil.parser import parse +from dateutil.tz import tzlocal +from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph) +from prometheus_client import Counter, Gauge +from prometheus_client.exposition import generate_latest +from prometheus_client.registry import REGISTRY +from rdfdb.currentstategraphapi import CurrentStateGraphApi +from rdfdb.patch import Patch +from rdflib import RDF, Literal, Namespace, URIRef +from rdflib.term import Identifier +from ruamel.yaml import YAML +from standardservice.logsetup import log, verboseLogging +from twisted.internet import reactor +from twisted.internet.defer import inlineCallbacks + +Quad = Tuple[Identifier, Identifier, Identifier, Identifier] +Triple = Tuple[Identifier, Identifier, Identifier] + +githubOwner = 'drewp' + +EX = Namespace('http://example.com/') # todo +GITHUB_SYNC = Gauge('github_sync', 'syncs to github') +GITHUB_CALLS = Counter('github_calls', 'http calls to github') + + +# merge this into setToGraph +def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]): + prevCtxStmts = set((s, p, o, g.identifier) for s, p, o, g in pg._graph.quads((None, None, None, ctx))) + + currentStmts: Set[Quad] = set() + for tri in newTriples: + currentStmts.add(tri + (ctx,)) + + p = Patch(delQuads=prevCtxStmts.difference(currentStmts), addQuads=currentStmts.difference(prevCtxStmts)) + + pg.patch(p) + + +class Metrics(cyclone.web.RequestHandler): + + def get(self): + self.add_header('content-type', 'text/plain') + self.write(generate_latest(REGISTRY)) + + +class Index(cyclone.web.RequestHandler): + + def get(self, *args): + self.add_header('content-type', 'text/html') + self.write('''<!DOCTYPE html> + <html> + <head> + <title>repo_github_status</title> + </head> + <body> + <a href="graph/localRepos">graph</a> + </body> + </html>''') + + +@inlineCallbacks +def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str): + log.info(f'getting update from github for {repo}') + + writeGhRepo(graph, repo, ghrepo) + + commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1' + GITHUB_CALLS.inc() + resp = yield treq.get(commitsUrl, + timeout=5, + headers={ + 'User-agent': 'reposync by github.com/drewp', + 'Accept': 'application/vnd.github.v3+json' + }) + ret = yield treq.json_content(resp) + + if len(ret) < 1: + raise ValueError(f"no commits on {commitsUrl}") + log.info(f'{repo=} {ret[0]=}') + + author = writeAuthor(graph, ret[0]['author']) + writeCommit(graph, ghrepo, ret[0], author) + + +def writeGhRepo(graph, repo, ghrepo): + replaceContext(graph, URIRef(ghrepo + '/config'), { + (repo, EX['githubRepo'], ghrepo), + (ghrepo, RDF.type, EX['GithubRepo']), + }) + + +def writeCommit(graph, ghrepo, row, author): + new: Set[Triple] = set() + + commit = row['commit'] + latest = URIRef(commit['url']) + new.add((ghrepo, EX['latestCommit'], latest)) + + new.add((latest, RDF.type, EX['GithubCommit'])) + + t = parse(commit['author']['date']).astimezone(tzlocal()).isoformat() + new.add((latest, EX['created'], Literal(t))) + + new.add((latest, EX['creator'], author)) + new.add((author, EX['foafMail'], Literal(commit['committer']['email']))) + new.add((latest, EX['commitMessage'], Literal(commit['message']))) + new.add((latest, EX['sha'], Literal(row['sha']))) + for p in row['parents']: + new.add((latest, EX['parent'], Literal(p['url']))) + + replaceContext(graph, ghrepo, new) + + +def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef: + uri = URIRef(author['url']) + replaceContext( + graph, uri, { + (uri, RDF.type, EX['GithubAuthor']), + (uri, EX['login'], Literal(author['login'])), + (uri, EX['avatar'], URIRef(author['avatar_url'])), + }) + return uri + + +@inlineCallbacks +def update(graph, repos): + for shortRepoName in repos: + uri = URIRef(f'http://bigasterisk.com/repo/{shortRepoName}') + ghrepo = URIRef(uri + '/github') + + now = datetime.datetime.now(tzlocal()) + if lastReadBefore(graph, ghrepo, now, datetime.timedelta(hours=24)): + yield updateOne(graph, uri, ghrepo, shortRepoName) + graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now)) + + +def lastReadBefore(graph, ghrepo, now, ago): + with graph.currentState() as g: + lastRead = g.value(ghrepo, EX['lastRead']) + return lastRead is None or lastRead.toPython() < now - ago + + +def githubRepoForPath(p: Path) -> str: + return p.name + + +def main(): + args = docopt.docopt(''' +Usage: + repo_github_status.py [options] + +Options: + -v, --verbose more logging +''') + verboseLogging(args['--verbose']) + + yaml = YAML(typ='safe') + config = yaml.load(open('config.yaml')) + repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row['github']] + + log.info(f'{repos=}') + + class PG2(PatchableGraph, CurrentStateGraphApi): + pass + + graph = PG2() + + loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC) + + class Application(cyclone.web.Application): + + def __init__(self): + handlers = [ + (r"/()", Index), + (r'/graph/localRepos', CycloneGraphHandler, { + 'masterGraph': graph + }), + (r'/graph/localRepos/events', CycloneGraphEventsHandler, { + 'masterGraph': graph, + }), + (r'/metrics', Metrics), + ] + cyclone.web.Application.__init__( + self, + handlers, + debug=args['--verbose'], + ) + + reactor.listenTCP(8000, Application(), interface='::') + reactor.run() + + +if __name__ == '__main__': + main()
--- a/requirements.txt Sun Jan 09 00:21:41 2022 -0800 +++ b/requirements.txt Sun Jan 09 16:02:08 2022 -0800 @@ -4,6 +4,7 @@ prometheus_client==0.12.0 pyopenssl python-dateutil==2.8.2 +rdflib==6.1.1 requests==2.27.1 ruamel.yaml==0.17.20 treq==21.5.0 @@ -11,5 +12,8 @@ tzlocal==4.1 # PyGithub==1.14.2 +background_loop==0.3.0 cycloneerr==0.4.0 +patchablegraph==0.19.0 +rdfdb==0.21.0 standardservice==0.6.0
--- a/view/index.ts Sun Jan 09 00:21:41 2022 -0800 +++ b/view/index.ts Sun Jan 09 16:02:08 2022 -0800 @@ -31,7 +31,7 @@ render() { return html` <h1>repo statuses yay</h1> - <streamed-graph url="/collector/graph/home" expanded="true"></streamed-graph> + <streamed-graph url="/collector/graph/reposync" expanded="true"></streamed-graph> `; } }