Mercurial > code > home > repos > reposync
view repo_github_status.py @ 24:03803832a087 default tip
add view definition for streamed-graph viewer
author | drewp@bigasterisk.com |
---|---|
date | Tue, 29 Mar 2022 21:17:56 -0700 |
parents | cb71722bb75c |
children |
line wrap: on
line source
""" repos from config.yaml that are at github -> rdf data """ import datetime from pathlib import Path from typing import Set, Tuple import cyclone.web import docopt import treq from background_loop import loop_forever_async from dateutil.parser import parse from dateutil.tz import tzlocal from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph) from prometheus_client import Counter, Gauge from prometheus_client.exposition import generate_latest from prometheus_client.registry import REGISTRY from rdfdb.currentstategraphapi import CurrentStateGraphApi from rdfdb.patch import Patch from rdflib import RDF, Literal, Namespace, URIRef from rdflib.term import Identifier from ruamel.yaml import YAML from standardservice.logsetup import log, verboseLogging from twisted.internet import reactor from twisted.internet.defer import inlineCallbacks Quad = Tuple[Identifier, Identifier, Identifier, Identifier] Triple = Tuple[Identifier, Identifier, Identifier] githubOwner = 'drewp' EX = Namespace('http://example.com/') # todo GITHUB_SYNC = Gauge('github_sync', 'syncs to github') GITHUB_CALLS = Counter('github_calls', 'http calls to github') # merge this into setToGraph def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]): prevCtxStmts = set((s, p, o, g.identifier) for s, p, o, g in pg._graph.quads((None, None, None, ctx))) currentStmts: Set[Quad] = set() for tri in newTriples: currentStmts.add(tri + (ctx,)) p = Patch(delQuads=prevCtxStmts.difference(currentStmts), addQuads=currentStmts.difference(prevCtxStmts)) pg.patch(p) class Metrics(cyclone.web.RequestHandler): def get(self): self.add_header('content-type', 'text/plain') self.write(generate_latest(REGISTRY)) class Index(cyclone.web.RequestHandler): def get(self, *args): self.add_header('content-type', 'text/html') self.write('''<!DOCTYPE html> <html> <head> <title>repo_github_status</title> </head> <body> <a href="graph/localRepos">graph</a> </body> </html>''') 
@inlineCallbacks
def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str):
    """Fetch the newest commit of one github repo and write it into `graph`.

    Writes the repo/ghrepo config triples first, then requests the single most
    recent commit (per_page=1) of `githubOwner`/`shortRepoName` from the github
    API and records its author and commit details.

    Args:
        graph: graph that receives the resulting contexts.
        repo: URI of the local repo.
        ghrepo: URI of the repo's github counterpart.
        shortRepoName: repo name as it appears in the github URL.

    Raises:
        ValueError: the response is not a JSON list (for example an error
            object) or the list is empty.
    """
    log.info(f'getting update from github for {repo}')

    writeGhRepo(graph, repo, ghrepo)

    commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1'
    GITHUB_CALLS.inc()
    resp = yield treq.get(commitsUrl,
                          timeout=5,
                          headers={
                              'User-agent': 'reposync by github.com/drewp',
                              'Accept': 'application/vnd.github.v3+json'
                          })
    ret = yield treq.json_content(resp)
    # A non-list payload (e.g. an error object) would otherwise pass the
    # length check and fail below with an unhelpful KeyError on ret[0].
    if not isinstance(ret, list):
        raise ValueError(f"unexpected response from {commitsUrl}: {ret!r}")
    if len(ret) < 1:
        raise ValueError(f"no commits on {commitsUrl}")
    log.info(f'{repo=} {ret[0]=}')

    # The github API documents the top-level 'author' of a commit as nullable;
    # in that case no author context is written and the commit gets no creator.
    authorRow = ret[0].get('author')
    author = writeAuthor(graph, authorRow) if authorRow is not None else None
    writeCommit(graph, ghrepo, ret[0], author)


def writeGhRepo(graph, repo, ghrepo):
    """Replace the '<ghrepo>/config' context with the repo <-> ghrepo linkage."""
    configCtx = URIRef(ghrepo + '/config')
    replaceContext(graph, configCtx, {
        (repo, RDF.type, EX['Repo']),
        (repo, EX['githubRepo'], ghrepo),
        (ghrepo, RDF.type, EX['GithubRepo']),
    })


def writeCommit(graph, ghrepo, row, author):
    """Replace the `ghrepo` context with the description of one commit.

    Args:
        graph: graph to patch.
        ghrepo: github repo URI; also used as the context being replaced.
        row: one element of the github commits listing; read for 'commit',
            'sha' and 'parents'.
        author: URI of the commit's author, or None to leave out the creator
            and mail triples.
    """
    commit = row['commit']
    latest = URIRef(commit['url'])
    # The author date is converted to the local timezone before serializing.
    created = parse(commit['author']['date']).astimezone(tzlocal()).isoformat()

    new: Set[Triple] = {
        (ghrepo, EX['latestCommit'], latest),
        (latest, RDF.type, EX['GithubCommit']),
        (latest, EX['created'], Literal(created)),
        (latest, EX['commitMessage'], Literal(commit['message'])),
        (latest, EX['sha'], Literal(row['sha'])),
    }
    if author is not None:
        new.add((latest, EX['creator'], author))
        # NOTE(review): this is the *committer's* email attached to the author
        # node; confirm whether commit['author']['email'] was intended.
        new.add((author, EX['foafMail'], Literal(commit['committer']['email'])))
    for parentRow in row['parents']:
        new.add((latest, EX['parent'], Literal(parentRow['url'])))

    replaceContext(graph, ghrepo, new)


def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef:
    """Replace the context named by the author's URL with the author's details.

    Args:
        graph: graph to patch.
        author: author object from the commits listing; read for 'url',
            'login' and 'avatar_url'.

    Returns:
        The author's URI, which is also the context that was written.
    """
    uri = URIRef(author['url'])
    replaceContext(graph, uri, {
        (uri, RDF.type, EX['GithubAuthor']),
        (uri, EX['login'], Literal(author['login'])),
        (uri, EX['avatar'], URIRef(author['avatar_url'])),
    })
    return uri


@inlineCallbacks
def update(graph, repos):
    """Refresh every repo in `repos` whose last github read is over 24h old.

    An exception from updateOne propagates out of this function, so the
    remaining repos are not visited in that pass and no lastRead is written
    for the failing repo.
    """
    maxAge = datetime.timedelta(hours=24)
    for shortRepoName in repos:
        uri = URIRef(f'http://bigasterisk.com/repo/{shortRepoName}')
        ghrepo = URIRef(uri + '/github')

        now = datetime.datetime.now(tzlocal())
        if lastReadBefore(graph, ghrepo, now, maxAge):
            yield updateOne(graph, uri, ghrepo, shortRepoName)
            # Only stamp lastRead after a successful fetch; stamping it on
            # every pass would keep the 24h window from ever elapsing.
            graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now))


def lastReadBefore(graph, ghrepo, now, ago):
    """Return True if `ghrepo` has no lastRead value or it is older than `now - ago`."""
    with graph.currentState() as g:
        lastRead = g.value(ghrepo, EX['lastRead'])
    if lastRead is None:
        return True
    return lastRead.toPython() < now - ago


def githubRepoForPath(p: Path) -> str:
    """Return the github repo name for a local repo dir: its final path component."""
    return p.name


def main():
    """Parse options, load config.yaml, start the sync loop and the web server."""
    args = docopt.docopt('''
Usage:
  repo_github_status.py [options]

Options:
  -v, --verbose         more logging
''')
    verboseLogging(args['--verbose'])

    yaml = YAML(typ='safe')
    # Close the config file as soon as it is parsed instead of leaking the handle.
    with open('config.yaml') as configFile:
        config = yaml.load(configFile)
    # Only rows with a truthy 'github' value are synced.
    repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row['github']]
    log.info(f'{repos=}')

    class PG2(PatchableGraph, CurrentStateGraphApi):
        pass

    graph = PG2()

    loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC)

    class Application(cyclone.web.Application):

        def __init__(self):
            handlers = [
                (r"/()", Index),
                (r'/graph/githubRepos', CycloneGraphHandler, {
                    'masterGraph': graph
                }),
                (r'/graph/githubRepos/events', CycloneGraphEventsHandler, {
                    'masterGraph': graph,
                }),
                (r'/metrics', Metrics),
            ]
            cyclone.web.Application.__init__(
                self,
                handlers,
                debug=args['--verbose'],
            )

    reactor.listenTCP(8000, Application(), interface='::')
    reactor.run()


if __name__ == '__main__':
    main()