view repo_github_status.py @ 20:b59912649fc4

rewrite local hg scanner
author drewp@bigasterisk.com
date Sun, 09 Jan 2022 20:47:57 -0800
parents 5751ef191454
children cb71722bb75c
line wrap: on
line source

"""
repos from config.yaml that are at github -> rdf data
"""
import datetime
from pathlib import Path
from typing import Set, Tuple

import cyclone.web
import docopt
import treq
from background_loop import loop_forever_async
from dateutil.parser import parse
from dateutil.tz import tzlocal
from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph)
from prometheus_client import Counter, Gauge
from prometheus_client.exposition import generate_latest
from prometheus_client.registry import REGISTRY
from rdfdb.currentstategraphapi import CurrentStateGraphApi
from rdfdb.patch import Patch
from rdflib import RDF, Literal, Namespace, URIRef
from rdflib.term import Identifier
from ruamel.yaml import YAML
from standardservice.logsetup import log, verboseLogging
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks

# An RDF statement plus the identifier of the context it belongs to:
# (subject, predicate, object, context).
Quad = Tuple[Identifier, Identifier, Identifier, Identifier]
# An RDF statement without a context: (subject, predicate, object).
Triple = Tuple[Identifier, Identifier, Identifier]

# Account name inserted into the api.github.com URL built by updateOne.
githubOwner = 'drewp'

# Namespace used for every predicate and class this module writes.
EX = Namespace('http://example.com/')  # todo
# Gauge handed to loop_forever_async together with the periodic update call.
GITHUB_SYNC = Gauge('github_sync', 'syncs to github')
# Incremented once for each HTTP request sent to the GitHub API.
GITHUB_CALLS = Counter('github_calls', 'http calls to github')


# merge this into setToGraph
def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]):
    """Make context `ctx` of `pg` hold exactly `newTriples`.

    The quads currently stored under `ctx` are compared with the requested
    ones and a single Patch carrying only the difference is applied, so
    statements that are already present are neither deleted nor re-added.
    The patch is applied even when both of its sides are empty.

    Args:
        pg: graph to patch; its underlying `_graph` is read directly.
        ctx: identifier of the context whose contents are replaced.
        newTriples: (s, p, o) tuples that should make up the context.
    """
    # Quads yielded by the store carry a graph object in the last slot;
    # reduce it to its identifier so it compares equal to `ctx` below.
    existing = {(s, p, o, g.identifier) for s, p, o, g in pg._graph.quads((None, None, None, ctx))}
    wanted: Set[Quad] = {tri + (ctx,) for tri in newTriples}

    pg.patch(Patch(delQuads=existing - wanted, addQuads=wanted - existing))


class Metrics(cyclone.web.RequestHandler):
    """HTTP handler that returns the contents of the prometheus REGISTRY."""

    def get(self):
        """Write the output of generate_latest(REGISTRY) as text/plain."""
        payload = generate_latest(REGISTRY)
        self.add_header('content-type', 'text/plain')
        self.write(payload)


class Index(cyclone.web.RequestHandler):
    """Landing page: a minimal HTML document that links to the served graph."""

    def get(self, *args):
        """Write the static index page.

        `*args` absorbs whatever the route's capture groups pass in; the
        values are not used.
        """
        self.add_header('content-type', 'text/html')
        # The link target has to be a route that main() registers. The graph
        # is served at /graph/githubRepos; the previous target
        # (graph/localRepos) is not routed by this service.
        self.write('''<!DOCTYPE html>
        <html>
          <head>
            <title>repo_github_status</title>
          </head>
          <body>
            <a href="graph/githubRepos">graph</a>
          </body>
        </html>''')


@inlineCallbacks
def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str):
    """Fetch the newest commit of one repo from the GitHub API and store it in `graph`.

    Writes the repo -> github-repo link first (writeGhRepo), then requests
    the most recent commit and writes its author (writeAuthor) and the
    commit itself (writeCommit).

    Args:
        graph: graph that receives the statements.
        repo: URI of the local repo.
        ghrepo: URI that stands for the repo's github counterpart.
        shortRepoName: repository name as used in the GitHub API URL, under
            the `githubOwner` account.

    Raises:
        ValueError: if GitHub does not answer with HTTP 200 and a JSON list,
            if that list is empty, or if the newest commit has no GitHub
            account attached as its author.
    """
    log.info(f'getting update from github for {repo}')

    writeGhRepo(graph, repo, ghrepo)

    commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1'
    GITHUB_CALLS.inc()
    resp = yield treq.get(commitsUrl,
                          timeout=5,
                          headers={
                              'User-agent': 'reposync by github.com/drewp',
                              'Accept': 'application/vnd.github.v3+json'
                          })
    ret = yield treq.json_content(resp)

    # An error reply (rate limit, unknown repo, ...) is a JSON object rather
    # than a list of commits. Indexing it with [0] would only produce an
    # unhelpful KeyError, so report the status and payload instead.
    if resp.code != 200 or not isinstance(ret, list):
        raise ValueError(f"github returned HTTP {resp.code} for {commitsUrl}: {ret!r}")
    if len(ret) < 1:
        raise ValueError(f"no commits on {commitsUrl}")
    log.info(f'{repo=} {ret[0]=}')

    # 'author' is null when GitHub cannot map the git author to an account;
    # writeAuthor needs the account's url/login/avatar_url fields.
    ghAuthor = ret[0]['author']
    if ghAuthor is None:
        raise ValueError(f"latest commit on {commitsUrl} has no github author account")

    author = writeAuthor(graph, ghAuthor)
    writeCommit(graph, ghrepo, ret[0], author)


def writeGhRepo(graph, repo, ghrepo):
    """Record, in the `<ghrepo>/config` context, that `repo` has github repo `ghrepo`.

    The context is replaced as a whole, so it ends up holding exactly the
    two statements built here.
    """
    configCtx = URIRef(ghrepo + '/config')
    linkStmt = (repo, EX['githubRepo'], ghrepo)
    typeStmt = (ghrepo, RDF.type, EX['GithubRepo'])
    replaceContext(graph, configCtx, {linkStmt, typeStmt})


def writeCommit(graph, ghrepo, row, author):
    """Replace the contents of the `ghrepo` context with a description of one commit.

    Args:
        graph: graph to patch.
        ghrepo: URI of the github repo. It is the subject of the
            latestCommit statement and also the context that is replaced.
        row: one commit entry from the GitHub commits listing. Reads
            row['sha'], the 'url' of each entry in row['parents'], and from
            row['commit'] the keys 'url', 'message' and 'author'
            ('date', 'email').
        author: URI of the commit's author node (see writeAuthor).
    """
    commit = row['commit']
    gitAuthor = commit['author']
    latest = URIRef(commit['url'])

    # Timestamps are converted to the local timezone and stored as ISO-8601
    # strings.
    created = parse(gitAuthor['date']).astimezone(tzlocal()).isoformat()

    new: Set[Triple] = {
        (ghrepo, EX['latestCommit'], latest),
        (latest, RDF.type, EX['GithubCommit']),
        (latest, EX['created'], Literal(created)),
        (latest, EX['creator'], author),
        # The address is attached to the *author* node, so it has to come
        # from the git author record (the same record that supplies the
        # date). The committer can be a different identity.
        (author, EX['foafMail'], Literal(gitAuthor['email'])),
        (latest, EX['commitMessage'], Literal(commit['message'])),
        (latest, EX['sha'], Literal(row['sha'])),
    }
    # NOTE(review): parent URLs are stored as plain literals while the other
    # URLs in this module become URIRefs -- confirm whether that is intended.
    new.update((latest, EX['parent'], Literal(parent['url'])) for parent in row['parents'])

    replaceContext(graph, ghrepo, new)


def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef:
    """Describe a github author in a context named after the author's URL.

    Args:
        graph: graph to patch.
        author: mapping that provides the keys 'url', 'login' and
            'avatar_url'.

    Returns:
        The URIRef made from author['url']; it is both the subject of the
        written statements and the context that holds them.
    """
    authorUri = URIRef(author['url'])
    description = {
        (authorUri, RDF.type, EX['GithubAuthor']),
        (authorUri, EX['login'], Literal(author['login'])),
        (authorUri, EX['avatar'], URIRef(author['avatar_url'])),
    }
    replaceContext(graph, authorUri, description)
    return authorUri


@inlineCallbacks
def update(graph, repos):
    """Refresh every repo in `repos` whose github data is missing or older than 24 hours.

    Repos are handled one after another. After a successful updateOne the
    repo's lastRead statement is set to the time sampled just before that
    fetch. Nothing here catches exceptions, so a failure in updateOne
    propagates and ends the pass.

    Args:
        graph: graph holding both the github data and the lastRead marks.
        repos: iterable of short repo names.
    """
    maxAge = datetime.timedelta(hours=24)
    for shortRepoName in repos:
        repoUri = URIRef(f'http://bigasterisk.com/repo/{shortRepoName}')
        ghrepo = URIRef(repoUri + '/github')

        now = datetime.datetime.now(tzlocal())
        if not lastReadBefore(graph, ghrepo, now, maxAge):
            # Read recently enough; no call to github for this repo.
            continue

        yield updateOne(graph, repoUri, ghrepo, shortRepoName)
        graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now))


def lastReadBefore(graph, ghrepo, now, ago):
    """Tell whether `ghrepo` has no lastRead value, or one earlier than `now - ago`.

    Args:
        graph: graph offering currentState() as a context manager.
        ghrepo: subject whose EX['lastRead'] value is looked up.
        now: reference time.
        ago: interval subtracted from `now` to get the cutoff.

    Returns:
        True if no lastRead value is found or its Python value is before the
        cutoff; False otherwise.
    """
    with graph.currentState() as g:
        lastRead = g.value(ghrepo, EX['lastRead'])
    if lastRead is None:
        return True
    cutoff = now - ago
    return lastRead.toPython() < cutoff


def githubRepoForPath(p: Path) -> str:
    """Return the final component of `p`, which serves as the github repo name."""
    shortName = p.name
    return shortName


def main():
    """Parse the command line, load config.yaml, start the update loop and serve HTTP.

    Reads `config.yaml` from the current directory and keeps the directory
    name of every `hg_repos` row whose `github` value is truthy. Those names
    are passed to `update` through loop_forever_async, and the resulting
    graph is served on TCP port 8000 together with an index page and a
    metrics endpoint. Returns only when the reactor stops.
    """
    args = docopt.docopt('''
Usage:
  repo_github_status.py [options]

Options:
  -v, --verbose  more logging
''')
    verboseLogging(args['--verbose'])

    yaml = YAML(typ='safe')
    # Use a context manager so the config file handle is closed as soon as
    # parsing is done instead of staying open for the life of the process.
    with open('config.yaml') as configFile:
        config = yaml.load(configFile)
    repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row['github']]

    log.info(f'{repos=}')

    # PatchableGraph with the currentState() reader that lastReadBefore uses.
    class PG2(PatchableGraph, CurrentStateGraphApi):
        pass

    graph = PG2()

    # The `first` argument supplied by the loop is not needed by update().
    loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC)

    class Application(cyclone.web.Application):
        """Routes for the index page, the graph, its event stream and metrics."""

        def __init__(self):
            handlers = [
                (r"/()", Index),
                (r'/graph/githubRepos', CycloneGraphHandler, {
                    'masterGraph': graph
                }),
                (r'/graph/githubRepos/events', CycloneGraphEventsHandler, {
                    'masterGraph': graph,
                }),
                (r'/metrics', Metrics),
            ]
            cyclone.web.Application.__init__(
                self,
                handlers,
                debug=args['--verbose'],
            )

    reactor.listenTCP(8000, Application(), interface='::')
    reactor.run()


# Start the service only when this file is executed as a script.
if __name__ == '__main__':
    main()