changeset 19:5751ef191454

read from github into local graph
author drewp@bigasterisk.com
date Sun, 09 Jan 2022 16:02:08 -0800
parents 6f38aa08408d
children b59912649fc4
files deploy.yaml repo_github_status.py requirements.txt view/index.ts
diffstat 4 files changed, 218 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/deploy.yaml	Sun Jan 09 00:21:41 2022 -0800
+++ b/deploy.yaml	Sun Jan 09 16:02:08 2022 -0800
@@ -20,13 +20,15 @@
             - vite
             - --mode=dev
           ports:
-          - containerPort: 3000
-        # - name: reposync
-        #   image: bang5:5000/reposync_image
-        #   imagePullPolicy: "Always"
-        #   securityContext: {capabilities: {add: [SYS_PTRACE]}}
-        #   ports:
-        #   - containerPort: 10001
+            - containerPort: 3000
+        - name: github-status
+          image: bang5:5000/reposync_image
+          ports:
+            - containerPort: 8000
+          command:
+            - python3
+            - repo_github_status.py
+            - "-v"
         #   volumeMounts:
         #     - {name: my, mountPath: /my}
 
@@ -48,5 +50,6 @@
 spec:
   ports:
     - { port: 80, targetPort: 3000, name: http }
+    - { port: 8000, targetPort: 8000, name: localrepos }
   selector:
     app: reposync
--- a/repo_github_status.py	Sun Jan 09 00:21:41 2022 -0800
+++ b/repo_github_status.py	Sun Jan 09 16:02:08 2022 -0800
@@ -1,3 +1,205 @@
 """
 repos from config.yaml that are at github -> rdf data
-"""
\ No newline at end of file
+"""
+import datetime
+from pathlib import Path
+from typing import Set, Tuple
+
+import cyclone.web
+import docopt
+import treq
+from background_loop import loop_forever_async
+from dateutil.parser import parse
+from dateutil.tz import tzlocal
+from patchablegraph import (CycloneGraphEventsHandler, CycloneGraphHandler, PatchableGraph)
+from prometheus_client import Counter, Gauge
+from prometheus_client.exposition import generate_latest
+from prometheus_client.registry import REGISTRY
+from rdfdb.currentstategraphapi import CurrentStateGraphApi
+from rdfdb.patch import Patch
+from rdflib import RDF, Literal, Namespace, URIRef
+from rdflib.term import Identifier
+from ruamel.yaml import YAML
+from standardservice.logsetup import log, verboseLogging
+from twisted.internet import reactor
+from twisted.internet.defer import inlineCallbacks
+
+# An rdf statement with its context (s, p, o, context) and without (s, p, o).
+Quad = Tuple[Identifier, Identifier, Identifier, Identifier]
+Triple = Tuple[Identifier, Identifier, Identifier]
+
+# Owner segment of the github api urls built in updateOne.
+githubOwner = 'drewp'
+
+# Namespace for every predicate and class this service writes.
+EX = Namespace('http://example.com/')  # todo (presumably: pick a real namespace)
+# GITHUB_SYNC is handed to loop_forever_async in main(); GITHUB_CALLS is
+# incremented once before each http request in updateOne.
+GITHUB_SYNC = Gauge('github_sync', 'syncs to github')
+GITHUB_CALLS = Counter('github_calls', 'http calls to github')
+
+
+# merge this into setToGraph
+def replaceContext(pg: PatchableGraph, ctx: URIRef, newTriples: Set[Triple]):
+    """Make context `ctx` of `pg` hold exactly `newTriples`.
+
+    Reads the quads currently stored under `ctx` and sends `pg` one Patch
+    that deletes the stored statements missing from `newTriples` and adds
+    the statements that are not stored yet.
+    """
+    # quads() yields a graph object in 4th position; swap in its identifier
+    # so the tuples are comparable with the (s, p, o, ctx) tuples built below.
+    existing = {(s, p, o, g.identifier) for s, p, o, g in pg._graph.quads((None, None, None, ctx))}
+    wanted: Set[Quad] = {tri + (ctx,) for tri in newTriples}
+
+    pg.patch(Patch(delQuads=existing - wanted, addQuads=wanted - existing))
+
+
+class Metrics(cyclone.web.RequestHandler):
+    """Handler that writes generate_latest(REGISTRY) as plain text."""
+
+    def get(self):
+        # set_header replaces the handler's default content-type; add_header
+        # would emit a second content-type header next to the default one.
+        self.set_header('content-type', 'text/plain')
+        self.write(generate_latest(REGISTRY))
+
+
+class Index(cyclone.web.RequestHandler):
+    """Handler for the landing page: a static html page linking to the graph."""
+
+    def get(self, *args):
+        # *args absorbs the (empty) capture group of the r"/()" route.
+        # set_header replaces the handler's default content-type; add_header
+        # would emit a second content-type header next to the default one.
+        self.set_header('content-type', 'text/html')
+        self.write('''<!DOCTYPE html>
+        <html>
+          <head>
+            <title>repo_github_status</title>
+          </head>
+          <body>
+            <a href="graph/localRepos">graph</a>
+          </body>
+        </html>''')
+
+
+@inlineCallbacks
+def updateOne(graph: PatchableGraph, repo: URIRef, ghrepo: URIRef, shortRepoName: str):
+    """Fetch the newest commit of one github repo and write it to `graph`.
+
+    Writes the repo -> ghrepo link first, then asks the github api for the
+    most recent commit (per_page=1) of {githubOwner}/{shortRepoName} and
+    stores its author and commit statements.
+
+    Raises ValueError if github answers with a non-200 status, with a
+    payload that is not a list, or with an empty list of commits.
+    """
+    log.info(f'getting update from github for {repo}')
+
+    writeGhRepo(graph, repo, ghrepo)
+
+    commitsUrl = f'https://api.github.com/repos/{githubOwner}/{shortRepoName}/commits?per_page=1'
+    GITHUB_CALLS.inc()
+    resp = yield treq.get(commitsUrl,
+                          timeout=5,
+                          headers={
+                              'User-agent': 'reposync by github.com/drewp',
+                              'Accept': 'application/vnd.github.v3+json'
+                          })
+    if resp.code != 200:
+        # Error replies (rate limit, unknown repo, ...) carry a json object
+        # rather than a list of commits, so indexing them would fail with a
+        # misleading KeyError. Report the status and the start of the body.
+        errBody = yield treq.text_content(resp)
+        raise ValueError(f"github returned {resp.code} for {commitsUrl}: {errBody[:200]}")
+    ret = yield treq.json_content(resp)
+
+    if not isinstance(ret, list) or len(ret) < 1:
+        raise ValueError(f"no commits on {commitsUrl}")
+    log.info(f'{repo=} {ret[0]=}')
+
+    # NOTE(review): writeAuthor indexes this value as a dict; confirm github
+    # always sends an author object here.
+    author = writeAuthor(graph, ret[0]['author'])
+    writeCommit(graph, ghrepo, ret[0], author)
+
+
+def writeGhRepo(graph, repo, ghrepo):
+    """Store the repo -> ghrepo link and ghrepo's type in the '<ghrepo>/config' context."""
+    configCtx = URIRef(ghrepo + '/config')
+    stmts = {
+        (repo, EX['githubRepo'], ghrepo),
+        (ghrepo, RDF.type, EX['GithubRepo']),
+    }
+    replaceContext(graph, configCtx, stmts)
+
+
+def writeCommit(graph, ghrepo, row, author):
+    """Replace the statements in context `ghrepo` with those of one commit.
+
+    `row` is one element of the github commits list as decoded in updateOne;
+    `author` is the uri returned by writeAuthor for row['author'].
+    """
+    new: Set[Triple] = set()
+
+    commit = row['commit']
+    latest = URIRef(commit['url'])
+    new.add((ghrepo, EX['latestCommit'], latest))
+
+    new.add((latest, RDF.type, EX['GithubCommit']))
+
+    # Store the author date converted to the local timezone, as an iso string.
+    t = parse(commit['author']['date']).astimezone(tzlocal()).isoformat()
+    new.add((latest, EX['created'], Literal(t)))
+
+    new.add((latest, EX['creator'], author))
+    # The mail is attached to the author node, so take it from the commit's
+    # author record (like the date above), not from the committer, who may
+    # be a different identity.
+    new.add((author, EX['foafMail'], Literal(commit['author']['email'])))
+    new.add((latest, EX['commitMessage'], Literal(commit['message'])))
+    new.add((latest, EX['sha'], Literal(row['sha'])))
+    for parent in row['parents']:
+        new.add((latest, EX['parent'], Literal(parent['url'])))
+
+    replaceContext(graph, ghrepo, new)
+
+
+def writeAuthor(graph: PatchableGraph, author: dict) -> URIRef:
+    """Store type, login and avatar of `author` in a context named by the author's url.
+
+    Reads the 'url', 'login' and 'avatar_url' keys of `author` and returns
+    the author's uri.
+    """
+    authorUri = URIRef(author['url'])
+    stmts = {
+        (authorUri, RDF.type, EX['GithubAuthor']),
+        (authorUri, EX['login'], Literal(author['login'])),
+        (authorUri, EX['avatar'], URIRef(author['avatar_url'])),
+    }
+    replaceContext(graph, authorUri, stmts)
+    return authorUri
+
+
+@inlineCallbacks
+def update(graph, repos):
+    """Refresh every repo in `repos` whose lastRead is missing or older than 24 hours.
+
+    After a repo has been fetched, its lastRead value in the
+    EX['githubUpdates'] context is set to the time taken just before the
+    fetch started.
+    """
+    maxAge = datetime.timedelta(hours=24)
+    for name in repos:
+        repoUri = URIRef(f'http://bigasterisk.com/repo/{name}')
+        ghrepo = URIRef(repoUri + '/github')
+
+        now = datetime.datetime.now(tzlocal())
+        if not lastReadBefore(graph, ghrepo, now, maxAge):
+            # read recently enough; leave this repo alone
+            continue
+        yield updateOne(graph, repoUri, ghrepo, name)
+        graph.patchObject(EX['githubUpdates'], ghrepo, EX['lastRead'], newObject=Literal(now))
+
+
+def lastReadBefore(graph, ghrepo, now, ago):
+    """Return True if `ghrepo` has no lastRead value, or one earlier than `now - ago`."""
+    with graph.currentState() as g:
+        lastRead = g.value(ghrepo, EX['lastRead'])
+    if lastRead is None:
+        return True
+    readAt = lastRead.toPython()
+    return readAt < now - ago
+
+
+def githubRepoForPath(p: Path) -> str:
+    """Return the github repo name for a local repo dir: the path's final component."""
+    repoName = p.name
+    return repoName
+
+
+def main():
+    """Parse args, load config.yaml, start the github polling loop and serve http.
+
+    The rows of config['hg_repos'] with a truthy 'github' value select the
+    repos to poll. The graph is served at /graph/localRepos (plus /events)
+    and prometheus metrics at /metrics, on port 8000.
+    """
+    args = docopt.docopt('''
+Usage:
+  repo_github_status.py [options]
+
+Options:
+  -v, --verbose  more logging
+''')
+    verboseLogging(args['--verbose'])
+
+    yaml = YAML(typ='safe')
+    # Close the config file as soon as it has been parsed.
+    with open('config.yaml') as configFile:
+        config = yaml.load(configFile)
+    repos = [githubRepoForPath(Path(row['dir'])) for row in config['hg_repos'] if row['github']]
+
+    log.info(f'{repos=}')
+
+    # PatchableGraph plus the currentState() api that lastReadBefore uses.
+    class PG2(PatchableGraph, CurrentStateGraphApi):
+        pass
+
+    graph = PG2()
+
+    # 10 is presumably the delay between runs, in seconds -- TODO confirm
+    # against background_loop.
+    loop_forever_async(lambda first: update(graph, repos), 10, GITHUB_SYNC)
+
+    class Application(cyclone.web.Application):
+
+        def __init__(self):
+            handlers = [
+                (r"/()", Index),
+                (r'/graph/localRepos', CycloneGraphHandler, {
+                    'masterGraph': graph
+                }),
+                (r'/graph/localRepos/events', CycloneGraphEventsHandler, {
+                    'masterGraph': graph,
+                }),
+                (r'/metrics', Metrics),
+            ]
+            cyclone.web.Application.__init__(
+                self,
+                handlers,
+                debug=args['--verbose'],
+            )
+
+    reactor.listenTCP(8000, Application(), interface='::')
+    reactor.run()
+
+
+# Start the service only when this file is run as a script.
+if __name__ == '__main__':
+    main()
--- a/requirements.txt	Sun Jan 09 00:21:41 2022 -0800
+++ b/requirements.txt	Sun Jan 09 16:02:08 2022 -0800
@@ -4,6 +4,7 @@
 prometheus_client==0.12.0
 pyopenssl
 python-dateutil==2.8.2
+rdflib==6.1.1
 requests==2.27.1
 ruamel.yaml==0.17.20
 treq==21.5.0
@@ -11,5 +12,8 @@
 tzlocal==4.1
 # PyGithub==1.14.2
 
+background_loop==0.3.0
 cycloneerr==0.4.0
+patchablegraph==0.19.0
+rdfdb==0.21.0
 standardservice==0.6.0
--- a/view/index.ts	Sun Jan 09 00:21:41 2022 -0800
+++ b/view/index.ts	Sun Jan 09 16:02:08 2022 -0800
@@ -31,7 +31,7 @@
   render() {
     return html`
       <h1>repo statuses yay</h1>
-      <streamed-graph url="/collector/graph/home" expanded="true"></streamed-graph>
+      <streamed-graph url="/collector/graph/reposync" expanded="true"></streamed-graph>
     `;
   }
 }