changeset 36:ed16fdbb3996

rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author drewp@bigasterisk.com
date Tue, 03 Dec 2024 00:08:22 -0800
parents 814bda860dda
children 7cacfae58430
files deploy.yaml ingest.py serve-files.js skaffold.yaml src/VideoPage.ts video.py video_file_store.py volumes.yaml
diffstat 8 files changed, 190 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/deploy.yaml	Mon Dec 02 23:27:59 2024 -0800
+++ b/deploy.yaml	Tue Dec 03 00:08:22 2024 -0800
@@ -3,7 +3,7 @@
 metadata:
   name: video-files
 spec:
-  replicas: 2
+  replicas: 1
   selector:
     matchLabels:
       app: video-files
@@ -16,10 +16,20 @@
         prometheus.io/port: "8004"
     spec:
       volumes:
-        - name: data
-          persistentVolumeClaim:
-            claimName: video-data
+        - { name: video-data-download, persistentVolumeClaim: { claimName: video-data-download } }
+        - { name: video-data-src, persistentVolumeClaim: { claimName: video-data-src } }
       containers:
+        - name: ingest
+          image: reg:5000/video_image
+          command:
+            - pdm
+            - run
+            - python
+            - ingest.py
+          volumeMounts:
+            - { name: video-data-download, mountPath: /data/video-download }
+            - { name: video-data-src, mountPath: /data/video-src }
+
         - name: files
           image: reg:5000/video_image
           # alternate: [ "webfsd", "-Fp", "9054", "-r", "/vids/" ]
@@ -31,7 +41,8 @@
           ports:
             - containerPort: 8003
           volumeMounts:
-            - { name: data, mountPath: /data }
+            - { name: video-data-download, mountPath: /data/video-download }
+            - { name: video-data-src, mountPath: /data/video-src }
 
         - name: api
           image: reg:5000/video_image
@@ -42,8 +53,6 @@
             - video.py
           ports:
             - containerPort: 8004
-          volumeMounts:
-            - { name: data, mountPath: /data }
           resources:
             requests:
               cpu: "2"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ingest.py	Tue Dec 03 00:08:22 2024 -0800
@@ -0,0 +1,106 @@
+"""keep db representing our data files
+
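+collection fs:   # one doc per directory or video file; 'type' is 'dir' or 'file'
+  diskPath   # what you ffprobe (or a directory)
+  webRelPath # what comes after /video/ for this file's page (no ext, no source)
+  webDataPath   # what comes after /video/files/ for this file's content (yes ext, yes source); files only
+  label      # what we show as the title
+  mtime      # st_mtime when first ingested (also stored: webRelParent, '.' for top-level entries)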
+
+collection thumb:
+  diskPath
+  thumbData
+
+collection probe:
+  diskPath
+  durationSec
+"""
+
+import logging
+from pathlib import Path
+import re
+import time
+
+import pymongo
+import pymongo.database
+import pymongo.collection
+from mongo_required import open_mongo_or_die
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()  # root logger
+VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]  # suffixes updateFs ingests; compared case-sensitively
+
+sources = [
+    # These get overlaid by most of the FE.
+    Path("/data/video-src"),  # mountPath of the video-data-src volume in deploy.yaml
+    Path("/data/video-download")  # mountPath of the video-data-download volume in deploy.yaml
+]
+db = open_mongo_or_die().get_database('video')  # runs at import time, not only under __main__
+
+
+def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
+    """Insert an fs doc for video file `p` (found under `source`) unless its diskPath is stored."""
+    diskPath = str(p)
+    if fs.find_one({'diskPath': diskPath}):
+        return  # already ingested; the existing doc is left untouched
+
+    relPath = p.relative_to(source)
+    doc = dict(
+        type='file',
+        diskPath=diskPath,
+        webRelPath=str(relPath.with_suffix('')),
+        webRelParent=str(relPath.parent),
+        webDataPath=diskPath[len('/data/'):],  # drops the first len('/data/') chars; assumes p is under /data
+        label=re.sub(r'\s*\[.*?\]\s*', '', p.stem),  # stem minus any [bracketed] parts
+        mtime=p.stat().st_mtime,
+    )
+
+    log.info(f'new file: {doc=}')
+    fs.insert_one(doc)
+
+
+def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
+    """Insert an fs doc for directory `p` (found under `source`) unless its diskPath is stored."""
+    key = str(p)
+    if fs.find_one({'diskPath': key}):
+        return
+
+    rel = p.relative_to(source)
+    doc = {
+        'type': 'dir',
+        'diskPath': key,
+        'webRelPath': str(rel),
+        'webRelParent': str(rel.parent),
+        # p.name, not p.stem: stem would cut a dir called 'Season 1.5' down to 'Season 1'.
+        'label': p.name,
+        'mtime': p.stat().st_mtime,
+    }
+    log.info(f'new dir: {doc=}')
+    fs.insert_one(doc)
+
+
+def updateFs(db: pymongo.database.Database, sources: list[Path]):
+    """Walk each source tree and add unseen dirs and video files to the 'fs' collection.
+
+    Uses Path.walk, which needs Python 3.12+."""
+    coll = db.get_collection('fs')
+    for source in sources:
+        log.info(f'updateFs: {source=}')
+        for here, subdirNames, fileNames in source.walk():
+            for name in subdirNames:
+                _updateOneDir(here / name, coll, source)
+            for p in (here / name for name in fileNames):
+                if p.suffix in VIDEO_EXTNS:
+                    _updateOneFile(p, coll, source)
+                elif p.suffix != '.webp':
+                    # .webp (youtube thumbnails) is expected here, so it is skipped without logging
+                    log.info(f'ignoring {p=} {p.suffix=}')
+
+
+# thumb = db.get_collection('thumb')
+# probe = db.get_collection('probe')
+
+if __name__ == '__main__':
+    while True:  # never exits; deploy.yaml runs this script as the 'ingest' container's command
+        updateFs(db, sources)
+        time.sleep(600)  # wait 10 minutes between scans
--- a/serve-files.js	Mon Dec 02 23:27:59 2024 -0800
+++ b/serve-files.js	Tue Dec 03 00:08:22 2024 -0800
@@ -3,6 +3,8 @@
 
 const app = express()
 
+// e.g. /video/files/video-download/movie1/part1.webm
+
 app.use('/video/files',
     express.static('/data'), // serves file content
     serveIndex('/data', { 'icons': true }) // serves dir listings
--- a/skaffold.yaml	Mon Dec 02 23:27:59 2024 -0800
+++ b/skaffold.yaml	Tue Dec 03 00:08:22 2024 -0800
@@ -9,7 +9,7 @@
       sync:
         infer:
           - src/**
-          - '*.py'
+          # - '*.py'
           - vite.config.ts
   tagPolicy:
     dateTime:
--- a/src/VideoPage.ts	Mon Dec 02 23:27:59 2024 -0800
+++ b/src/VideoPage.ts	Tue Dec 03 00:08:22 2024 -0800
@@ -11,6 +11,7 @@
   webRelPath: string;
   label: string;
   thumbRelPath: string;
+  webDataPath: string;
 }
 interface Subdir {
   label: string;
@@ -104,7 +105,7 @@
       <div class="listing">
       ${this.subdirs.map((s) => html`<div class="subdir"><a href="${"./?" + subdirQuery(s.path)}">${s.label}</a></div>`)}
       ${this.videos.map(
-        (v) => html`<video-section @playVideo=${this.playVideo} thumbRelPath="${v.thumbRelPath}" title="${v.label}" manifest=${v.webRelPath}></video-section>`
+        (v) => html`<video-section @playVideo=${this.playVideo} thumbRelPath="${v.thumbRelPath}" title="${v.label}" manifest="/video/files/${v.webDataPath}"></video-section>`
       )}
       </div>
       <p><a href="ingest/">Add new videos...</a></p>
--- a/video.py	Mon Dec 02 23:27:59 2024 -0800
+++ b/video.py	Tue Dec 03 00:08:22 2024 -0800
@@ -15,6 +15,7 @@
 import dl_queue
 from video_file_store import VideoFileStore
 from video_ingest import VideoIngest
+from mongo_required import open_mongo_or_die
 
 logging.basicConfig(level=logging.DEBUG)
 log = logging.getLogger()
@@ -32,8 +33,8 @@
     return JSONResponse({
         "videos": [{
             'webRelPath': vf.webRelPath,
+            'webDataPath': vf.webDataPath,
             'label': vf.label,
-            'thumbRelPath': await store.getOrCreateThumb(vf),
         } for vf in vfInDir],
         "subdirs":
         list(store.findSubdirs(subdir)),
@@ -66,7 +67,8 @@
     return EventSourceResponse(g())
 
 
-store = VideoFileStore(top=Path('/data'))
+db = open_mongo_or_die().get_database('video')
+store = VideoFileStore(db.get_collection('fs'))
 svc = VideoIngest(store)
 
 
--- a/video_file_store.py	Mon Dec 02 23:27:59 2024 -0800
+++ b/video_file_store.py	Tue Dec 03 00:08:22 2024 -0800
@@ -1,58 +1,52 @@
 import asyncio
 import hashlib
-import re
+import logging
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Iterable, Iterator, NewType
+from typing import Iterable, Iterator
 
-IGNORE = {'_thumb'}
+import pymongo.collection
+
+log = logging.getLogger('vfs')
 
 
 @dataclass
 class VideoFile:
     diskPath: Path
     webRelPath: str
+    webDataPath: str  # what comes after /video/files/ for this file's content (per ingest.py's fs schema)
     label: str
     # perms, playlists, req by/when
 
 
-def vf(p: Path, label: str):
-    return VideoFile(p, './files/' + str(p.relative_to('/data')), label)
-
-
-def thumbWebPath(rel: str) -> str:
-    return './files/' + rel
-
-
 @dataclass
 class VideoFileStore:
-    top: Path
+    fs: pymongo.collection.Collection
 
     def findInDir(self, subdir: str) -> Iterable[VideoFile]:
-        if subdir[0] != '/': raise ValueError
-        here = self.top / subdir[1:]
-        manifests = list(here.glob('*.mpd'))
-        if manifests:
-            p = manifests[0]
-            label = p.parent.name
-            yield vf(p, label)
-            return
-        for p in sorted(list(here.glob('*.mp4')) + list(here.glob('*.webm'))):
-            label = re.sub(r' \[[^\]]+\]\.\w+', '', p.name)
-            yield vf(p, label)
+        webRelParent = '.' if subdir == '/' else subdir  # ingest.py stores '.' as the parent of top-level entries
+        for doc in self.fs.find({  # NOTE(review): no sort here, unlike the sorted() glob this replaces; confirm order doesn't matter
+                'type': 'file',
+                'webRelParent': webRelParent  # matched verbatim; stored values have no leading '/' (TODO confirm what callers pass)
+        }):
+            yield VideoFile(Path(doc['diskPath']), doc['webRelPath'],
+                            doc['webDataPath'], doc['label'])
 
     def findSubdirs(self, subdir: str) -> Iterable:
-        if subdir[0] != '/': raise ValueError
-        here = self.top / subdir[1:]
-        for p in here.iterdir():
-            if p.is_dir() and p.name not in IGNORE:
-                yield {
-                    'label': p.name,
-                    'path': '/' + str(p.relative_to(self.top))
-                }
+        for doc in self.fs.find({  # 'dir' docs are the ones ingest.py's _updateOneDir inserts
+                'type':
+                'dir',
+                'webRelParent':
+                '.' if subdir == '/' else subdir  # '/' maps to '.', the stored parent of top-level dirs
+        }):
+            yield {
+                'label': doc['label'],
+                'path': doc['webRelPath'],  # source-relative, without the leading '/' the removed code added
+            }
 
     def thumbPath(self, vf: VideoFile) -> str:
+        return '_thumb/' + vf.webRelPath
         sha256 = hashlib.sha256()
         with open(vf.diskPath, 'rb') as f:
             firstMb = f.read(1 << 20)
@@ -61,6 +55,7 @@
         return f'_thumb/{cksum}.jpg'
 
     async def getOrCreateThumb(self, vf: VideoFile) -> str:
+        raise
         p = self.top / self.thumbPath(vf)
         if not p.exists():
             sp = asyncio.create_subprocess_exec('ffmpegthumbnailer',
@@ -70,6 +65,7 @@
         return thumbWebPath(str(p.relative_to(self.top)))
 
     async def save(self, name: str, chunks: Iterator[bytes]):
+        raise
         p = self.top / name
         if p.exists():
             raise ValueError(f'{p} exists')
@@ -92,4 +88,4 @@
                     fill(subNode, subDir)
 
         fill(out, self.top)
-        return out
\ No newline at end of file
+        return out
--- a/volumes.yaml	Mon Dec 02 23:27:59 2024 -0800
+++ b/volumes.yaml	Tue Dec 03 00:08:22 2024 -0800
@@ -1,26 +1,52 @@
 apiVersion: v1
 kind: PersistentVolume
 metadata:
-  name: video-data
+  name: video-data-download
   labels: { type: local }
 spec:
   storageClassName: manual
 # host = ditto
-  hostPath: { path: "/d2/video" }
+  hostPath: { path: "/d2/video-download" }
   capacity: { storage: 50Gi }
   accessModes: [ReadWriteOnce]
   persistentVolumeReclaimPolicy: Retain
   claimRef:
     namespace: default
-    name: video-data
+    name: video-data-download
 
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: video-data
+  name: video-data-download
 spec:
   storageClassName: ""
-  volumeName: "video-data"
+  volumeName: "video-data-download"
   accessModes: [ReadWriteOnce]
   resources: { requests: { storage: 50Gi } }
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: video-data-src
+  labels: { type: local }
+spec:
+  storageClassName: manual
+# host = ditto
+  hostPath: { path: "/opt/video-src" }
+  capacity: { storage: 50Gi }
+  accessModes: [ReadWriteOnce]
+  persistentVolumeReclaimPolicy: Retain
+  claimRef:
+    namespace: default
+    name: video-data-src
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: video-data-src
+spec:
+  storageClassName: ""
+  volumeName: "video-data-src"
+  accessModes: [ReadWriteOnce]
+  resources: { requests: { storage: 50Gi } }