diff ingest.py @ 36:ed16fdbb3996

rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author drewp@bigasterisk.com
date Tue, 03 Dec 2024 00:08:22 -0800
parents
children 7cacfae58430
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ingest.py	Tue Dec 03 00:08:22 2024 -0800
@@ -0,0 +1,106 @@
+"""keep db representing our data files
+
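+collection fs:
+  type       # 'file' or 'dir'
+  diskPath   # what you ffprobe (or a directory)
+  webRelPath # what comes after /video/ for this file's page (no ext, no source)
+  webRelParent  # parent of webRelPath ('.' for entries at the top of a source)
+  webDataPath   # what comes after /video/files/ for this file's content (yes ext, yes source); files only
+  label      # what we show as the title
+  mtime      # st_mtime as of when the doc was inserted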
+
+collection thumb:
+  diskPath
+  thumbData
+
+collection probe:
+  diskPath
+  durationSec
+"""
+
+import logging
+from pathlib import Path
+import re
+import time
+
+import pymongo
+import pymongo.database
+import pymongo.collection
+from mongo_required import open_mongo_or_die
+
+# Root logger at INFO; every message in this module goes through `log`.
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+# Suffixes (leading dot included) that updateFs treats as video files.
+VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]
+
+# Directory trees that updateFs scans.
+# These get overlaid by most of the FE.
+sources = [
+    Path("/data", "video-src"),
+    Path("/data", "video-download"),
+]
+
+# Opened at import time; the 'fs' collection lives in this database.
+db = open_mongo_or_die().get_database('video')
+
+
+def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
+    """Insert an fs doc for video file `p` unless one already exists.
+
+    A doc whose diskPath equals str(p) is left untouched, so label and mtime
+    are only recorded the first time the file is seen.
+
+    Args:
+        p: path of the file on disk; must be located under `source`.
+        fs: collection that receives the new doc.
+        source: the scan root that `p` was found under.
+
+    Raises:
+        ValueError: if `p` is not under `source` (from Path.relative_to).
+    """
+    key = str(p)
+    if fs.find_one({'diskPath': key}):
+        return
+
+    try:
+        mtime = p.stat().st_mtime
+    except FileNotFoundError:
+        # The file was renamed or deleted between the directory walk and
+        # now; skip it instead of aborting the whole scan.
+        log.warning(f'vanished before ingest: {p=}')
+        return
+
+    rel = p.relative_to(source)
+    # Drop bracketed tags from the title. Substituting a space (and then
+    # stripping the ends) keeps the words on either side of a mid-name tag
+    # apart: 'A [x] B' -> 'A B', not 'AB'.
+    label = re.sub(r'\s*\[.*?\]\s*', ' ', p.stem).strip()
+
+    doc = {
+        'type': 'file',
+        'diskPath': key,
+        'webRelPath': str(rel.with_suffix('')),
+        'webRelParent': str(rel.parent),
+        # Only remove '/data/' when it really is the prefix, rather than
+        # blindly chopping that many characters off the front.
+        'webDataPath': key.removeprefix('/data/'),
+        'label': label,
+        'mtime': mtime,
+    }
+    log.info(f'new file: {doc=}')
+    fs.insert_one(doc)
+
+
+def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
+    """Insert an fs doc for directory `p` unless one already exists.
+
+    A doc whose diskPath equals str(p) is left untouched, so label and mtime
+    are only recorded the first time the directory is seen.
+
+    Args:
+        p: path of the directory on disk; must be located under `source`.
+        fs: collection that receives the new doc.
+        source: the scan root that `p` was found under.
+
+    Raises:
+        ValueError: if `p` is not under `source` (from Path.relative_to).
+    """
+    key = str(p)
+    if fs.find_one({'diskPath': key}):
+        return
+
+    try:
+        mtime = p.stat().st_mtime
+    except FileNotFoundError:
+        # The directory was renamed or deleted between the walk and now;
+        # skip it instead of aborting the whole scan.
+        log.warning(f'vanished before ingest: {p=}')
+        return
+
+    rel = p.relative_to(source)
+    doc = {
+        'type': 'dir',
+        'diskPath': key,
+        'webRelPath': str(rel),
+        'webRelParent': str(rel.parent),
+        # Use the full final component: a directory has no file extension,
+        # and p.stem would cut 'Season 1.5' down to 'Season 1'.
+        'label': p.name,
+        'mtime': mtime,
+    }
+    log.info(f'new dir: {doc=}')
+    fs.insert_one(doc)
+
+
+def updateFs(db: pymongo.database.Database, sources: list[Path]):
+    """Walk every source tree and record new directories and video files.
+
+    Each sub-directory found by the walk goes to _updateOneDir and each file
+    with a video suffix goes to _updateOneFile. The source root itself gets
+    no doc of its own. Suffixes are compared case-insensitively, so
+    'clip.MP4' is ingested like 'clip.mp4'.
+
+    Args:
+        db: database holding the 'fs' collection.
+        sources: root directories to scan.
+    """
+    fs = db.get_collection('fs')
+    for source in sources:
+        log.info(f'updateFs: {source=}')
+        for root, dirs, files in source.walk():
+            for d in dirs:
+                _updateOneDir(root / d, fs, source)
+            for fn in files:
+                p = root / fn
+                suffix = p.suffix.lower()
+                if suffix in VIDEO_EXTNS:
+                    _updateOneFile(p, fs, source)
+                elif suffix != '.webp':
+                    # youtube thumbnail (.webp) is ok in here and stays
+                    # quiet; anything else unexpected gets a log line.
+                    log.info(f'ignoring {p=} {p.suffix=}')
+
+
+# thumb = db.get_collection('thumb')
+# probe = db.get_collection('probe')
+
+if __name__ == '__main__':
+    # Rescan the sources forever, sleeping ten minutes between passes.
+    rescan_period_sec = 600
+    while True:
+        updateFs(db, sources)
+        time.sleep(rescan_period_sec)