Mercurial > code > home > repos > video
diff ingest.py @ 36:ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author | drewp@bigasterisk.com |
---|---|
date | Tue, 03 Dec 2024 00:08:22 -0800 |
parents | |
children | 7cacfae58430 |
line wrap: on
line diff
# ingest.py: new file in this changeset (diffed against /dev/null).
"""keep db representing our data files

collection fs:
  type  # 'file' or 'dir'
  diskPath  # what you ffprobe (or a directory)
  webRelPath  # what comes after /video/ for this file's page (no ext, no source)
  webRelParent  # webRelPath of the containing dir ('.' at the top of a source)
  webDataPath  # what comes after /video/files/ for this file's content (yes ext, yes source); files only
  label  # what we show as the title
  mtime

collection thumb:
  diskPath
  thumbData

collection probe:
  diskPath
  durationSec
"""

import logging
from pathlib import Path
import re
import time

import pymongo
import pymongo.database
import pymongo.collection
from mongo_required import open_mongo_or_die

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()
# Lowercase suffixes; updateFs lowercases a file's suffix before comparing.
VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]

sources = [
    # These get overlaid by most of the FE.
    Path("/data/video-src"),
    Path("/data/video-download"),
]
db = open_mongo_or_die().get_database('video')


def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path) -> None:
    """Insert an fs doc for video file `p` unless its diskPath is already stored.

    `source` is the scan root `p` was found under; the web-relative paths are
    computed against it. A doc that already exists is left untouched (its
    mtime is not refreshed).
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    rel = p.relative_to(source)
    # Title is the filename stem with any bracketed tag (and the whitespace
    # around it) removed, e.g. 'talk [abc123]' -> 'talk'.
    label = re.sub(r'\s*\[.*?\]\s*', '', p.stem)

    doc = {
        'type': 'file',
        'diskPath': key,
        'webRelPath': str(rel.with_suffix('')),
        'webRelParent': str(rel.parent),
        # Keeps the source dir name and the extension. Assumes every source
        # lives under /data/; a path elsewhere is stored unmodified rather
        # than having its first characters chopped off.
        'webDataPath': key.removeprefix('/data/'),
        'label': label,
        'mtime': p.stat().st_mtime,
    }
    log.info(f'new file: {doc=}')
    fs.insert_one(doc)


def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path) -> None:
    """Insert an fs doc for directory `p` unless its diskPath is already stored.

    `source` is the scan root `p` was found under. A doc that already exists
    is left untouched.
    """
    key = str(p)

    if fs.find_one({'diskPath': key}):
        return

    rel = p.relative_to(source)
    # Use the full directory name: `stem` would drop everything after the
    # last dot, turning a directory called 'vol.2' into 'vol'.
    label = p.name
    doc = {
        'type': 'dir',
        'diskPath': key,
        'webRelPath': str(rel),
        'webRelParent': str(rel.parent),
        'label': label,
        'mtime': p.stat().st_mtime,
    }
    log.info(f'new dir: {doc=}')
    fs.insert_one(doc)


def updateFs(db: pymongo.database.Database, sources: list[Path]) -> None:
    """Walk every source tree and record new directories and video files in `fs`.

    Files whose suffix is not in VIDEO_EXTNS are skipped; the skip is logged
    unless the file is a .webp. Suffix matching ignores case, so 'clip.MP4'
    is ingested like 'clip.mp4'.
    """
    fs = db.get_collection('fs')
    for source in sources:
        log.info(f'updateFs: {source=}')
        for root, dirs, files in source.walk():
            for d in dirs:
                _updateOneDir(root / d, fs, source)
            for fn in files:
                p = root / fn
                ext = p.suffix.lower()
                if ext not in VIDEO_EXTNS:
                    # youtube thumbnail is ok in here, so skip it quietly
                    if ext != '.webp':
                        log.info(f'ignoring {p=} {p.suffix=}')
                    continue
                _updateOneFile(p, fs, source)


# thumb = db.get_collection('thumb')
# probe = db.get_collection('probe')

if __name__ == '__main__':
    # Rescan forever, pausing 600 seconds between passes.
    while True:
        updateFs(db, sources)
        time.sleep(600)