Mercurial > code > home > repos > video
view ingest.py @ 36:ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author | drewp@bigasterisk.com |
---|---|
date | Tue, 03 Dec 2024 00:08:22 -0800 |
parents | |
children | 7cacfae58430 |
line wrap: on
line source
"""keep db representing our data files

collection fs:
  type         # 'file' or 'dir'
  diskPath     # what you ffprobe (or a directory)
  webRelPath   # what comes after /video/ for this file's page (no ext, no source)
  webRelParent # webRelPath of the containing directory
  webDataPath  # what comes after /video/files/ for this file's content (yes ext, yes source)
  label        # what we show as the title
  mtime

collection thumb:
  diskPath
  thumbData

collection probe:
  diskPath
  durationSec
"""
import logging
from pathlib import Path
import re
import time

import pymongo
import pymongo.database
import pymongo.collection
from mongo_required import open_mongo_or_die

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# Suffixes (lowercase, with the dot) that are ingested as video files.
VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]

sources = [
    # These get overlaid by most of the FE.
    Path("/data/video-src"),
    Path("/data/video-download")
]

db = open_mongo_or_die().get_database('video')


def _statMtime(p: Path) -> float | None:
    """Return the modification time of `p`, or None if it can't be stat'ed.

    A path can disappear (or be unreadable) between the directory listing and
    this call; that must not abort the whole scan, so the error is logged and
    the caller skips the path.
    """
    try:
        return p.stat().st_mtime
    except OSError as e:
        log.warning(f'cannot stat {p=}: {e!r}')
        return None


def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert an fs doc for video file `p` unless its diskPath is already stored.

    `source` is the root that `p` was found under; web paths are relative to it.
    Existing docs are never updated.
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    mtime = _statMtime(p)
    if mtime is None:
        return

    rel = p.relative_to(source)
    # Drop bracketed chunks (and the whitespace around them) from the title.
    label = re.sub(r'\s*\[.*?\]\s*', '', p.stem)
    doc = {
        'type': 'file',
        'diskPath': key,
        'webRelPath': str(rel.with_suffix('')),
        'webRelParent': str(rel.parent),
        # Strips a leading '/data/'; both entries in `sources` live under it.
        'webDataPath': key[len('/data/'):],
        'label': label,
        'mtime': mtime,
    }
    log.info(f'new file: {doc=}')
    fs.insert_one(doc)


def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert an fs doc for directory `p` unless its diskPath is already stored.

    `source` is the root that `p` was found under; web paths are relative to it.
    Existing docs are never updated.
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    mtime = _statMtime(p)
    if mtime is None:
        return

    rel = p.relative_to(source)
    # Directories have no extension: use the full name so that a dot in the
    # directory name (e.g. "Vol. 2") doesn't truncate the label.
    label = p.name
    doc = {
        'type': 'dir',
        'diskPath': key,
        'webRelPath': str(rel),
        'webRelParent': str(rel.parent),
        'label': label,
        'mtime': mtime,
    }
    log.info(f'new dir: {doc=}')
    fs.insert_one(doc)


def updateFs(db: pymongo.database.Database, sources: list[Path]):
    """Walk every source tree and add any new dirs/video files to the 'fs' collection.

    Files whose suffix is not in VIDEO_EXTNS are skipped: '.webp' silently,
    anything else with a log line. Suffix comparison is case-insensitive.
    """
    fs = db.get_collection('fs')
    for source in sources:
        log.info(f'updateFs: {source=}')
        for root, dirs, files in source.walk():
            for d in dirs:
                _updateOneDir(root / d, fs, source)
            for fn in files:
                p = root / fn
                suffix = p.suffix.lower()
                if suffix not in VIDEO_EXTNS:
                    if suffix == '.webp':
                        # youtube thumbnail is ok in here
                        continue
                    log.info(f'ignoring {p=} {p.suffix=}')
                    continue
                _updateOneFile(p, fs, source)
    # Not populated yet:
    # thumb = db.get_collection('thumb')
    # probe = db.get_collection('probe')


if __name__ == '__main__':
    # Rescan forever, pausing 10 minutes between passes.
    while True:
        updateFs(db, sources)
        time.sleep(600)