view ingest.py @ 42:1d2c65d260d1

factor out breadcrumbs
author drewp@bigasterisk.com
date Thu, 05 Dec 2024 21:34:00 -0800
parents 7cacfae58430
children
line wrap: on
line source

"""keep db representing our data files

collection fs:
  type       # 'file' or 'dir'
  diskPath   # what you ffprobe (or a directory)
  webRelPath # what comes after /video/ for this file's page (no ext, no source)
  webRelParent  # webRelPath of the containing directory ('.' for entries directly under a source)
  webDataPath   # what comes after /video/files/ for this file's content (yes ext, yes source); files only
  label      # what we show as the title
  mtime

collection thumb:
  diskPath
  thumbData

collection probe:
  diskPath
  durationSec
"""

import asyncio
import logging
from pathlib import Path
import re
import time

import pymongo
import pymongo.database
import pymongo.collection
from mongo_required import open_mongo_or_die
import thumbnail

# Configure logging at import time; `log` is the root logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()
# Suffixes (compared exactly against Path.suffix, so case-sensitive) of the
# files that updateFs records as videos.
VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]

# Directory trees scanned by updateFs. Web-relative paths are computed
# relative to whichever source a file was found under.
sources = [
    # These get overlaid by most of the FE.
    Path("/data/video-src"),
    Path("/data/video-download")
]
# The 'video' mongo database. Note this connects as a side effect of importing
# the module (presumably exiting if mongo is unreachable -- see mongo_required).
db = open_mongo_or_die().get_database('video')


def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Record video file `p` in the fs collection the first time it is seen.

    p: absolute path of a file found under `source`.
    fs: the 'fs' collection; an existing doc is looked up by 'diskPath'.
    source: the root `p` was found under; web-relative paths are computed
        against it.

    A file that already has a doc is left untouched, so its stored 'mtime'
    and 'label' are the values from the first ingest.

    Raises ValueError (from Path.relative_to) if `p` is not under `source`.
    """
    diskPath = str(p)
    alreadyKnown = fs.find_one({'diskPath': diskPath})
    if alreadyKnown:
        return

    relToSource = p.relative_to(source)

    # Remove every "[...]" group (plus the whitespace around it) from the
    # displayed title -- presumably the id tags left by a video downloader.
    title = re.sub(r'\s*\[.*?\]\s*', '', p.stem)

    # NOTE(review): this slice assumes diskPath starts with '/data/'. A source
    # outside /data would silently get a mangled webDataPath -- confirm
    # against the web server's /video/files/ mapping before adding one.
    dataPath = diskPath[len('/data/'):]

    modified = p.stat().st_mtime

    doc = {
        'type': 'file',
        'diskPath': diskPath,
        'webRelPath': str(relToSource.with_suffix('')),
        'webRelParent': str(relToSource.parent),
        'webDataPath': dataPath,
        'label': title,
        'mtime': modified,
    }
    log.info(f'new file: {doc=}')
    fs.insert_one(doc)


def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert an fs doc of type 'dir' for directory `p` unless one already exists.

    p: absolute path of a directory found under `source`.
    fs: the 'fs' collection; an existing doc is looked up by 'diskPath'.
    source: the root `p` was found under; web-relative paths are computed
        against it.

    Existing docs are never updated, so 'mtime' is the value seen when the
    directory was first ingested.

    Raises ValueError (from Path.relative_to) if `p` is not under `source`.
    """
    key = str(p)

    if fs.find_one({'diskPath': key}):
        return

    rel = p.relative_to(source)
    # Use the full directory name. Directories have no file extension, and
    # p.stem would cut everything after the last dot ("Dr. Who" -> "Dr").
    label = p.name
    doc = {
        'type': 'dir',
        'diskPath': key,
        'webRelPath': str(rel),
        'webRelParent': str(rel.parent),
        'label': label,
        'mtime': p.stat().st_mtime,
    }
    log.info(f'new dir: {doc=}')
    fs.insert_one(doc)


def updateFs(db: pymongo.database.Database, sources: list[Path]):
    """Walk each source tree and add fs docs for directories and video files.

    db: database holding the 'fs' collection.
    sources: root directories to scan, in order.

    Every directory is passed to _updateOneDir. Files whose suffix is in
    VIDEO_EXTNS are passed to _updateOneFile; '.jpg' and '.webp' files are
    skipped silently, and anything else is skipped with an info log line.
    Uses Path.walk, which needs Python 3.12 or newer.
    """
    fs = db.get_collection('fs')
    # youtube thumbnail is ok in here
    quietSuffixes = ['.jpg', '.webp']

    for source in sources:
        log.info(f'updateFs: {source=}')
        for root, dirNames, fileNames in source.walk():
            for dirName in dirNames:
                _updateOneDir(root / dirName, fs, source)

            for fileName in fileNames:
                p = root / fileName
                if p.suffix in VIDEO_EXTNS:
                    _updateOneFile(p, fs, source)
                elif p.suffix not in quietSuffixes:
                    log.info(f'ignoring {p=} {p.suffix=}')

async def updateThumbnails(db: pymongo.database.Database):
    """Call thumbnail.createThumbnail for every file doc in the fs collection.

    db: database holding the 'fs' and 'thumb' collections.

    Files are processed one at a time: each createThumbnail call is awaited
    before the next doc is read. Any exception from createThumbnail
    propagates and stops the pass.
    """
    fs = db.get_collection('fs')
    thumb = db.get_collection('thumb')
    for doc in fs.find({'type': 'file'}):
        # NOTE(review): presumably createThumbnail skips files that already
        # have a thumb doc -- confirm in thumbnail.py.
        await thumbnail.createThumbnail(thumb, doc['diskPath'])

if __name__ == '__main__':
    # Run forever: rescan the source trees, fill in thumbnails for the file
    # docs, then wait before the next pass.
    rescanIntervalSec = 600
    while True:
        updateFs(db, sources)
        asyncio.run(updateThumbnails(db))
        time.sleep(rescanIntervalSec)