Mercurial > code > home > repos > video
changeset 36:ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author | drewp@bigasterisk.com |
---|---|
date | Tue, 03 Dec 2024 00:08:22 -0800 |
parents | 814bda860dda |
children | 7cacfae58430 |
files | deploy.yaml ingest.py serve-files.js skaffold.yaml src/VideoPage.ts video.py video_file_store.py volumes.yaml |
diffstat | 8 files changed, 190 insertions(+), 48 deletions(-) [+] |
line wrap: on
line diff
--- a/deploy.yaml Mon Dec 02 23:27:59 2024 -0800 +++ b/deploy.yaml Tue Dec 03 00:08:22 2024 -0800 @@ -3,7 +3,7 @@ metadata: name: video-files spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: video-files @@ -16,10 +16,20 @@ prometheus.io/port: "8004" spec: volumes: - - name: data - persistentVolumeClaim: - claimName: video-data + - { name: video-data-download, persistentVolumeClaim: { claimName: video-data-download } } + - { name: video-data-src, persistentVolumeClaim: { claimName: video-data-src } } containers: + - name: ingest + image: reg:5000/video_image + command: + - pdm + - run + - python + - ingest.py + volumeMounts: + - { name: video-data-download, mountPath: /data/video-download } + - { name: video-data-src, mountPath: /data/video-src } + - name: files image: reg:5000/video_image # alternate: [ "webfsd", "-Fp", "9054", "-r", "/vids/" ] @@ -31,7 +41,8 @@ ports: - containerPort: 8003 volumeMounts: - - { name: data, mountPath: /data } + - { name: video-data-download, mountPath: /data/video-download } + - { name: video-data-src, mountPath: /data/video-src } - name: api image: reg:5000/video_image @@ -42,8 +53,6 @@ - video.py ports: - containerPort: 8004 - volumeMounts: - - { name: data, mountPath: /data } resources: requests: cpu: "2"
# ingest.py -- new file in this changeset (diff base was /dev/null).
"""keep db representing our data files

collection fs:
  type          # 'file' or 'dir'
  diskPath      # what you ffprobe (or a directory)
  webRelPath    # what comes after /video/ for this file's page (no ext, no source)
  webRelParent  # webRelPath of the containing dir ('.' at the top of a source)
  webDataPath   # what comes after /video/files/ for this file's content (yes ext, yes source)
                # (files only; dir docs do not have it)
  label         # what we show as the title
  mtime

collection thumb:
  diskPath
  thumbData

collection probe:
  diskPath
  durationSec
"""

import logging
from pathlib import Path
import re
import time

import pymongo
import pymongo.database
import pymongo.collection
from mongo_required import open_mongo_or_die

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()
VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]

sources = [
    # These get overlaid by most of the FE.
    Path("/data/video-src"),
    Path("/data/video-download")
]
db = open_mongo_or_die().get_database('video')


def _mtimeOrNone(p: Path) -> float | None:
    """Return p's st_mtime, or None (after logging) if p cannot be stat'ed.

    The tree is walked first and stat'ed later, so an entry may have been
    removed or renamed in between; one such entry must not abort the scan.
    """
    try:
        return p.stat().st_mtime
    except OSError as e:
        log.warning(f'skipping {p=}: {e!r}')
        return None


def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert a 'file' doc for video file `p` if none exists for its diskPath.

    p: absolute path of the file; must be inside `source`.
    fs: the 'fs' collection.
    source: the source root that `p` was found under; web paths are made
      relative to it.

    Docs that already exist are left untouched (they are never rewritten).
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    mtime = _mtimeOrNone(p)
    if mtime is None:
        return

    rel = p.relative_to(source)
    # Drop bracketed tags (and the whitespace around them) from the title.
    label = re.sub(r'\s*\[.*?\]\s*', '', p.stem)

    doc = {
        'type': 'file',
        'diskPath': key,
        'webRelPath': str(rel.with_suffix('')),
        'webRelParent': str(rel.parent),
        # Path below /data, e.g. 'video-download/movie1/part1.webm'.
        'webDataPath': key.removeprefix('/data/'),
        'label': label,
        'mtime': mtime,
    }
    log.info(f'new file: {doc=}')
    fs.insert_one(doc)


def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert a 'dir' doc for directory `p` if none exists for its diskPath.

    Parameters are as for _updateOneFile. Dir docs carry no webDataPath.
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    mtime = _mtimeOrNone(p)
    if mtime is None:
        return

    rel = p.relative_to(source)
    doc = {
        'type': 'dir',
        'diskPath': key,
        'webRelPath': str(rel),
        'webRelParent': str(rel.parent),
        # Use the full name: .stem would cut a directory name at its last
        # dot ('Season 1.5' -> 'Season 1').
        'label': p.name,
        'mtime': mtime,
    }
    log.info(f'new dir: {doc=}')
    fs.insert_one(doc)


def updateFs(db: pymongo.database.Database, sources: list[Path]):
    """Walk every source tree and add docs for dirs and video files not yet in db.

    db: database holding the 'fs' collection.
    sources: root directories to scan.

    Only insertions happen here; nothing is removed or updated.
    """
    fs = db.get_collection('fs')
    for source in sources:
        log.info(f'updateFs: {source=}')
        for root, dirs, files in source.walk():
            for d in dirs:
                _updateOneDir(root / d, fs, source)
            for fn in files:
                p = root / fn
                # Compare case-insensitively so e.g. 'clip.MP4' is ingested.
                suffix = p.suffix.lower()
                if suffix in VIDEO_EXTNS:
                    _updateOneFile(p, fs, source)
                    continue
                if suffix != '.webp':
                    # youtube thumbnail (.webp) is ok in here; anything else
                    # is worth a log line.
                    log.info(f'ignoring {p=} {p.suffix=}')


# thumb = db.get_collection('thumb')
# probe = db.get_collection('probe')

if __name__ == '__main__':
    # Rescan forever, sleeping 600 seconds between passes.
    while True:
        updateFs(db, sources)
        time.sleep(600)
--- a/serve-files.js Mon Dec 02 23:27:59 2024 -0800 +++ b/serve-files.js Tue Dec 03 00:08:22 2024 -0800 @@ -3,6 +3,8 @@ const app = express() +// e.g. /video/files/video-download/movie1/part1.webm + app.use('/video/files', express.static('/data'), // serves file content serveIndex('/data', { 'icons': true }) // serves dir listings
--- a/skaffold.yaml Mon Dec 02 23:27:59 2024 -0800 +++ b/skaffold.yaml Tue Dec 03 00:08:22 2024 -0800 @@ -9,7 +9,7 @@ sync: infer: - src/** - - '*.py' + # - '*.py' - vite.config.ts tagPolicy: dateTime:
--- a/src/VideoPage.ts Mon Dec 02 23:27:59 2024 -0800 +++ b/src/VideoPage.ts Tue Dec 03 00:08:22 2024 -0800 @@ -11,6 +11,7 @@ webRelPath: string; label: string; thumbRelPath: string; + webDataPath: string; } interface Subdir { label: string; @@ -104,7 +105,7 @@ <div class="listing"> ${this.subdirs.map((s) => html`<div class="subdir"><a href="${"./?" + subdirQuery(s.path)}">${s.label}</a></div>`)} ${this.videos.map( - (v) => html`<video-section @playVideo=${this.playVideo} thumbRelPath="${v.thumbRelPath}" title="${v.label}" manifest=${v.webRelPath}></video-section>` + (v) => html`<video-section @playVideo=${this.playVideo} thumbRelPath="${v.thumbRelPath}" title="${v.label}" manifest="/video/files/${v.webDataPath}"></video-section>` )} </div> <p><a href="ingest/">Add new videos...</a></p>
--- a/video.py Mon Dec 02 23:27:59 2024 -0800 +++ b/video.py Tue Dec 03 00:08:22 2024 -0800 @@ -15,6 +15,7 @@ import dl_queue from video_file_store import VideoFileStore from video_ingest import VideoIngest +from mongo_required import open_mongo_or_die logging.basicConfig(level=logging.DEBUG) log = logging.getLogger() @@ -32,8 +33,8 @@ return JSONResponse({ "videos": [{ 'webRelPath': vf.webRelPath, + 'webDataPath': vf.webDataPath, 'label': vf.label, - 'thumbRelPath': await store.getOrCreateThumb(vf), } for vf in vfInDir], "subdirs": list(store.findSubdirs(subdir)), @@ -66,7 +67,8 @@ return EventSourceResponse(g()) -store = VideoFileStore(top=Path('/data')) +db = open_mongo_or_die().get_database('video') +store = VideoFileStore(db.get_collection('fs')) svc = VideoIngest(store)
--- a/video_file_store.py Mon Dec 02 23:27:59 2024 -0800 +++ b/video_file_store.py Tue Dec 03 00:08:22 2024 -0800 @@ -1,58 +1,52 @@ import asyncio import hashlib -import re +import logging import os from dataclasses import dataclass from pathlib import Path -from typing import Iterable, Iterator, NewType +from typing import Iterable, Iterator -IGNORE = {'_thumb'} +import pymongo.collection + +log = logging.getLogger('vfs') @dataclass class VideoFile: diskPath: Path webRelPath: str + webDataPath: str label: str # perms, playlists, req by/when -def vf(p: Path, label: str): - return VideoFile(p, './files/' + str(p.relative_to('/data')), label) - - -def thumbWebPath(rel: str) -> str: - return './files/' + rel - - @dataclass class VideoFileStore: - top: Path + fs: pymongo.collection.Collection def findInDir(self, subdir: str) -> Iterable[VideoFile]: - if subdir[0] != '/': raise ValueError - here = self.top / subdir[1:] - manifests = list(here.glob('*.mpd')) - if manifests: - p = manifests[0] - label = p.parent.name - yield vf(p, label) - return - for p in sorted(list(here.glob('*.mp4')) + list(here.glob('*.webm'))): - label = re.sub(r' \[[^\]]+\]\.\w+', '', p.name) - yield vf(p, label) + webRelParent = '.' if subdir == '/' else subdir + for doc in self.fs.find({ + 'type': 'file', + 'webRelParent': webRelParent + }): + yield VideoFile(Path(doc['diskPath']), doc['webRelPath'], + doc['webDataPath'], doc['label']) def findSubdirs(self, subdir: str) -> Iterable: - if subdir[0] != '/': raise ValueError - here = self.top / subdir[1:] - for p in here.iterdir(): - if p.is_dir() and p.name not in IGNORE: - yield { - 'label': p.name, - 'path': '/' + str(p.relative_to(self.top)) - } + for doc in self.fs.find({ + 'type': + 'dir', + 'webRelParent': + '.' 
if subdir == '/' else subdir + }): + yield { + 'label': doc['label'], + 'path': doc['webRelPath'], + } def thumbPath(self, vf: VideoFile) -> str: + return '_thumb/' + vf.webRelPath sha256 = hashlib.sha256() with open(vf.diskPath, 'rb') as f: firstMb = f.read(1 << 20) @@ -61,6 +55,7 @@ return f'_thumb/{cksum}.jpg' async def getOrCreateThumb(self, vf: VideoFile) -> str: + raise p = self.top / self.thumbPath(vf) if not p.exists(): sp = asyncio.create_subprocess_exec('ffmpegthumbnailer', @@ -70,6 +65,7 @@ return thumbWebPath(str(p.relative_to(self.top))) async def save(self, name: str, chunks: Iterator[bytes]): + raise p = self.top / name if p.exists(): raise ValueError(f'{p} exists') @@ -92,4 +88,4 @@ fill(subNode, subDir) fill(out, self.top) - return out \ No newline at end of file + return out
--- a/volumes.yaml Mon Dec 02 23:27:59 2024 -0800 +++ b/volumes.yaml Tue Dec 03 00:08:22 2024 -0800 @@ -1,26 +1,52 @@ apiVersion: v1 kind: PersistentVolume metadata: - name: video-data + name: video-data-download labels: { type: local } spec: storageClassName: manual # host = ditto - hostPath: { path: "/d2/video" } + hostPath: { path: "/d2/video-download" } capacity: { storage: 50Gi } accessModes: [ReadWriteOnce] persistentVolumeReclaimPolicy: Retain claimRef: namespace: default - name: video-data + name: video-data-download --- apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: video-data + name: video-data-download spec: storageClassName: "" - volumeName: "video-data" + volumeName: "video-data-download" accessModes: [ReadWriteOnce] resources: { requests: { storage: 50Gi } } +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: video-data-src + labels: { type: local } +spec: + storageClassName: manual +# host = ditto + hostPath: { path: "/opt/video-src" } + capacity: { storage: 50Gi } + accessModes: [ReadWriteOnce] + persistentVolumeReclaimPolicy: Retain + claimRef: + namespace: default + name: video-data-src +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: video-data-src +spec: + storageClassName: "" + volumeName: "video-data-src" + accessModes: [ReadWriteOnce] + resources: { requests: { storage: 50Gi } }