Mercurial > code > home > repos > video
annotate ingest.py @ 36:ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
author | drewp@bigasterisk.com |
---|---|
date | Tue, 03 Dec 2024 00:08:22 -0800 |
parents | |
children | 7cacfae58430 |
rev | line source |
---|---|
36
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
1 """keep db representing our data files |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
2 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
3 collection fs: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
4 diskPath # what you ffprobe (or a directory) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
5 webRelPath # what comes after /video/ for this file's page (no ext, no source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
6 webDataPath # what comes after /video/files/ for this file's content (yes ext, yes source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
7 label # what we show as the title |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
8 mtime |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
10 collection thumb: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
11 diskPath |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
12 thumbData |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
13 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
14 collection probe: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
15 diskPath |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
16 durationSec |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
17 """ |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
18 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
19 import logging |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
20 from pathlib import Path |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
21 import re |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
22 import time |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
23 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
24 import pymongo |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
25 import pymongo.database |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
26 import pymongo.collection |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
27 from mongo_required import open_mongo_or_die |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
28 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
29 logging.basicConfig(level=logging.INFO) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
30 log = logging.getLogger() |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
31 VIDEO_EXTNS = [".mp4", ".mkv", ".webm"] |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
32 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
33 sources = [ |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
34 # These get overlaid by most of the FE. |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
35 Path("/data/video-src"), |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
36 Path("/data/video-download") |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
37 ] |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
38 db = open_mongo_or_die().get_database('video') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
39 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
40 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
41 def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
42 key = str(p) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
43 if fs.find_one({'diskPath': key}): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
44 return |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
45 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
46 rel = p.relative_to(source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
47 label = re.sub(r'\s*\[.*?\]\s*', '', p.stem) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
48 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
49 doc = { |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
50 'type': 'file', |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
51 'diskPath': key, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
52 'webRelPath': str(rel.with_suffix('')), |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
53 'webRelParent': str(rel.parent), |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
54 'webDataPath': key[len('/data/'):], |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
55 'label': label, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
56 'mtime': p.stat().st_mtime, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
57 } |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
58 log.info(f'new file: {doc=}') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
59 fs.insert_one(doc) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
60 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
61 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
62 def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
63 key = str(p) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
64 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
65 if fs.find_one({'diskPath': key}): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
66 return |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
67 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
68 rel = p.relative_to(source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
69 label = p.stem |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
70 doc = { |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
71 'type': 'dir', |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
72 'diskPath': key, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
73 'webRelPath': str(rel), |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
74 'webRelParent': str(rel.parent), |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
75 'label': label, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
76 'mtime': p.stat().st_mtime, |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
77 } |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
78 log.info(f'new dir: {doc=}') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
79 fs.insert_one(doc) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
80 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
81 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
82 def updateFs(db: pymongo.database.Database, sources: list[Path]): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
83 fs = db.get_collection('fs') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
84 for source in sources: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
85 log.info(f'updateFs: {source=}') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
86 for root, dirs, files in source.walk(): |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
87 for d in dirs: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
88 _updateOneDir(root / d, fs, source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
89 for fn in files: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
90 p = root / fn |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
91 if p.suffix not in VIDEO_EXTNS: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
92 if p.suffix == '.webp': |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
93 # youtube thumbnail is ok in here |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
94 continue |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
95 log.info(f'ignoring {p=} {p.suffix=}') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
96 continue |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
97 _updateOneFile(p, fs, source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
98 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
99 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
100 # thumb = db.get_collection('thumb') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
101 # probe = db.get_collection('probe') |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
102 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
103 if __name__ == '__main__': |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
104 while True: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
105 updateFs(db, sources) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
106 time.sleep(600) |