Mercurial > code > home > repos > video
annotate ingest.py @ 42:1d2c65d260d1
factor out breadcrumbs
author | drewp@bigasterisk.com |
---|---|
date | Thu, 05 Dec 2024 21:34:00 -0800 |
parents | 7cacfae58430 |
children |
rev | line source |
---|---|
36
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
1 """keep db representing our data files |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
2 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
3 collection fs: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
4 diskPath # what you ffprobe (or a directory) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
5 webRelPath # what comes after /video/ for this file's page (no ext, no source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
6 webDataPath # what comes after /video/files/ for this file's content (yes ext, yes source) |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
7 label # what we show as the title |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
8 mtime |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
10 collection thumb: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
11 diskPath |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
12 thumbData |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
13 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
14 collection probe: |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
15 diskPath |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
16 durationSec |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
17 """ |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
18 |
37
7cacfae58430
thumbnails rewrite - store in db; don't use YT-provided pics for now
drewp@bigasterisk.com
parents:
36
diff
changeset
|
19 import asyncio |
36
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
20 import logging |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
21 from pathlib import Path |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
22 import re |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
23 import time |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
24 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
25 import pymongo |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
26 import pymongo.database |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
27 import pymongo.collection |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
28 from mongo_required import open_mongo_or_die |
37
7cacfae58430
thumbnails rewrite - store in db; don't use YT-provided pics for now
drewp@bigasterisk.com
parents:
36
diff
changeset
|
29 import thumbnail |
36
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
30 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
# Configure the root logger when this module is imported; everything below
# logs through `log`.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# File suffixes that updateFs treats as videos. They are compared against
# Path.suffix as-is, so the match is case-sensitive.
VIDEO_EXTNS = [".mp4", ".mkv", ".webm"]

# Root directories that the __main__ loop passes to updateFs.
sources = [
    # These get overlaid by most of the FE.
    Path("/data/video-src"),
    Path("/data/video-download"),
]

# Handle to the 'video' database, opened when this module is imported.
db = open_mongo_or_die().get_database('video')
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
41 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
42 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
def _updateOneFile(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert an `fs` record for the video file `p` unless one already exists.

    Args:
        p: path of the video file on disk; `str(p)` is the record's
            `diskPath`, which is also the lookup key.
        fs: the collection the record is looked up in and inserted into.
        source: the source root `p` was found under; `webRelPath` and
            `webRelParent` are computed relative to it.

    A path that already has a record is left untouched (its stored mtime is
    not refreshed). A file that no longer exists by the time it is stat'ed is
    logged and skipped.
    """
    key = str(p)
    if fs.find_one({'diskPath': key}):
        return

    try:
        mtime = p.stat().st_mtime
    except FileNotFoundError:
        # The file was removed or renamed between the directory walk and this
        # stat. Skip it instead of letting the exception abort the whole scan.
        log.warning(f'file vanished during scan: {p=}')
        return

    rel = p.relative_to(source)
    # Drop bracketed tags such as "[abc123]" from the title. Each tag (with
    # its surrounding whitespace) becomes a single space and the ends are
    # trimmed, so a tag in the middle of a name does not glue the words on
    # either side of it together.
    label = re.sub(r'\s*\[.*?\]\s*', ' ', p.stem).strip()

    doc = {
        'type': 'file',
        'diskPath': key,
        'webRelPath': str(rel.with_suffix('')),
        'webRelParent': str(rel.parent),
        # NOTE: this strips a fixed-length '/data/' prefix, so the value is
        # only meaningful for files that live under /data/.
        'webDataPath': key[len('/data/'):],
        'label': label,
        'mtime': mtime,
    }
    log.info(f'new file: {doc=}')
    fs.insert_one(doc)
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
62 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
63 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
def _updateOneDir(p: Path, fs: pymongo.collection.Collection, source: Path):
    """Insert an `fs` record for the directory `p` unless one already exists.

    Args:
        p: path of the directory on disk; `str(p)` is the record's
            `diskPath`, which is also the lookup key.
        fs: the collection the record is looked up in and inserted into.
        source: the source root `p` was found under; `webRelPath` and
            `webRelParent` are computed relative to it.

    A path that already has a record is left untouched. A directory that no
    longer exists by the time it is stat'ed is logged and skipped.
    """
    key = str(p)

    if fs.find_one({'diskPath': key}):
        return

    try:
        mtime = p.stat().st_mtime
    except FileNotFoundError:
        # The directory was removed or renamed between the walk and this
        # stat. Skip it instead of letting the exception abort the whole scan.
        log.warning(f'dir vanished during scan: {p=}')
        return

    rel = p.relative_to(source)
    # Directories have no file extension, so use the full final component.
    # `p.stem` would cut a name like "Season 1.5" down to "Season 1".
    label = p.name
    doc = {
        'type': 'dir',
        'diskPath': key,
        'webRelPath': str(rel),
        'webRelParent': str(rel.parent),
        'label': label,
        'mtime': mtime,
    }
    log.info(f'new dir: {doc=}')
    fs.insert_one(doc)
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
82 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
83 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
def updateFs(db: pymongo.database.Database, sources: list[Path]):
    """Walk every source tree and record its directories and video files.

    Args:
        db: database whose 'fs' collection receives the records.
        sources: root directories to walk.

    For each directory visited, its subdirectories are handed to
    _updateOneDir first, then its files are examined. Files whose suffix is
    in VIDEO_EXTNS go to _updateOneFile. '.jpg' and '.webp' files are skipped
    silently; any other file is skipped with an info log line.
    """
    fs = db.get_collection('fs')
    thumb_suffixes = ('.jpg', '.webp')
    for source in sources:
        log.info(f'updateFs: {source=}')
        for parent, subdirs, filenames in source.walk():
            for subdir in subdirs:
                _updateOneDir(parent / subdir, fs, source)
            for filename in filenames:
                p = parent / filename
                if p.suffix in VIDEO_EXTNS:
                    _updateOneFile(p, fs, source)
                elif p.suffix not in thumb_suffixes:
                    # Not a video, and not a youtube thumbnail (those are
                    # expected in here and need no log line).
                    log.info(f'ignoring {p=} {p.suffix=}')
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
100 |
37
7cacfae58430
thumbnails rewrite - store in db; don't use YT-provided pics for now
drewp@bigasterisk.com
parents:
36
diff
changeset
|
async def updateThumbnails(db: pymongo.database.Database):
    """Call thumbnail.createThumbnail for every file record in 'fs'.

    Args:
        db: database holding the 'fs' collection (read here) and the 'thumb'
            collection (passed to thumbnail.createThumbnail).

    Files are processed one at a time, in the order the query returns them.
    """
    fs = db.get_collection('fs')
    thumb = db.get_collection('thumb')
    # Read all the paths up front (fetching only the field we need) so that
    # no server-side cursor is held open while the thumbnail work is awaited;
    # an idle cursor can be timed out by the server part-way through the loop.
    diskPaths = [
        doc['diskPath']
        for doc in fs.find({'type': 'file'}, {'diskPath': 1})
    ]
    for diskPath in diskPaths:
        await thumbnail.createThumbnail(thumb, diskPath)
36
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
111 |
ed16fdbb3996
rewrite WIP. scan fs separately; store in db. thumbs are broken for now
drewp@bigasterisk.com
parents:
diff
changeset
|
if __name__ == '__main__':
    # Run forever: rescan the source trees, bring thumbnails up to date,
    # then wait before the next pass.
    rescan_period_sec = 600
    while True:
        updateFs(db, sources)
        asyncio.run(updateThumbnails(db))
        time.sleep(rescan_period_sec)