annotate scobot/index/download_tasks.py @ 16:7a87ba2f00d9

reformat, fix some types, make more async
author drewp@bigasterisk.com
date Fri, 19 Jul 2024 00:49:38 -0700
parents 6622bacb0b84
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
1 import datetime
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
2 import time
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
3
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
4 import httpx
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
5 from prefect import task
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
6 from prefect.artifacts import create_link_artifact
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
7
11
6622bacb0b84 first pass at reorg
drewp@bigasterisk.com
parents: 10
diff changeset
8 from scobot.local_types import Url
6622bacb0b84 first pass at reorg
drewp@bigasterisk.com
parents: 10
diff changeset
9
10
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
10
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task(
    task_run_name=lambda: f'getHttp-{int(time.time())}',
    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
    cache_expiration=datetime.timedelta(seconds=86400),
    tags=['city'],  # todo ratelimit based on tag
)
async def getCityMutableJson(url: Url):
    """Fetch `url` over HTTP and return the response body parsed as JSON.

    The task result is cached under a key derived from `url`, with a cache
    expiration of 86400 seconds (one day).

    NOTE(review): getCityPermanent builds its cache key with the same
    'getHttp-<url>' format; if Prefect shares cache keys across tasks, the
    same URL fetched by both tasks could return the other's cached value
    (parsed JSON vs. text) -- confirm.

    Args:
        url: Address to GET. Presumably a str-like type from
            scobot.local_types -- TODO confirm.

    Returns:
        Whatever `Response.json()` decodes from the body.

    Raises:
        httpx.HTTPStatusError: if the response status is 4xx or 5xx.
    """
    # create_link_artifact takes (link, link_text); pass by keyword so the
    # fetched URL is the link target rather than the display text.
    await create_link_artifact(link=url, link_text="get")

    # Use the async client so the request does not block the event loop
    # that this coroutine runs on.
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
    resp.raise_for_status()
    return resp.json()
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
22
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
23
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task(task_run_name=lambda: f'getHttp-{int(time.time())}',
      cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
      tags=['city'])
async def getCityPermanent(url: Url) -> str:
    """Fetch `url` over HTTP and return the response body as text.

    The task result is cached under a key derived from `url`; no
    cache_expiration is passed to the decorator here.

    NOTE(review): getCityMutableJson builds its cache key with the same
    'getHttp-<url>' format; if Prefect shares cache keys across tasks, the
    same URL fetched by both tasks could return the other's cached value
    (parsed JSON vs. text) -- confirm.

    Args:
        url: Address to GET. Presumably a str-like type from
            scobot.local_types -- TODO confirm.

    Returns:
        The decoded response body (`Response.text`).

    Raises:
        httpx.HTTPStatusError: if the response status is 4xx or 5xx.
    """
    # create_link_artifact takes (link, link_text); pass by keyword so the
    # fetched URL is the link target rather than the display text.
    await create_link_artifact(link=url, link_text="get")

    # Use the async client so the request does not block the event loop
    # that this coroutine runs on.
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
    resp.raise_for_status()
    return resp.text
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
32
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
33
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task
def getYoutubePermanent(url: str):
    """Return a fixed dummy payload after a short pause.

    `url` is accepted but not read. The function sleeps for 5 seconds and
    then returns the string 'video' repeated 10000 times; it appears to be
    a stand-in for a real download -- TODO confirm.
    """
    pause_seconds = 5
    time.sleep(pause_seconds)

    chunk = 'video'
    repeat_count = 10000
    return chunk * repeat_count