annotate flow/download.py @ 10:13438795d896

rewrite with prefect flows and whoosh search, but it's in a nested pdm env
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 17:35:31 -0700
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
1 import datetime
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
2 import time
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
3 from local_types import Url
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
4
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
5 import httpx
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
6 from prefect import task
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
7 from prefect.artifacts import create_link_artifact
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
8
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
9
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task(
    task_run_name=lambda: f'getHttp-{int(time.time())}',
    # Prefect cache keys are not scoped to a task, so the key carries this
    # task's own name; a bare 'getHttp-<url>' key would be shared with any
    # other task caching on the same URL and could hand back that task's
    # (differently typed) cached result.
    cache_key_fn=lambda _, args: f'getCityMutableJson-{args["url"]}',
    cache_expiration=datetime.timedelta(seconds=86400),  # one day
    tags=['city'],  # todo ratelimit based on tag
)
def getCityMutableJson(url: Url):
    """Fetch `url` over HTTP and return the decoded JSON body.

    Results are cached per URL for one day (see `cache_expiration` above),
    so repeated calls within that window reuse the earlier response.

    Raises:
        httpx.HTTPStatusError: if the response status is 4xx/5xx.
    """
    # create_link_artifact takes (link, link_text); pass them by keyword so
    # the artifact links to the fetched URL and is labelled "get".
    create_link_artifact(link=url, link_text="get")
    response = httpx.get(url)  # todo async
    response.raise_for_status()
    return response.json()
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
21
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
22
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task(
    task_run_name=lambda: f'getHttp-{int(time.time())}',
    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
    tags=['city'],
)
def getCityPermanent(url: Url) -> str:
    """Fetch `url` over HTTP and return the response body as text.

    The task is cached per URL with no `cache_expiration` set on the
    decorator.

    Raises:
        httpx.HTTPStatusError: if the response status is 4xx/5xx.
    """
    # create_link_artifact takes (link, link_text); pass them by keyword so
    # the artifact links to the fetched URL and is labelled "get".
    create_link_artifact(link=url, link_text="get")
    response = httpx.get(url)
    response.raise_for_status()
    return response.text
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
31
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
32
13438795d896 rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff changeset
@task
def getYoutubePermanent(url: str):
    """Return a fixed dummy payload after a 5-second pause.

    `url` is accepted but not used; nothing is downloaded here.
    NOTE(review): this looks like a placeholder for a real YouTube fetch --
    confirm before relying on the returned content.
    """
    dummy_payload = 'video' * 10000
    time.sleep(5)
    return dummy_payload