Mercurial > code > home > repos > sco-bot
annotate scobot/index/download_tasks.py @ 11:6622bacb0b84
first pass at reorg
author | drewp@bigasterisk.com |
---|---|
date | Thu, 11 Jul 2024 18:15:44 -0700 |
parents | flow/download.py@13438795d896 |
children | 7a87ba2f00d9 |
rev | line source |
---|---|
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
1 import datetime |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
2 import time |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
3 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
4 import httpx |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
5 from prefect import task |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
6 from prefect.artifacts import create_link_artifact |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
7 |
11 | 8 from scobot.local_types import Url |
9 | |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
10 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
11 @task( |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
12 task_run_name=lambda: f'getHttp-{int(time.time())}', |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
13 cache_key_fn=lambda _, args: f'getHttp-{args["url"]}', |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
14 cache_expiration=datetime.timedelta(seconds=86400), |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
15 tags=['city'], # todo ratelimit based on tag |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
16 ) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
17 def getCityMutableJson(url: Url): |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
18 create_link_artifact("get", url) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
19 req = httpx.get(url) # todo async |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
20 req.raise_for_status() |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
21 return req.json() |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
22 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
23 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
24 @task(task_run_name=lambda: f'getHttp-{int(time.time())}', |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
25 cache_key_fn=lambda _, args: f'getHttp-{args["url"]}', |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
26 tags=['city']) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
27 def getCityPermanent(url: Url) -> str: |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
28 create_link_artifact("get", url) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
29 req = httpx.get(url) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
30 req.raise_for_status() |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
31 return req.text |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
32 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
33 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
34 @task |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
35 def getYoutubePermanent(url: str): |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
36 time.sleep(5) |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
37 return 'video' * 10000 |