Mercurial > code > home > repos > sco-bot
diff flow/download.py @ 10:13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
author | drewp@bigasterisk.com |
---|---|
date | Thu, 11 Jul 2024 17:35:31 -0700 |
parents | |
children |
line wrap: on
line diff
"""Prefect download tasks (flow/download.py)."""
import datetime
import time

import httpx
from prefect import task
from prefect.artifacts import create_link_artifact

from local_types import Url


@task(
    task_run_name=lambda: f'getHttp-{int(time.time())}',
    # Prefect cache keys are not scoped to a task, so this key must not
    # collide with the one used by getCityPermanent for the same url
    # (that task caches text, this one caches parsed JSON).
    cache_key_fn=lambda _, args: f'getHttpJson-{args["url"]}',
    cache_expiration=datetime.timedelta(days=1),
    tags=['city'],  # todo ratelimit based on tag
)
def getCityMutableJson(url: Url):
    """GET `url` and return the response body parsed as JSON.

    A link artifact pointing at `url` is recorded for the task run.
    Results are cached per url for one day.

    Raises:
        httpx.HTTPStatusError: if `raise_for_status()` rejects the response.
    """
    # create_link_artifact's signature is (link, link_text, ...): the url is
    # the link target and "get" is only its label.
    create_link_artifact(link=url, link_text="get")
    response = httpx.get(url)  # todo async
    response.raise_for_status()
    return response.json()


@task(
    task_run_name=lambda: f'getHttp-{int(time.time())}',
    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
    tags=['city'],
)
def getCityPermanent(url: Url) -> str:
    """GET `url` and return the response body as text.

    A link artifact pointing at `url` is recorded for the task run.
    Results are cached per url; no cache expiration is configured here.

    Raises:
        httpx.HTTPStatusError: if `raise_for_status()` rejects the response.
    """
    create_link_artifact(link=url, link_text="get")
    response = httpx.get(url)
    response.raise_for_status()
    return response.text


@task
def getYoutubePermanent(url: str):
    """Placeholder task: sleep 5 seconds and return a fixed dummy string.

    `url` is currently unused.
    """
    time.sleep(5)
    return 'video' * 10000