diff flow/download.py @ 10:13438795d896

rewrite with prefect flows and whoosh search, but it's in a nested pdm env
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 17:35:31 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flow/download.py	Thu Jul 11 17:35:31 2024 -0700
@@ -0,0 +1,36 @@
+import datetime
+import time
+from local_types import Url
+
+import httpx
+from prefect import task
+from prefect.artifacts import create_link_artifact
+
+
+@task(
+    task_run_name=lambda: f'getHttp-{int(time.time())}',
+    # NOTE(review): getCityPermanent builds the same 'getHttp-<url>' key. If
+    # Prefect shares cache keys across tasks, fetching one URL through both
+    # tasks could hand back the other task's cached result -- confirm.
+    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
+    cache_expiration=datetime.timedelta(seconds=86400),
+    tags=['city'],  # todo ratelimit based on tag
+)
+def getCityMutableJson(url: Url):
+    """Fetch `url` with an HTTP GET and return the response body parsed as JSON.
+
+    Runs as a Prefect task tagged 'city'. The cache key is derived from `url`
+    and the cache entry is configured to expire after 86400 seconds (one day).
+    A link artifact pointing at `url` is recorded before the request is made.
+
+    Raises whatever `raise_for_status()` raises for an error response, and
+    whatever `json()` raises if the body is not valid JSON.
+    """
+    # create_link_artifact takes the link target first and the display text
+    # second; keywords keep the URL as the target and "get" as the label.
+    create_link_artifact(link=url, link_text="get")
+    resp = httpx.get(url)  # todo async
+    resp.raise_for_status()
+    return resp.json()
+
+
+# NOTE(review): the cache key below has the same 'getHttp-<url>' shape as the
+# one used by getCityMutableJson. If Prefect shares cache keys across tasks,
+# fetching one URL through both tasks could return the other task's cached
+# result -- confirm.
+@task(task_run_name=lambda: f'getHttp-{int(time.time())}',
+      cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
+      tags=['city'])
+def getCityPermanent(url: Url) -> str:
+    """Fetch `url` with an HTTP GET and return the response body as text.
+
+    Runs as a Prefect task tagged 'city'. The cache key is derived from `url`
+    and no cache expiration is set on this task. A link artifact pointing at
+    `url` is recorded before the request is made.
+
+    Raises whatever `raise_for_status()` raises for an error response.
+    """
+    # create_link_artifact takes the link target first and the display text
+    # second; keywords keep the URL as the target and "get" as the label.
+    create_link_artifact(link=url, link_text="get")
+    resp = httpx.get(url)
+    resp.raise_for_status()
+    return resp.text
+
+
+@task
+def getYoutubePermanent(url: str):
+    """Placeholder task: pause for five seconds, then return a fixed dummy string.
+
+    `url` is accepted but not used; the result is 'video' repeated 10000 times.
+    """
+    time.sleep(5)
+    placeholder = 'video' * 10000
+    return placeholder