diff flow/search_index.py @ 10:13438795d896

rewrite with prefect flows and whoosh search, but it's in a nested pdm env
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 17:35:31 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flow/search_index.py	Thu Jul 11 17:35:31 2024 -0700
@@ -0,0 +1,45 @@
+from pathlib import Path
+
+from whoosh.fields import ID
+from whoosh.index import create_in
+
+from schema import schema
+
+log = None  # set by flow
+
+
+class SearchIndex:
+    """Build a whoosh index in a directory through a single writer.
+
+    Documents queued with addDoc are written when commit is called.
+
+    NOTE(review): whoosh documents create_in as clearing any index already
+    present in the directory -- confirm a from-scratch rebuild is intended.
+    """
+
+    def __init__(self, indexDir: Path) -> None:
+        """Create the index under indexDir and open its writer.
+
+        indexDir and any missing parents are created before the index is.
+        """
+        indexDir.mkdir(parents=True, exist_ok=True)
+        self.ix = create_in(indexDir, schema)
+        self.writer = self.ix.writer()
+
+    def addDoc(self, **kw) -> None:
+        """Queue one document; kw is passed straight to add_document."""
+        self.writer.add_document(**kw)
+
+    def commit(self) -> None:
+        """Commit the writer, then log the index's document count.
+
+        NOTE(review): whoosh writers look single-use, so addDoc after commit
+        presumably fails -- confirm before reusing an instance.
+        """
+        self.writer.commit()
+        # log is None until the flow assigns it. The commit above has already
+        # succeeded, so skip the report instead of raising AttributeError.
+        if log is None:
+            return
+        with self.ix.searcher() as searcher:
+            log.info(f'index doc count = {searcher.doc_count()}')