diff scobot/index/access.py @ 13:403eff4a16c8

fix up indexer flow and fastapi server
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 21:32:24 -0700
parents 6622bacb0b84
children 6ed25bcaaf1f
line wrap: on
line diff
--- a/scobot/index/access.py	Thu Jul 11 18:16:20 2024 -0700
+++ b/scobot/index/access.py	Thu Jul 11 21:32:24 2024 -0700
@@ -1,6 +1,7 @@
 from pathlib import Path
+import shutil
 
-from whoosh.index import create_in
+from whoosh.index import create_in, open_dir
 
 from scobot.index.schema import schema
 
@@ -9,9 +10,13 @@
 
 class SearchIndex:
 
-    def __init__(self, indexDir: Path):
-        indexDir.mkdir(parents=True, exist_ok=True)
-        self.ix = create_in(indexDir, schema)
+    def __init__(self, indexDir: Path, delete_existing=True):
+        if delete_existing:
+            shutil.rmtree(indexDir)
+            indexDir.mkdir(parents=True, exist_ok=True)
+            self.ix = create_in(indexDir, schema)
+        else:
+            self.ix = open_dir(indexDir)
         self.writer = self.ix.writer()
 
     def addDoc(self, **kw):
@@ -21,3 +26,9 @@
         self.writer.commit()
         with self.ix.searcher() as searcher:
             log.info(f'index doc count = {searcher.doc_count()}')
+
+class SearchIndexRO:
+    def __init__(self, indexDir: Path):
+        self.ix = open_dir(indexDir, readonly=True)
+        self.searcher = self.ix.searcher()
+        print(f'{self.searcher.doc_count()=}')
\ No newline at end of file