comparison scobot/index/access.py @ 13:403eff4a16c8

fix up indexer flow and fastapi server
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 21:32:24 -0700
parents 6622bacb0b84
children 6ed25bcaaf1f
comparison
equal deleted inserted replaced
12:7f36497bfac3 13:403eff4a16c8
1 from pathlib import Path 1 from pathlib import Path
2 import shutil
2 3
3 from whoosh.index import create_in 4 from whoosh.index import create_in, open_dir
4 5
5 from scobot.index.schema import schema 6 from scobot.index.schema import schema
6 7
7 log = None # set by flow 8 log = None # set by flow
8 9
9 10
10 class SearchIndex: 11 class SearchIndex:
11 12
12 def __init__(self, indexDir: Path): 13 def __init__(self, indexDir: Path, delete_existing=True):
13 indexDir.mkdir(parents=True, exist_ok=True) 14 if delete_existing:
14 self.ix = create_in(indexDir, schema) 15 shutil.rmtree(indexDir)
16 indexDir.mkdir(parents=True, exist_ok=True)
17 self.ix = create_in(indexDir, schema)
18 else:
19 self.ix = open_dir(indexDir)
15 self.writer = self.ix.writer() 20 self.writer = self.ix.writer()
16 21
17 def addDoc(self, **kw): 22 def addDoc(self, **kw):
18 self.writer.add_document(**kw) 23 self.writer.add_document(**kw)
19 24
20 def commit(self): 25 def commit(self):
21 self.writer.commit() 26 self.writer.commit()
22 with self.ix.searcher() as searcher: 27 with self.ix.searcher() as searcher:
23 log.info(f'index doc count = {searcher.doc_count()}') 28 log.info(f'index doc count = {searcher.doc_count()}')
29
30 class SearchIndexRO:
31 def __init__(self, indexDir: Path):
32 self.ix = open_dir(indexDir, readonly=True)
33 self.searcher = self.ix.searcher()
34 print(f'{self.searcher.doc_count()=}')