Mercurial > code > home > repos > sco-bot
annotate scobot/index/access.py @ 13:403eff4a16c8
fix up indexer flow and fastapi server
author | drewp@bigasterisk.com |
---|---|
date | Thu, 11 Jul 2024 21:32:24 -0700 |
parents | 6622bacb0b84 |
children | 6ed25bcaaf1f |
rev | line source |
---|---|
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
1 from pathlib import Path |
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
2 import shutil |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
3 |
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
4 from whoosh.index import create_in, open_dir |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
5 |
11 | 6 from scobot.index.schema import schema |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
7 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
8 log = None # set by flow |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
10 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
class SearchIndex:
    """Writable handle on the whoosh index stored in a directory.

    The constructor either rebuilds the index from scratch or opens the
    existing one, and then opens a single writer that `addDoc` and `commit`
    operate on.
    """

    def __init__(self, indexDir: Path, delete_existing: bool = True) -> None:
        """Open the index in `indexDir` and start a writer on it.

        Args:
            indexDir: directory that holds (or will hold) the index files.
            delete_existing: when true (the default), remove `indexDir` and
                everything in it, recreate it, and create a fresh, empty
                index using `schema`. When false, open the index that is
                already there.
        """
        if delete_existing:
            try:
                shutil.rmtree(indexDir)
            except FileNotFoundError:
                # Nothing to delete on a first run; the directory is
                # (re)created just below either way.
                pass
            indexDir.mkdir(parents=True, exist_ok=True)
            self.ix = create_in(indexDir, schema)
        else:
            self.ix = open_dir(indexDir)
        self.writer = self.ix.writer()

    def addDoc(self, **kw) -> None:
        """Queue one document on the writer; `kw` are the schema field values."""
        self.writer.add_document(**kw)

    def commit(self) -> None:
        """Commit the queued documents and log the resulting document count.

        NOTE(review): whoosh documents that a writer cannot be reused after
        commit(); if so, `addDoc` on this object will fail after this call.
        Confirm against callers.
        """
        self.writer.commit()
        if log is None:
            # The module-level logger is only assigned by the flow; without
            # it, skip the diagnostic instead of failing after a successful
            # commit.
            return
        with self.ix.searcher() as searcher:
            log.info(f'index doc count = {searcher.doc_count()}')
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
29 |
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
class SearchIndexRO:
    """Read-only view of an existing whoosh index directory."""

    def __init__(self, indexDir: Path):
        """Open the index in `indexDir` read-only and keep a searcher on it.

        The searcher is stored on the instance as `self.searcher`; its
        document count is printed once as a startup diagnostic.
        """
        opened = open_dir(indexDir, readonly=True)
        self.ix = opened
        self.searcher = opened.searcher()
        # The `=` form prints the expression text together with its value.
        print(f'{self.searcher.doc_count()=}')