Mercurial > code > home > repos > sco-bot
annotate scobot/index/access.py @ 18:a527228aa353 default tip
prefect use postgres
author | drewp@bigasterisk.com |
---|---|
date | Fri, 19 Jul 2024 21:01:09 -0700 |
parents | 7a87ba2f00d9 |
children |
rev | line source |
---|---|
16
7a87ba2f00d9
reformat, fix some types, make more async
drewp@bigasterisk.com
parents:
15
diff
changeset
|
1 import logging |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
2 from pathlib import Path |
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
3 import shutil |
16
7a87ba2f00d9
reformat, fix some types, make more async
drewp@bigasterisk.com
parents:
15
diff
changeset
|
4 from typing import cast |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
5 |
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
6 from whoosh.index import create_in, open_dir |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
7 |
11 | 8 from scobot.index.schema import schema |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
9 |
16
7a87ba2f00d9
reformat, fix some types, make more async
drewp@bigasterisk.com
parents:
15
diff
changeset
|
10 log = cast(logging.Logger, None) # set by flow; the cast only satisfies type checkers, the runtime value is None until assigned |
10
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
11 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
12 |
13438795d896
rewrite with prefect flows and whoosh search, but it's in a nested pdm env
drewp@bigasterisk.com
parents:
diff
changeset
|
class SearchIndex:
    """Writable handle on the Whoosh index stored in a directory.

    Construction opens (or rebuilds) the index and opens a writer on it.
    Documents are staged with addDoc() and made visible with commit().
    The object stays usable after commit(): the next addDoc() opens a
    fresh writer.
    """

    def __init__(self, indexDir: Path, delete_existing=True):
        """Open the index at indexDir for writing.

        Args:
            indexDir: directory that holds the index files.
            delete_existing: when true (the default), remove whatever is
                at indexDir and create an empty index from the imported
                schema; when false, open the index already stored there.
        """
        if delete_existing:
            # Best-effort removal: a missing directory is not an error.
            shutil.rmtree(indexDir, ignore_errors=True)
            indexDir.mkdir(parents=True, exist_ok=True)
            self.ix = create_in(indexDir, schema)
        else:
            self.ix = open_dir(indexDir)
        # Opened eagerly, as before; replaced lazily after each commit().
        self.writer = self.ix.writer()

    def addDoc(self, **kw):
        """Stage one document; kw is passed through to add_document()."""
        if self.writer is None:
            # The previous writer was consumed by commit(); a Whoosh
            # writer cannot be reused once committed, so open a new one.
            self.writer = self.ix.writer()
        self.writer.add_document(**kw)

    def commit(self):
        """Commit staged documents and log the resulting document count.

        Calling this with nothing staged since the last commit only logs
        the current count.
        """
        pending, self.writer = self.writer, None
        if pending is not None:
            pending.commit()

        # The module-level `log` is None unless a flow has assigned it;
        # fall back to a standard logger so a successful commit is not
        # followed by an AttributeError.
        logger = log if log is not None else logging.getLogger(__name__)
        with self.ix.searcher() as searcher:
            logger.info(f'index doc count = {searcher.doc_count()}')
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
31 |
16
7a87ba2f00d9
reformat, fix some types, make more async
drewp@bigasterisk.com
parents:
15
diff
changeset
|
32 |
13
403eff4a16c8
fix up indexer flow and fastapi server
drewp@bigasterisk.com
parents:
11
diff
changeset
|
class SearchIndexRO:
    """Read-only handle on an existing Whoosh index.

    One searcher is opened at construction and kept on `self.searcher`.
    Call close(), or use the object in a `with` statement, to release it.
    """

    def __init__(self, indexDir: Path):
        """Open the index at indexDir read-only and print its doc count.

        Args:
            indexDir: directory that holds an already-built index.
        """
        self.ix = open_dir(indexDir, readonly=True)
        self.searcher = self.ix.searcher()
        print(f'{self.searcher.doc_count()=}')

    def close(self):
        """Close the searcher opened by the constructor."""
        self.searcher.close()

    def __enter__(self):
        """Return self so the index can be used as a context manager."""
        return self

    def __exit__(self, *exc):
        """Close the searcher on exit; exceptions are not suppressed."""
        self.close()