view flow/search_index.py @ 10:13438795d896

rewrite with prefect flows and whoosh search, but it's in a nested pdm env
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 17:35:31 -0700
parents
children
line wrap: on
line source

from pathlib import Path

from whoosh.fields import ID
from whoosh.index import create_in

from schema import schema

# Module-level logger that SearchIndex.commit() reports through. It is None
# in this module and is assigned from outside by the flow.
log = None  # set by flow


class SearchIndex:
    """Thin wrapper around a whoosh index and a single writer opened on it.

    Documents are queued with addDoc() and written out by commit().
    """

    def __init__(self, indexDir: Path) -> None:
        """Ensure the index directory exists, create the index, open a writer.

        Args:
            indexDir: directory holding the index files; missing parent
                directories are created too.
        """
        indexDir.mkdir(parents=True, exist_ok=True)
        # NOTE(review): create_in() builds a new index from `schema`; it
        # presumably replaces any index already stored in indexDir -- confirm.
        self.ix = create_in(indexDir, schema)
        self.writer = self.ix.writer()

    def addDoc(self, **kw) -> None:
        """Add one document; keyword arguments go straight to add_document()."""
        self.writer.add_document(**kw)

    def commit(self) -> None:
        """Commit the writer, then report the index's document count.

        The count is logged through the module-level `log`. When no logger
        has been assigned, the report is skipped instead of failing.
        """
        self.writer.commit()
        # `log` stays None until the flow assigns it. Calling log.info() on
        # None would raise AttributeError *after* the commit already
        # succeeded, so treat the count report as optional.
        if log is None:
            return
        with self.ix.searcher() as searcher:
            log.info(f'index doc count = {searcher.doc_count()}')