view search/search_base.py @ 8:f23b21bd0fce

apex search
author drewp@bigasterisk.com
date Sun, 07 Jul 2024 16:26:56 -0700
parents
children
line wrap: on
line source


from dataclasses import dataclass
import json
from typing import Iterable

from search.extract_pdf import files, phrasesFromFile


@dataclass
class Doc:
    """One extracted phrase plus the details of the file it came from.

    Fields can be read as attributes, with mapping-style subscripting
    (``doc["title"]``), or through ``pop``.
    """
    id: int
    title: str
    sourceFile: str
    posJson: str  # JSON text; allDocs() fills it with json.dumps(pos)
    phrase: str

    def __getitem__(self, k):
        """Return the attribute named ``k`` (AttributeError if there is none)."""
        return getattr(self, k)

    def pop(self, k):
        """Return the attribute named ``k``; unlike dict.pop, nothing is removed."""
        return getattr(self, k)


def allDocs() -> Iterable[Doc]:
    """Lazily yield one Doc per phrase of every file returned by files().

    Ids are consecutive integers starting at 0, counted across all files in
    the order the phrases are produced.
    """
    # Flatten the (file, phrase) nesting into one lazy stream so enumerate()
    # can hand out a single running id.
    phrases = ((path, where, text)
               for path in files()
               for where, text in phrasesFromFile(path))
    for docId, (path, where, text) in enumerate(phrases):
        yield Doc(
            id=docId,
            title=path.name,
            sourceFile=str(path),
            posJson=json.dumps(where),
            phrase=text,
        )