annotate search/search_base.py @ 8:f23b21bd0fce

apex search
author drewp@bigasterisk.com
date Sun, 07 Jul 2024 16:26:56 -0700
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
1
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
2 from dataclasses import dataclass
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
3 import json
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
4 from typing import Iterable
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
5
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
6 from search.extract_pdf import files, phrasesFromFile
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
7
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
8
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
@dataclass
class Doc:
    """One indexed phrase together with where it was found.

    Fields are readable as attributes and also, mapping-style, through
    ``doc[name]`` and ``doc.pop(name)``.
    """

    # Sequence number assigned by the producer of the documents.
    id: int
    # Display title; allDocs() fills this with the source path's ``name``.
    title: str
    # String form of the source path.
    sourceFile: str
    # JSON-encoded position of the phrase inside the source file.
    posJson: str
    # The phrase text itself.
    phrase: str

    def __getitem__(self, k):
        """Return the attribute called ``k``, so ``doc["title"]`` works."""
        return getattr(self, k)

    # ``pop`` is the very same function as ``__getitem__``: it only reads
    # the field and removes nothing.
    pop = __getitem__
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
21
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
22
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
def allDocs() -> Iterable[Doc]:
    """Yield one Doc per phrase of every file returned by files().

    Documents are produced lazily, in the order files() and
    phrasesFromFile() deliver them. Ids start at 0 and increase by one
    per yielded document, running on across file boundaries.
    """
    # Flatten (file, phrase) pairs lazily so enumerate() can number them
    # continuously without shadowing the builtin ``id`` with a counter.
    phrases = ((src, pos, line)
               for src in files()
               for pos, line in phrasesFromFile(src))
    for docId, (src, pos, line) in enumerate(phrases):
        yield Doc(id=docId,
                  title=src.name,
                  sourceFile=str(src),
                  posJson=json.dumps(pos),
                  phrase=line)