annotate search/search_apex.py @ 9:d1b54241a731

rewrite meeting fetcher
author drewp@bigasterisk.com
date Wed, 10 Jul 2024 12:25:06 -0700
parents f23b21bd0fce
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
1 from pprint import pprint
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
2 from typing import Iterable
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
3 from apexsearch import ApexSearch
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
4
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
5
f23b21bd0fce apex search
drewp@bigasterisk.com
parents:
diff changeset
class Search:
    """Small wrapper around an ``ApexSearch`` index kept under ``data/apex``.

    The wrapper exposes two operations: rebuilding the whole index from an
    iterable of documents, and running a query that yields simplified
    result dicts.
    """

    def __init__(self):
        """Create the underlying ``ApexSearch`` object for the ``docs`` table."""
        # NOTE(review): the meaning of the "content"/"title"/"extras" keys is
        # defined by the apexsearch package, not by this file -- confirm
        # against its documentation before changing the table spec.
        self.apex = ApexSearch(
            'data/apex',
            tables={
                "docs": {
                    "content": ["phrase"],
                    "title": "title",
                    "extras": ["sourceFile", "pos"],
                }
            },
            id_field='id',
        )

    def rebuild(self, docs: Iterable) -> None:
        """Rebuild the complete index from *docs* and report completion.

        Args:
            docs: The documents to index. They are handed to
                ``build_complete_index`` through a callable that ignores
                whatever arguments it is called with and returns *docs*.
                Presumably each document carries an ``id`` field plus the
                configured table fields -- verify against the caller.
        """
        self.apex.build_complete_index(lambda *a: docs)
        print('rebuild complete')

    def search(self, q: str) -> Iterable[dict]:
        """Yield one ``{'title', 'snippetHtml'}`` dict per result for *q*.

        This is a generator function: the query is not sent to the index
        (and nothing is printed) until the returned generator is first
        advanced.

        Args:
            q: The query string passed through to ``ApexSearch.search``.

        Yields:
            Dicts with ``'title'`` taken from the row's ``'title'`` and
            ``'snippetHtml'`` taken from the row's ``'highlighted_content'``.

        Raises:
            KeyError: If the response lacks ``'results'`` or a row lacks
                ``'title'`` / ``'highlighted_content'``.
        """
        res = self.apex.search(q, target_number=100)
        # Dumps the raw response to stdout; kept to preserve existing output.
        pprint(res)
        for row in res['results']:
            yield {
                'title': row['title'],
                'snippetHtml': row['highlighted_content'],
            }