diff scobot/index/build_index_flow.py @ 13:403eff4a16c8

fix up indexer flow and fastapi server
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 21:32:24 -0700
parents 6622bacb0b84
children b9c2b7fedbcd
line wrap: on
line diff
--- a/scobot/index/build_index_flow.py	Thu Jul 11 18:16:20 2024 -0700
+++ b/scobot/index/build_index_flow.py	Thu Jul 11 21:32:24 2024 -0700
@@ -71,9 +71,9 @@
         pass
     else:
         html = getCityPermanent(agendaUrl)
-        text = extractMeetingText(html)
-        # todo group phrases phrasesFromFile
-        index.addDoc(title=f'{mtg["date"]} {mtg["title"]}', content=text)
+        texts = extractMeetingText(html)
+        for se in nltk.sent_tokenize(' '.join(texts)):
+            index.addDoc(sourceTitle=f'{mtg["date"]} {mtg["title"]}', phrase=se)
 
     try:
         videoUrl = mtg['videoUrl']
@@ -102,4 +102,4 @@
 
 
 if __name__ == '__main__':
-    buildIndex.serve()
+    buildIndex.serve(name='buildIndex')