diff search/query.py @ 8:f23b21bd0fce

apex search
author drewp@bigasterisk.com
date Sun, 07 Jul 2024 16:26:56 -0700
parents 0e33c65f1904
children
line wrap: on
line diff
--- a/search/query.py	Sat Jul 06 16:45:19 2024 -0700
+++ b/search/query.py	Sun Jul 07 16:26:56 2024 -0700
@@ -1,15 +1,21 @@
+from dataclasses import dataclass
+import html
 import json
+from pprint import pprint
 import sys
 from pathlib import Path
+from typing import Iterable
 
 from tqdm import tqdm
 
 from pymilvus import MilvusClient
 from milvus_model.dense.onnx import OnnxEmbeddingFunction
 
-from extract_pdf import phrasesFromFile
+from extract_pdf import files, phrasesFromFile
 
 from fastapi import FastAPI
+from search_apex import Search
+
 
 def rebuild(client, embedding_fn, dim):
     client.drop_collection(collection_name="demo_collection")
@@ -24,8 +30,8 @@
             phrasesFromFile(
                 Path("data") /
                 "Meetings2226Minutes_20240702182359526 (1).pdf")),
-                desc="rebuilding",
-                unit=' phrase'):
+                                  desc="rebuilding",
+                                  unit=' phrase'):
         [vector] = embedding_fn.encode_documents([phrase])
         doc = {
             "id": i,
@@ -38,7 +44,7 @@
     print('insert:', res['insert_count'])
 
 
-def search(q, embedding_fn, client):
+def xxsearch(q, embedding_fn, client):
     query_vectors = embedding_fn.encode_queries([q])
 
     [query_result] = client.search(
@@ -63,8 +69,12 @@
 
 app = FastAPI()
 
+search = Search()
+
 
 @app.get("/sco/query")
-def read_query1(q: str|None):
-    print(f'1 {q=}')
-    return {"Hello": "World"}
+def read_query1(q: str):
+    """Return whatever `search.search(q)` returns, wrapped under "results"."""
+    results = search.search(q)
+
+    return {"results": results}