comparison scobot/service/query.py @ 11:6622bacb0b84

first pass at reorg
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 18:15:44 -0700
parents search/query.py@f23b21bd0fce
children 403eff4a16c8
comparison
equal deleted inserted replaced
10:13438795d896 11:6622bacb0b84
1 import json
2 from pathlib import Path
3
4 # from pymilvus import MilvusClient
5 # from milvus_model.dense.onnx import OnnxEmbeddingFunction
6 from fastapi import FastAPI
7 from tqdm import tqdm
8
9
def rebuild(client, embedding_fn, dim, *, collection_name="demo_collection",
            pdf_path=None):
    """Drop and recreate a collection, then index every phrase of one PDF.

    Each phrase yielded for the PDF is embedded on its own and stored as a
    record with an integer ``id`` (its position in the stream), the
    ``vector``, the phrase ``text`` and the JSON-encoded ``bbox``. All records
    are inserted in a single call, and the reported insert count is printed.

    Args:
        client: Vector-store client providing ``drop_collection``,
            ``has_collection``, ``create_collection`` and ``insert``
            (presumably a ``pymilvus.MilvusClient``, going by the
            commented-out import in this file -- confirm against the caller).
        embedding_fn: Object whose ``encode_documents(list_of_str)`` returns
            one vector per input string.
        dim: Vector dimension handed to ``create_collection``.
        collection_name: Name of the collection to rebuild.
        pdf_path: Path of the PDF to index. ``None`` selects the
            meeting-minutes file under ``data/`` that was previously
            hard-coded here.
    """
    if pdf_path is None:
        pdf_path = Path("data") / "Meetings2226Minutes_20240702182359526 (1).pdf"

    client.drop_collection(collection_name=collection_name)
    # Defensive: only create the collection if the drop really removed it.
    if not client.has_collection(collection_name=collection_name):
        client.create_collection(
            collection_name=collection_name,
            dimension=dim,
        )

    # NOTE(review): phrasesFromFile is neither defined nor imported in this
    # file; it has to be brought into scope before this function can run.
    # It is expected to yield (bbox, phrase) pairs.
    numbered_phrases = enumerate(phrasesFromFile(pdf_path))

    docs = []
    for i, (bbox, phrase) in tqdm(numbered_phrases,
                                  desc="rebuilding",
                                  unit=' phrase'):
        # One phrase in, exactly one vector out; the unpacking enforces that.
        [vector] = embedding_fn.encode_documents([phrase])
        docs.append({
            "id": i,
            "vector": vector,
            "text": phrase,
            "bbox": json.dumps(bbox),
        })

    res = client.insert(collection_name=collection_name, data=docs)
    print('insert:', res['insert_count'])
35
36
def xxsearch(q, embedding_fn, client):
    """Print up to five hits for *q* from ``demo_collection``.

    The query text is embedded with ``embedding_fn.encode_queries`` and the
    resulting vectors are passed to ``client.search``. The single result list
    is re-sorted in place by descending ``distance``, and each hit is printed
    as its distance (six decimals) followed by its text.
    """

    def by_distance(hit):
        return hit["distance"]

    search_args = {
        "collection_name": "demo_collection",
        "data": embedding_fn.encode_queries([q]),
        "limit": 5,
        "output_fields": ["text"],
    }
    # One query goes in, so exactly one result list must come back; the
    # unpacking raises ValueError otherwise.
    (hits,) = client.search(**search_args)
    hits.sort(key=by_distance, reverse=True)

    for hit in hits:
        print(f'{hit["distance"]:.6f} {hit["entity"]["text"]}')
50
51
52 # q, = sys.argv[1:]
53
54 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending
55 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en")
56 # client = MilvusClient("milvus_demo.db")
57 # rebuild(client, embedding_fn, dim=embedding_fn.dim)
58 # search(q, embedding_fn, client)
59
app = FastAPI()

# TODO: `search` stays unbound in this module while the line below is
# commented out, so read_query1 raises NameError when it is called.
#search = Search()


@app.get("/sco/query")
def read_query1(q: str):
    """Handle ``GET /sco/query`` by running the search for *q*.

    Args:
        q: Query text; FastAPI fills it from the ``q`` query parameter.

    Returns:
        A dict ``{"results": ...}`` wrapping whatever ``search.search(q)``
        returns.
    """
    # NOTE(review): relies on a module-level `search` object exposing a
    # `.search(q)` method; nothing in this file binds that name yet.
    return {"results": search.search(q)}