Mercurial > code > home > repos > sco-bot
comparison scobot/service/query.py @ 11:6622bacb0b84
first pass at reorg
author | drewp@bigasterisk.com |
---|---|
date | Thu, 11 Jul 2024 18:15:44 -0700 |
parents | search/query.py@f23b21bd0fce |
children | 403eff4a16c8 |
comparison
equal
deleted
inserted
replaced
10:13438795d896 | 11:6622bacb0b84 |
---|---|
1 import json | |
2 from pathlib import Path | |
3 | |
4 # from pymilvus import MilvusClient | |
5 # from milvus_model.dense.onnx import OnnxEmbeddingFunction | |
6 from fastapi import FastAPI | |
7 from tqdm import tqdm | |
8 | |
9 | |
def rebuild(client, embedding_fn, dim):
    """Recreate the "demo_collection" collection and refill it from the PDF.

    The collection is dropped, created again with dimension `dim` if it is
    then absent, and populated with one record per (bbox, phrase) pair
    yielded by `phrasesFromFile` for the hard-coded PDF under ``data/``.

    client: object providing drop_collection / has_collection /
        create_collection / insert (presumably a MilvusClient, per the
        commented-out import in this module -- confirm).
    embedding_fn: object whose `encode_documents([phrase])` returns exactly
        one vector for the single phrase passed in.
    dim: vector dimension handed to `create_collection`.

    Prints the `insert_count` reported by the insert call; returns None.
    """
    name = "demo_collection"

    # Start from a clean slate; the has_collection guard is kept from the
    # original even though the drop above normally makes it true.
    client.drop_collection(collection_name=name)
    if not client.has_collection(collection_name=name):
        client.create_collection(collection_name=name, dimension=dim)

    pdf_path = Path("data") / "Meetings2226Minutes_20240702182359526 (1).pdf"
    numbered = enumerate(phrasesFromFile(pdf_path))

    rows = []
    for idx, (bbox, phrase) in tqdm(numbered,
                                    desc="rebuilding",
                                    unit=' phrase'):
        # One phrase in, so exactly one vector must come back; the unpack
        # raises ValueError otherwise.
        (vector,) = embedding_fn.encode_documents([phrase])
        rows.append({
            "id": idx,
            "vector": vector,
            "text": phrase,
            # bbox is stored as a JSON string rather than a structured value.
            "bbox": json.dumps(bbox),
        })

    # Single bulk insert once every phrase has been embedded.
    outcome = client.insert(collection_name=name, data=rows)
    print('insert:', outcome['insert_count'])
35 | |
36 | |
def xxsearch(q, embedding_fn, client):
    """Search "demo_collection" for `q` and print the hits.

    q: query text; it is encoded via `embedding_fn.encode_queries([q])`.
    embedding_fn: object providing `encode_queries`.
    client: object providing `search`; the call asks for at most 5 hits
        with the "text" output field and must return exactly one result
        list (one per query), otherwise the unpack raises ValueError.

    The result list is sorted in place by descending "distance" and each
    hit is printed as ``<distance to 6 decimals> <text>``. Returns None.
    """
    encoded = embedding_fn.encode_queries([q])
    per_query_hits = client.search(
        collection_name="demo_collection",
        data=encoded,
        limit=5,
        output_fields=["text"],
    )

    # Only one query was submitted, so only one hit list is expected.
    (hits,) = per_query_hits

    def by_distance(hit):
        return hit["distance"]

    # In-place sort, largest "distance" value first.
    hits.sort(key=by_distance, reverse=True)

    for hit in hits:
        score = hit["distance"]
        text = hit["entity"]["text"]
        print(f'{score:.6f} {text}')
50 | |
51 | |
52 # q, = sys.argv[1:] | |
53 | |
54 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending | |
55 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en") | |
56 # client = MilvusClient("milvus_demo.db") | |
57 # rebuild(client, embedding_fn, dim=embedding_fn.dim) | |
58 # search(q, embedding_fn, client) | |
59 | |
60 app = FastAPI() | |
61 | |
62 #search = Search() | |
63 | |
64 | |
@app.get("/sco/query")
def read_query1(q: str):
    """Handle GET /sco/query.

    q: the query string, passed unchanged to `search.search`.

    Returns a dict with a single "results" key holding whatever
    `search.search(q)` returned.
    """
    # NOTE(review): `search` is not bound anywhere in the visible part of
    # this module (the `search = Search()` line above is commented out), so
    # this raises NameError unless it is provided elsewhere -- confirm.
    return {"results": search.search(q)}