comparison scobot/service/query.py @ 13:403eff4a16c8

fix up indexer flow and fastapi server
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 21:32:24 -0700
parents 6622bacb0b84
children b9c2b7fedbcd
comparison
equal deleted inserted replaced
12:7f36497bfac3 13:403eff4a16c8
1 from scobot.index.access import SearchIndexRO
2 from whoosh.qparser import QueryParser
1 import json 3 import json
2 from pathlib import Path 4 from pathlib import Path
5 from pprint import pprint
6 from contextlib import asynccontextmanager
3 7
4 # from pymilvus import MilvusClient 8 # from pymilvus import MilvusClient
5 # from milvus_model.dense.onnx import OnnxEmbeddingFunction 9 # from milvus_model.dense.onnx import OnnxEmbeddingFunction
6 from fastapi import FastAPI 10 from fastapi import FastAPI
7 from tqdm import tqdm 11 from tqdm import tqdm
32 docs.append(doc) 36 docs.append(doc)
33 res = client.insert(collection_name="demo_collection", data=docs) 37 res = client.insert(collection_name="demo_collection", data=docs)
34 print('insert:', res['insert_count']) 38 print('insert:', res['insert_count'])
35 39
36 40
37 def xxsearch(q, embedding_fn, client):
38 query_vectors = embedding_fn.encode_queries([q])
39
40 [query_result] = client.search(
41 collection_name="demo_collection",
42 data=query_vectors,
43 limit=5,
44 output_fields=["text"],
45 )
46 query_result.sort(key=lambda x: x["distance"], reverse=True)
47
48 for row in query_result:
49 print(f'{row["distance"]:.6f} {row["entity"]["text"]}')
50
51
52 # q, = sys.argv[1:]
53
54 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending 41 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending
55 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en") 42 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en")
56 # client = MilvusClient("milvus_demo.db") 43 # client = MilvusClient("milvus_demo.db")
57 # rebuild(client, embedding_fn, dim=embedding_fn.dim) 44 # rebuild(client, embedding_fn, dim=embedding_fn.dim)
58 # search(q, embedding_fn, client) 45 # search(q, embedding_fn, client)
59 46
60 app = FastAPI()
61 47
62 #search = Search() 48 @asynccontextmanager
49 async def lifespan(app: FastAPI):
50 app.state.index = SearchIndexRO('/tmp/scoindex')
51 yield
63 52
53 app = FastAPI(lifespan=lifespan)
64 54
65 @app.get("/sco/query") 55 @app.get("/sco/query")
66 def read_query1(q: str): 56 def read_query1(q: str):
67 results = [] 57 index = app.state.index
68 results = search.search(q)
69 58
59 query = QueryParser("phrase", index.ix.schema).parse(q)
60 pprint(query)
61 results = list(index.searcher.search(query))
70 return {"results": results} 62 return {"results": results}