Mercurial > code > home > repos > sco-bot
comparison scobot/service/query.py @ 13:403eff4a16c8
fix up indexer flow and fastapi server
author | drewp@bigasterisk.com |
---|---|
date | Thu, 11 Jul 2024 21:32:24 -0700 |
parents | 6622bacb0b84 |
children | b9c2b7fedbcd |
comparison legend:
- equal
- deleted
- inserted
- replaced
12:7f36497bfac3 | 13:403eff4a16c8 |
---|---|
1 from scobot.index.access import SearchIndexRO | |
2 from whoosh.qparser import QueryParser | |
1 import json | 3 import json |
2 from pathlib import Path | 4 from pathlib import Path |
5 from pprint import pprint | |
6 from contextlib import asynccontextmanager | |
3 | 7 |
4 # from pymilvus import MilvusClient | 8 # from pymilvus import MilvusClient |
5 # from milvus_model.dense.onnx import OnnxEmbeddingFunction | 9 # from milvus_model.dense.onnx import OnnxEmbeddingFunction |
6 from fastapi import FastAPI | 10 from fastapi import FastAPI |
7 from tqdm import tqdm | 11 from tqdm import tqdm |
32 docs.append(doc) | 36 docs.append(doc) |
33 res = client.insert(collection_name="demo_collection", data=docs) | 37 res = client.insert(collection_name="demo_collection", data=docs) |
34 print('insert:', res['insert_count']) | 38 print('insert:', res['insert_count']) |
35 | 39 |
36 | 40 |
37 def xxsearch(q, embedding_fn, client): | |
38 query_vectors = embedding_fn.encode_queries([q]) | |
39 | |
40 [query_result] = client.search( | |
41 collection_name="demo_collection", | |
42 data=query_vectors, | |
43 limit=5, | |
44 output_fields=["text"], | |
45 ) | |
46 query_result.sort(key=lambda x: x["distance"], reverse=True) | |
47 | |
48 for row in query_result: | |
49 print(f'{row["distance"]:.6f} {row["entity"]["text"]}') | |
50 | |
51 | |
52 # q, = sys.argv[1:] | |
53 | |
54 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending | 41 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending |
55 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en") | 42 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en") |
56 # client = MilvusClient("milvus_demo.db") | 43 # client = MilvusClient("milvus_demo.db") |
57 # rebuild(client, embedding_fn, dim=embedding_fn.dim) | 44 # rebuild(client, embedding_fn, dim=embedding_fn.dim) |
58 # search(q, embedding_fn, client) | 45 # search(q, embedding_fn, client) |
59 | 46 |
60 app = FastAPI() | |
61 | 47 |
62 #search = Search() | 48 @asynccontextmanager |
49 async def lifespan(app: FastAPI): | |
50 app.state.index = SearchIndexRO('/tmp/scoindex') | |
51 yield | |
63 | 52 |
53 app = FastAPI(lifespan=lifespan) | |
64 | 54 |
65 @app.get("/sco/query") | 55 @app.get("/sco/query") |
66 def read_query1(q: str): | 56 def read_query1(q: str): |
67 results = [] | 57 index = app.state.index |
68 results = search.search(q) | |
69 | 58 |
59 query = QueryParser("phrase", index.ix.schema).parse(q) | |
60 pprint(query) | |
61 results = list(index.searcher.search(query)) | |
70 return {"results": results} | 62 return {"results": results} |