diff scobot/service/query.py @ 13:403eff4a16c8

fix up indexer flow and fastapi server
author drewp@bigasterisk.com
date Thu, 11 Jul 2024 21:32:24 -0700
parents 6622bacb0b84
children b9c2b7fedbcd
line wrap: on
line diff
--- a/scobot/service/query.py	Thu Jul 11 18:16:20 2024 -0700
+++ b/scobot/service/query.py	Thu Jul 11 21:32:24 2024 -0700
@@ -1,5 +1,9 @@
+from scobot.index.access import SearchIndexRO
+from whoosh.qparser import QueryParser
 import json
 from pathlib import Path
+from pprint import pprint
+from contextlib import asynccontextmanager
 
 # from pymilvus import MilvusClient
 # from milvus_model.dense.onnx import OnnxEmbeddingFunction
@@ -34,37 +38,25 @@
     print('insert:', res['insert_count'])
 
 
-def xxsearch(q, embedding_fn, client):
-    query_vectors = embedding_fn.encode_queries([q])
-
-    [query_result] = client.search(
-        collection_name="demo_collection",
-        data=query_vectors,
-        limit=5,
-        output_fields=["text"],
-    )
-    query_result.sort(key=lambda x: x["distance"], reverse=True)
-
-    for row in query_result:
-        print(f'{row["distance"]:.6f} {row["entity"]["text"]}')
-
-
-# q, = sys.argv[1:]
-
 # https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending
 # embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en")
 # client = MilvusClient("milvus_demo.db")
 # rebuild(client, embedding_fn, dim=embedding_fn.dim)
 # search(q, embedding_fn, client)
 
-app = FastAPI()
 
-#search = Search()
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    app.state.index = SearchIndexRO('/tmp/scoindex')
+    yield
 
+app = FastAPI(lifespan=lifespan)
 
 @app.get("/sco/query")
 def read_query1(q: str):
-    results = []
-    results = search.search(q)
+    index = app.state.index
 
+    query = QueryParser("phrase", index.ix.schema).parse(q)
+    pprint(query)
+    results = list(index.searcher.search(query))
     return {"results": results}