import os
import time

import gradio as gr
import lancedb
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Connect to the local LanceDB database and read the app configuration
# from environment variables.
db = lancedb.connect(".lancedb")
TABLE = db.open_table(os.getenv("TABLE_NAME"))
VECTOR_COLUMN = os.getenv("VECTOR_COLUMN", "vector")
TEXT_COLUMN = os.getenv("TEXT_COLUMN", "text")
BATCH_SIZE = int(os.getenv("BATCH_SIZE", 32))
CROSS_ENCODER = os.getenv("CROSS_ENCODER")

# Bi-encoder used to embed queries for vector search, and cross-encoder
# used to rerank the retrieved passages.
retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
cross_encoder = AutoModelForSequenceClassification.from_pretrained(CROSS_ENCODER)
cross_encoder.eval()
cross_encoder_tokenizer = AutoTokenizer.from_pretrained(CROSS_ENCODER)


def rerank(query, documents, k):
    """Use the cross-encoder to rerank documents retrieved by the retriever."""
    # Score every (query, document) pair in a single batch.
    tokens = cross_encoder_tokenizer(
        [query] * len(documents),
        documents,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = cross_encoder(**tokens).logits
    scores = logits.reshape(-1).tolist()
    # Sort documents by score, highest first, and keep the top k.
    documents = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
    return [doc for doc, _ in documents[:k]]


def retrieve(query, top_k_retriever=25, use_reranking=True, top_k_reranker=5):
    """Embed the query, run a vector search, and optionally rerank the hits."""
    query_vec = retriever.encode(query)
    try:
        documents = (
            TABLE.search(query_vec, vector_column_name=VECTOR_COLUMN)
            .limit(top_k_retriever)
            .to_list()
        )
        documents = [doc[TEXT_COLUMN] for doc in documents]

        if use_reranking:
            documents = rerank(query, documents, top_k_reranker)

        return documents
    except Exception as e:
        # Surface retrieval errors in the Gradio UI instead of crashing.
        raise gr.Error(str(e))
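

# Illustrative usage sketch (an assumption, not part of the original app):
# with TABLE_NAME, EMB_MODEL, and CROSS_ENCODER set and the LanceDB table
# already populated, `retrieve` can be exercised directly from the command
# line. The query string below is hypothetical; the Gradio UI that normally
# drives `retrieve` is defined elsewhere.
if __name__ == "__main__":
    hits = retrieve(
        "How do I create a LanceDB table?",
        top_k_retriever=25,
        use_reranking=True,
        top_k_reranker=5,
    )
    for i, passage in enumerate(hits, start=1):
        # Print a short preview of each reranked passage.
        print(f"[{i}] {passage[:200]}")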