import nest_asyncio
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
import logging
import sys
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.legacy.llms.huggingface import HuggingFaceInferenceAPI, HuggingFaceLLM
from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.openai import OpenAI
import os
import pandas as pd
from llama_index.core import Document
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
import time
from huggingface_hub import login
# Apply the nest_asyncio patch (presumably so nested event loops work in the
# hosting environment -- confirm against the deployment target).
nest_asyncio.apply()

# The Hugging Face API token is read from the `hf_token` environment variable;
# it is not hard-coded in this file.
hf_token = os.getenv('hf_token')
if not hf_token:
    # login() is still called below exactly as before; this message only makes
    # the root cause obvious if authentication then prompts or fails.
    print(
        "Warning: environment variable 'hf_token' is not set; "
        "Hugging Face login may prompt for a token or fail.",
        file=sys.stderr,
    )
login(token=hf_token)
# Load the model weights in 4-bit NF4 (float16 compute, double quantization)
# to save memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# The same Hugging Face repository id is used for both the model and its
# tokenizer.
_LLM_REPO_ID = "kheopss/kheops_hermes-e1-v0.11-bnb-16bit"

# Generation LLM, placed on the first CUDA device.
llm = HuggingFaceLLM(
    model_name=_LLM_REPO_ID,
    tokenizer_name=_LLM_REPO_ID,
    context_window=3900,
    max_new_tokens=2560,
    model_kwargs=dict(quantization_config=quantization_config),
    generate_kwargs=dict(temperature=0.1, top_k=50, top_p=0.95),
    device_map="cuda:0",
)

# Embedding model used to vectorize documents and queries.
embed_model = HuggingFaceEmbedding(model_name="kheopss/kheops_embedding_e5_v3")

# Register both models as the global LlamaIndex defaults.
Settings.llm = llm
Settings.embed_model = embed_model
def _row_to_document(row):
    """Build a Document from one row of the loaded JSON table.

    The row's 'values' field becomes the document text; 'file_name' and
    'file_description' are stored as the "filename" and "description"
    metadata entries.
    """
    doc_metadata = {
        "filename": row['file_name'],
        "description": row['file_description'],
    }
    return Document(text=row['values'], metadata=doc_metadata)


# Path to the JSON file holding the source records; change it to point at
# your own data file.
file_path = 'response_metropo_cleaned.json'
data = pd.read_json(file_path)

# One Document per row, then a vector index over all of them.
documents = [_row_to_document(record) for _, record in data.iterrows()]
index = VectorStoreIndex.from_documents(documents, show_progress=True)
def get_retrieved_nodes(
query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
query_bundle = QueryBundle(query_str)
# configure retriever
phase_01_start = time.time()
retriever = VectorIndexRetriever(
index=index,
similarity_top_k=vector_top_k,
)
retrieved_nodes = retriever.retrieve(query_bundle)
phase_01_end = time.time()
print(f"Phase 01
Made by KHEOPS AI
""" demo = gr.ChatInterface( fn=process_final, title="METROPOLE CHATBOT", description=description, ) demo.launch(share=True, debug =True)