"""Gradio chatbot answering questions about Indian Union Budget data.

A random sample of rows from ``dataset.csv`` is rendered into a prompt and
sent through a Haystack pipeline to an OpenAI-compatible endpoint.
"""

import logging
import os
import textwrap

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.utils import Secret

# Load variables from a local .env file before anything reads the environment.
load_dotenv()

logger = logging.getLogger(__name__)

MODEL = "microsoft/Phi-3-mini-4k-instruct"

# Upper bound on the number of CSV rows sampled into each prompt.
SAMPLE_ROWS = 10
# Upper bound on the length (in characters) of the budget data in the prompt.
MAX_CONTEXT_CHARS = 1000

# Load the CSV file
df = pd.read_csv("dataset.csv")

# Set up components
# `{{budget_data}}` and `{{query}}` are filled in per request by `answer_query`.
prompt_template = (
    " Based on the Indian Union Budget data for FY 21-22 to 23-24: "
    "{{budget_data}} "
    "Answer the given question: {{query}} "
    "Answer: "
)

prompt_builder = PromptBuilder(template=prompt_template)
llm = OpenAIGenerator(
    api_key=Secret.from_env_var("MONSTER_API_KEY"),
    api_base_url="https://llm.monsterapi.ai/v1/",
    model=MODEL,
    generation_kwargs={"max_tokens": 512},
)

pipeline = Pipeline()
pipeline.add_component("prompt", prompt_builder)
pipeline.add_component("llm", llm)
pipeline.connect("prompt.prompt", "llm.prompt")


def answer_query(query: str) -> str:
    """Answer *query* using a random sample of the budget dataset as context.

    Args:
        query: The user's question.

    Returns:
        The first reply produced by the ``llm`` pipeline component, a short
        hint when *query* is blank, or an ``"An error occurred: ..."`` message
        if sampling or the pipeline run raises.
    """
    # Do not spend a remote LLM call on an empty question.
    if not query or not query.strip():
        return "Please enter a question."

    try:
        # Clamp the sample size: DataFrame.sample raises ValueError when asked
        # for more rows than exist (sampling without replacement).
        sample_size = min(SAMPLE_ROWS, len(df))
        sample_data = df.sample(n=sample_size).to_string()

        # Truncate the data if it's too long.
        # NOTE: textwrap.shorten also collapses every whitespace run (newlines
        # included) into a single space, so the table layout is flattened.
        budget_data = textwrap.shorten(
            sample_data, width=MAX_CONTEXT_CHARS, placeholder="..."
        )

        result = pipeline.run(
            {"prompt": {"budget_data": budget_data, "query": query}}
        )
        return result["llm"]["replies"][0]
    except Exception as e:
        # UI boundary: keep the traceback in the logs and show the user a
        # readable message instead of crashing the request.
        logger.exception("Failed to answer query")
        return f"An error occurred: {e}"


# Gradio interface
def chat_interface(query: str) -> str:
    """Gradio click handler; forwards *query* to `answer_query`."""
    return answer_query(query)


with gr.Blocks() as demo:
    gr.Markdown("# Indian 2024 Budget Chatbot")

    query_input = gr.Textbox(label="Enter Your Question")
    submit_button = gr.Button("Get Answer")
    output_text = gr.Textbox(label="Answer", interactive=False)

    submit_button.click(
        fn=chat_interface, inputs=[query_input], outputs=output_text
    )

# Run the app locally
if __name__ == "__main__":
    # Launch the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    demo.launch()