"""Streamlit app that asks an LLM the same question about every row of a CSV column."""

import os
import time
from io import BytesIO

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.llms import OpenAI

# Set the page configuration here
st.set_page_config(page_title="Insightly")

# Seconds to wait between consecutive API calls to reduce the rate of requests.
_API_CALL_DELAY_SECONDS = 1


def _query_llm(llm, prompts):
    """Send each prompt to the LLM and return the responses in order.

    Args:
        llm: Object exposing ``predict(prompt)`` (the LangChain LLM built in
            ``main``).
        prompts: Sequence of prompt strings, one per CSV row.

    Returns:
        A list holding one response per prompt, in the same order.
    """
    responses = []
    for position, prompt in enumerate(prompts):
        if position:
            # Throttle only *between* calls; sleeping after the last call
            # would just delay the page for no benefit.
            time.sleep(_API_CALL_DELAY_SECONDS)
        responses.append(llm.predict(prompt))
    return responses


def _responses_for(llm, cache_key, prompts):
    """Return the responses for ``prompts``, reusing the previous run's result.

    Streamlit re-executes the whole script on every widget interaction
    (including a click on the download button). Without this cache each
    interaction would repeat one API call per row.

    Args:
        llm: Object exposing ``predict(prompt)``.
        cache_key: ``st.session_state`` key under which the last result for
            this uploaded file is stored.
        prompts: List of prompt strings for the current file/column/question.

    Returns:
        A list holding one response per prompt.
    """
    if cache_key in st.session_state:
        cached_prompts, cached_responses = st.session_state[cache_key]
        if cached_prompts == prompts:
            return cached_responses

    responses = _query_llm(llm, prompts)
    # Keep only the most recent result per file so the cache stays bounded.
    st.session_state[cache_key] = (prompts, responses)
    return responses


def main():
    """Render the column-analysis page.

    Lets the user upload one or more CSV files, pick a column in each and
    type a question. Every value of the chosen column is sent to the LLM
    together with the question, and the answers are offered as a CSV
    download.
    """
    load_dotenv()

    # Load the OpenAI API key from the environment variable
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set")
        return

    st.sidebar.image("https://i.ibb.co/bX6GdqG/insightly-wbg.png", use_column_width=True)
    st.title("Column Analysis 💬")

    csv_files = st.file_uploader("Upload CSV files", type="csv", accept_multiple_files=True)
    if not csv_files:
        return

    llm = OpenAI(temperature=0)
    user_input = st.text_input("Question here:")

    for file_index, csv_file in enumerate(csv_files):
        # Read the CSV file and get the column names
        try:
            df = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue

        if df.empty:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue

        column_names = df.columns.tolist()

        # Dropdown to select the column for prompts. The explicit key keeps
        # the widgets distinct when several uploaded files share the same
        # columns; identical label + options would otherwise collide.
        column_for_prompt = st.selectbox(
            "Select the column for prompts:",
            [None] + column_names,
            key=f"column_select_{file_index}",
        )
        if column_for_prompt is None:
            continue

        # Check if the specified column for prompts exists in the DataFrame
        if column_for_prompt not in df.columns:
            st.error(
                f"The column '{column_for_prompt}' does not exist in the CSV file: {csv_file.name}"
            )
            continue

        # Do not spend one API call per row before a question has been typed.
        if not user_input.strip():
            st.info("Enter a question to analyse the selected column.")
            continue

        # Convert each value to a string so missing/NaN cells can still be
        # concatenated into a prompt.
        original_rows_list = [str(row_value) for row_value in df[column_for_prompt]]
        prompts = [row_text + " " + user_input for row_text in original_rows_list]

        responses_list = _responses_for(llm, f"llm_responses_{file_index}", prompts)

        # Create a new DataFrame containing the original rows and responses
        response_df = pd.DataFrame(
            {
                "Original Rows": original_rows_list,
                "Responses": responses_list,
            }
        )

        # Add bold (Markdown) formatting to the "Responses" column.
        # NOTE(review): these markers end up in the downloaded CSV - confirm
        # that is intended.
        response_df["Responses"] = response_df["Responses"].apply(lambda x: f"**{x}**")

        with BytesIO() as output_file:
            response_df.to_csv(output_file, index=False)
            csv_bytes = output_file.getvalue()

        # Render the download button directly. Nesting it inside an
        # ``st.button`` branch makes it vanish on the rerun triggered by its
        # own click, because the outer button reads False again.
        st.download_button(
            label="Download CSV",
            data=csv_bytes,
            file_name="responses.csv",
            mime="text/csv",
            key=f"download_{file_index}",
        )


if __name__ == "__main__":
    main()