# Page-scrape header (not part of the program): avatar text "shreyasiv's picture",
# commit message "Update app.py", commit ea35d0c.
import time
from io import BytesIO
from langchain.llms import OpenAI
from dotenv import load_dotenv
import os
import streamlit as st
import pandas as pd
# Configure the browser tab title. This runs at import time, i.e. before main()
# issues any other Streamlit command.
# NOTE(review): Streamlit expects set_page_config to be the first st.* call of
# the script — keep it above main(); confirm against the installed version.
st.set_page_config(page_title="Insightly")
def _ask_llm(llm, row_texts, user_input, delay_seconds=1.0):
    """Send one prompt per row to the LLM and return the answers in row order.

    Each prompt is the row text, a single space, then ``user_input``.

    Args:
        llm: Object exposing ``predict(prompt)``; ``main`` passes its
            ``OpenAI`` instance.
        row_texts: Row values already converted to strings.
        user_input: Text appended to every row value.
        delay_seconds: Pause inserted between consecutive calls to reduce
            the rate of requests.

    Returns:
        A list holding one response per entry of ``row_texts``.
    """
    responses = []
    for position, row_text in enumerate(row_texts):
        # Throttle only *between* calls; sleeping after the final call
        # would just delay the result.
        if position:
            time.sleep(delay_seconds)
        responses.append(llm.predict(row_text + " " + user_input))
    return responses


def main():
    """Run the Streamlit page: upload CSVs, query the LLM per row, offer a download.

    For every uploaded CSV the user picks a column; each value of that column
    is concatenated with the text from the question box and sent to the LLM.
    The original values and the responses are offered as ``responses.csv``.

    Returns early (after showing an error) when ``OPENAI_API_KEY`` is unset
    or empty, and does nothing further until at least one file is uploaded.
    """
    load_dotenv()

    # The key is only checked for presence here; it is not passed on explicitly.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set")
        return

    st.sidebar.image("https://i.ibb.co/bX6GdqG/insightly-wbg.png", use_column_width=True)
    st.title("Column Analysis 💬")

    csv_files = st.file_uploader("Upload CSV files", type="csv", accept_multiple_files=True)
    if not csv_files:
        return

    llm = OpenAI(temperature=0)
    user_input = st.text_input("Question here:")

    for file_index, csv_file in enumerate(csv_files):
        # Widgets created in a loop with identical arguments collide, so every
        # per-file widget gets a key derived from the upload slot and name.
        widget_suffix = f"{file_index}-{csv_file.name}"

        # NOTE(review): the upload is a file-like buffer whose read position
        # may survive a rerun; rewind so read_csv always starts at byte 0.
        csv_file.seek(0)
        try:
            df = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue
        except (pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not read CSV file {csv_file.name}: {exc}")
            continue

        if df.empty:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue

        # None is the first option, so nothing is sent until a column is chosen.
        column_for_prompt = st.selectbox(
            "Select the column for prompts:",
            [None] + df.columns.tolist(),
            key=f"column-{widget_suffix}",
        )
        if column_for_prompt is None:
            continue

        # str() turns missing values into text (e.g. "nan") so every row
        # yields a prompt.
        row_texts = [str(value) for value in df[column_for_prompt]]

        # Any widget interaction reruns this whole function. Cache the answers
        # per upload slot and reuse them while the exact prompts are unchanged,
        # so a rerun (e.g. clicking the download button) does not repeat
        # every API call.
        state_key = f"responses-{widget_suffix}"
        request = (user_input, row_texts)
        cached = st.session_state[state_key] if state_key in st.session_state else None
        if cached is None or cached["request"] != request:
            cached = {
                "request": request,
                "responses": _ask_llm(llm, row_texts, user_input),
            }
            st.session_state[state_key] = cached

        # The "**...**" wrapping is kept so the exported file matches what
        # earlier versions of this page produced.
        response_df = pd.DataFrame({
            "Original Rows": row_texts,
            "Responses": [f"**{answer}**" for answer in cached["responses"]],
        })

        # Rendered directly (not behind a second button): nesting it inside
        # st.button forces an extra rerun before the download is reachable.
        st.download_button(
            label="Download CSV",
            data=response_df.to_csv(index=False).encode("utf-8"),
            file_name="responses.csv",
            mime="text/csv",
            key=f"download-{widget_suffix}",
        )
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()