"""Insightly "Column Analysis" Streamlit app.

Lets the user upload one or more CSV files, pick a column from each file,
and send every value of that column, followed by a free-text question, to
an OpenAI model. The answers can be downloaded as a CSV file.
"""

import time
from io import BytesIO
from langchain.llms import OpenAI
from dotenv import load_dotenv
import os
import streamlit as st
import pandas as pd

# Set the page configuration here, at module level and before main() runs:
# Streamlit requires set_page_config to be the first Streamlit command
# executed on a page.
st.set_page_config(page_title="Insightly")

def _ask_llm_for_each_row(llm, row_texts, question):
    """Send every row text, followed by the question, to the LLM.

    Args:
        llm: Object exposing ``predict(prompt)``; ``main`` passes a
            LangChain ``OpenAI`` instance.
        row_texts: Stringified cell values of the selected column.
        question: Text appended to each cell value, separated by one space.

    Returns:
        A list with one response per entry of ``row_texts``, in order.
    """
    answers = []
    for row_text in row_texts:
        answers.append(llm.predict(row_text + " " + question))
        # Wait 1 second between API calls to reduce the rate of requests.
        time.sleep(1)
    return answers


def _responses_to_csv_bytes(row_texts, answers):
    """Build the downloadable CSV (as bytes) pairing rows with responses.

    The CSV has two columns, "Original Rows" and "Responses"; each response
    is wrapped in ``**`` (Markdown bold markers), as the app has always done.
    """
    response_df = pd.DataFrame({
        "Original Rows": row_texts,
        "Responses": [f"**{answer}**" for answer in answers],
    })
    with BytesIO() as output_file:
        response_df.to_csv(output_file, index=False)
        return output_file.getvalue()


def main():
    """Render the Column Analysis page.

    Flow:
      1. Load ``.env`` and stop with an error if ``OPENAI_API_KEY`` is unset.
      2. Let the user upload one or more CSV files and type a question.
      3. For each readable, non-empty file, let the user pick a column.
      4. Send ``"<cell value> <question>"`` to the LLM for every row of that
         column and offer the results as a CSV download.

    Streamlit re-executes this function on every widget interaction, so the
    generated CSV is cached in ``st.session_state`` and only recomputed when
    the file name, selected column, question, or column contents change.
    Every widget created inside the per-file loop gets an explicit ``key`` so
    that several uploaded files do not collide on widget identity.
    """
    load_dotenv()

    # Only validated here: the key itself is not passed on, the OpenAI
    # wrapper is constructed without it below.
    if not os.getenv("OPENAI_API_KEY"):
        st.error("OPENAI_API_KEY is not set")
        return

    st.sidebar.image("https://i.ibb.co/bX6GdqG/insightly-wbg.png", use_column_width=True)
    st.title("Column Analysis 💬")

    csv_files = st.file_uploader("Upload CSV files", type="csv", accept_multiple_files=True)
    if not csv_files:
        return

    llm = OpenAI(temperature=0)
    user_input = st.text_input("Question here:")

    for file_index, csv_file in enumerate(csv_files):
        # Unique per uploaded file; used for widget keys and the cache slot.
        widget_suffix = f"{file_index}_{csv_file.name}"

        # Rewind in case the upload buffer was already consumed by an
        # earlier rerun; a no-op for a fresh buffer.
        csv_file.seek(0)
        try:
            df = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue
        except (pd.errors.ParserError, UnicodeDecodeError):
            st.error(f"Could not parse CSV file: {csv_file.name}")
            continue

        if df.empty:
            st.error(f"Empty CSV file uploaded: {csv_file.name}")
            continue

        # Dropdown to select the column for prompts; None means "not chosen".
        column_for_prompt = st.selectbox(
            "Select the column for prompts:",
            [None] + df.columns.tolist(),
            key=f"column_for_prompt_{widget_suffix}",
        )
        if column_for_prompt is None:
            continue

        # str() turns missing values (NaN) into the text "nan" so that every
        # row yields a prompt.
        row_texts = [str(value) for value in df[column_for_prompt]]

        # Everything the generated CSV depends on; a mismatch with the cached
        # signature means the cached result is stale.
        signature = (csv_file.name, column_for_prompt, user_input, tuple(row_texts))
        state_key = f"responses_{widget_suffix}"

        cached = st.session_state[state_key] if state_key in st.session_state else None
        if cached is None or cached["signature"] != signature:
            answers = _ask_llm_for_each_row(llm, row_texts, user_input)
            cached = {
                "signature": signature,
                "csv_bytes": _responses_to_csv_bytes(row_texts, answers),
            }
            st.session_state[state_key] = cached

        # Shown directly (not nested under another button) so it survives the
        # rerun that a click triggers.
        st.download_button(
            label="Download CSV",
            data=cached["csv_bytes"],
            file_name="responses.csv",
            mime="text/csv",
            key=f"download_{widget_suffix}",
        )

# Run the app only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()