hocuf committed on
Commit
9f99478
1 Parent(s): f11b764

Upload 2 files

Browse files
Files changed (2) hide show
  1. data_science_with_promt.py +111 -0
  2. requirements.txt +6 -0
data_science_with_promt.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import io
import sys

import datahorse
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
7
+
8
# Page heading shown at the top of the app.
st.title('Text to Data Analysis')

# The chat transcript is kept in session state; create the empty list only
# when the key is not there yet so existing history is left untouched.
if 'conversation' not in st.session_state:
    st.session_state['conversation'] = []

# Input widgets: the data file to analyse and the question to ask about it.
uploaded_file = st.file_uploader('Upload .csv data: ')
query = st.text_input('Search about data: ')
18
+
19
def capture_output(func):
    """Run ``func`` and return the most meaningful thing it produced.

    While ``func`` runs, ``sys.stdout`` is redirected into an in-memory
    buffer and a fresh matplotlib figure is made current so that plain
    ``plt.*`` plotting calls have somewhere to draw.

    Args:
        func: A zero-argument callable.

    Returns:
        The first of the following that applies:

        1. the current matplotlib figure, if it has at least one axes
           (i.e. something was plotted);
        2. the text written to stdout, if any;
        3. the value returned by ``func`` (possibly ``None``).

    Raises:
        Exception: whatever ``func`` raises is propagated unchanged, after
            stdout has been restored and the placeholder figure closed.
    """
    original_stdout = sys.stdout
    buffer = io.StringIO()

    # Placeholder figure: becomes the "current figure" for implicit plotting.
    fig = plt.figure()

    sys.stdout = buffer
    try:
        result = func()
    except Exception:
        # Do not leave the placeholder registered with pyplot on failure.
        plt.close(fig)
        raise
    finally:
        # Always restore stdout, even when func() raises; otherwise every
        # later print in the process would vanish into the buffer.
        sys.stdout = original_stdout

    printed = buffer.getvalue()

    # func() may have drawn on the placeholder or opened its own figure;
    # either way the figure that is current now is the one holding the plot.
    active = plt.gcf()
    if active.axes:
        if active is not fig:
            plt.close(fig)
        return active

    # Nothing was plotted: release the empty figure(s) so they do not
    # accumulate in pyplot's global registry across calls.
    plt.close(fig)
    if active is not fig:
        plt.close(active)

    if printed:
        return printed
    return result
41
+
42
if uploaded_file is not None:

    # Sidebar cheat-sheet of prompts the user can adapt to their own columns.
    st.sidebar.header("Example Queries")
    for example in (
        "1. Show me a summary of the data",
        "2. Create a bar chart of [column_name]",
        "3. What is the average of [column_name]?",
        "4. Show me the correlation between [column1] and [column2]",
        "5. List the unique values in [column_name]",
        "6. What is the maximum value in [column_name]?",
        "7. Create a line chart of [column_name] over time",
        "8. How many missing values are in [column_name]?",
        "9. Filter rows where [column_name] is greater than [value]",
        "10. Generate a pie chart for [column_name]",
    ):
        st.sidebar.write(example)

    # Load the upload through datahorse so the result exposes ``.chat``.
    df = datahorse.read(uploaded_file)

    col1, col2 = st.columns(2)

    if col1.button('Search it'):
        if not query.strip():
            # Nothing to ask: skip the datahorse call and the history entry.
            st.warning("Please enter a question about the data first.")
        else:
            # Record the user's question. The history is rendered with
            # unsafe_allow_html=True below, so the free-text query must be
            # HTML-escaped or the user could inject arbitrary markup.
            user_message = (
                "\n"
                "<div style='background-color: #f0f0f0; padding: 10px; border-radius: 10px; margin: 5px 0;'>\n"
                f"<strong>Me:</strong> {html.escape(query)}\n"
                "</div>\n"
            )
            st.session_state.conversation.append(user_message)

            # Ask datahorse; capture_output returns a figure, printed text,
            # or the raw return value, in that order of preference.
            response = capture_output(lambda: df.chat(query))

            st.subheader("Response:")

            if isinstance(response, plt.Figure):
                st.pyplot(response)
                # Rendering is done; release the figure from pyplot's
                # registry so figures do not pile up across reruns.
                plt.close(response)
            elif isinstance(response, str):
                st.text(response)
            elif isinstance(response, pd.DataFrame):
                st.dataframe(response)
            elif response is not None:
                st.write(response)
            else:
                st.write("No output was captured.")

            # Record a placeholder reply; the real output is shown above.
            response_message = (
                "\n"
                "<div style='background-color: #4CAF50; color: white; padding: 10px; border-radius: 10px; margin: 5px 0;'>\n"
                "<strong>Datahorse:</strong> Response displayed above\n"
                "</div>\n"
            )
            st.session_state.conversation.append(response_message)

    # Clear conversation history
    if col2.button('Clear Conversation'):
        st.session_state.conversation = []


# Display conversation history, newest message first.
for message in reversed(st.session_state.conversation):
    st.markdown(message, unsafe_allow_html=True)

# Display the dataframe
if uploaded_file is not None:
    st.write("Preview of the uploaded data:")
    st.dataframe(df.head())

    # Display column names; labels are stringified because join() raises
    # TypeError on non-string labels (e.g. integer column names).
    st.write("Column names in the dataset:")
    st.write(", ".join(map(str, df.columns)))
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
streamlit
datahorse
pandas
matplotlib
# io and sys are part of the Python standard library; they are not
# installable distributions and must not be listed as requirements.