Crystallllllll committed on
Commit
cc6d1ca
1 Parent(s): 1b75733

Upload 4 files

Browse files
Files changed (4) hide show
  1. app (1).py +172 -0
  2. data.csv +16 -0
  3. info.md +16 -0
  4. requirements.txt +4 -0
app (1).py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### ----------------------------- ###
2
+ ### libraries ###
3
+ ### ----------------------------- ###
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import numpy as np
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn import metrics
11
+
12
+
13
+ ### ------------------------------ ###
14
+ ### data transformation ###
15
+ ### ------------------------------ ###
16
+
17
# Load the raw survey responses.
uncleaned_data = pd.read_csv('data.csv')

# Drop the timestamp, which is always the first column.
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# Keep track of which columns are categorical and what those columns'
# value mappings are.
# structure: {colname1: {...}, colname2: {...}}
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column name, Series) pairs on both old and new pandas versions.
for colname, colval in uncleaned_data.items():

    # A column whose first value is already a number is copied over
    # unchanged and gets no entry in cat_value_dicts.
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # Number each distinct value in order of first appearance,
    # e.g. {"lilac": 0, "blue": 1, ...}. len(new_dict) is always the next
    # unused code, so no separate counter is needed.
    new_dict = {}
    transformed_col_vals = [
        new_dict.setdefault(item, len(new_dict)) for item in colval.values
    ]

    # The final (target) column is looked up by predicted code, so its
    # mapping is stored reversed: {0: "lilac", 1: "blue", ...}.
    if colname == final_colname:
        new_dict = {code: label for label, code in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
61
+
62
+
63
+ ### -------------------------------- ###
64
+ ### model training ###
65
+ ### -------------------------------- ###
66
+
67
# Select features and prediction: every column except the last is a
# feature, and the last column is automatically used as the prediction.
cols = len(data.columns)
num_features = cols - 1
x, y = data.iloc[:, :num_features], data.iloc[:, num_features:]

# Hold out a quarter of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Fit a logistic regression with default parameters (the single-column
# target frame is flattened to 1-D first), then predict the held-out rows.
model = LogisticRegression().fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)
80
+
81
+
82
+ ### -------------------------------- ###
83
+ ### article generation ###
84
+ ### -------------------------------- ###
85
+ # borrow file reading function from reader.py
86
+
87
def get_feat(estimator=None, columns=None):
    """Return the name of the feature with the largest coefficient magnitude.

    The original version looked only at ``coef_[0]``. For a multiclass
    target that is just the first class's coefficients, so the reported
    "most important feature" ignored every other class. The absolute
    coefficients are now summed over all rows of ``coef_``; for a binary
    model (a single row) the result is unchanged.

    Args:
        estimator: Fitted model exposing ``coef_``. Defaults to the
            module-level ``model``.
        columns: Feature names, indexable by position and ordered like
            the columns of ``coef_``. Defaults to ``data.columns``.

    Returns:
        The entry of ``columns`` at the position of the largest summed
        absolute coefficient (the first such position on ties).
    """
    if estimator is None:
        estimator = model
    if columns is None:
        columns = data.columns

    # atleast_2d lets a flat coefficient vector be handled like a single row.
    importance = np.abs(np.atleast_2d(estimator.coef_)).sum(axis=0)
    return columns[int(importance.argmax())]
92
+
93
# Accuracy on the held-out rows as a percentage string with one decimal.
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)}%"
most_imp_feat = get_feat()
95
+ # info = get_article(acc, most_imp_feat)
96
+
97
+
98
+
99
+ ### ------------------------------- ###
100
+ ### interface creation ###
101
+ ### ------------------------------- ###
102
+
103
+
104
+ # predictor for generic number of features
105
def general_predictor(*args):
    """Predict the recommendation label for one set of quiz answers.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns``. Answers for categorical columns are the
            option text; other answers are passed through unchanged.

    Returns:
        The label stored in ``cat_value_dicts[final_colname]`` for the
        code the model predicts.

    Raises:
        KeyError: If an answer for a categorical column is not one of
            that column's known options.
    """
    # Encode categorical answers with the same codes used for training.
    encoded = [
        cat_value_dicts[name][answer] if name in cat_value_dicts else answer
        for name, answer in zip(data.columns, args)
    ]

    # The model is given a batch containing this single datapoint.
    prediction = model.predict([encoded])
    return cat_value_dicts[final_colname][prediction[0]]
119
+
120
+ # add data labels to replace those lost via star-args
121
+
122
+
123
block = gr.Blocks()

# Read info.md once, up front: its first line is the app title and the
# remainder is the write-up shown at the bottom of the page. The file
# contains emoji, so it is decoded explicitly as UTF-8 rather than with the
# platform default encoding, and the handle is closed before the UI is built.
with open('info.md', encoding='utf-8') as f:
    title_line = f.readline()
    about_text = f.read()

# NOTE(review): the component nesting below was reconstructed from a paste
# that had lost its indentation - confirm the layout against the running app.
with block:
    gr.Markdown(title_line)
    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

    with gr.Row():
        with gr.Group():
            inputls = []
            for colname in data.columns:
                # skip last column (it is the prediction, not an input)
                if colname == final_colname:
                    continue

                # access categories dict if data is categorical
                # otherwise, just use a number input
                if colname in cat_value_dicts:
                    radio_options = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(radio_options, type="value", label=colname))
                else:
                    # add numerical input
                    inputls.append(gr.Number(label=colname))
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")

            # inputls is ordered like the feature columns, which is the
            # order general_predictor expects its positional arguments in.
            submit.click(fn=general_predictor, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

    with gr.Row():
        with gr.Group():
            gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
        with gr.Group():
            gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")

    gr.Markdown("<br />")

    with gr.Group():
        gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')

    with gr.Group():
        gr.Markdown(about_text)

# show the interface
block.launch()
data.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Timestamp,"Which year group are you in?
2
+ ","Have you ever tried watching a Korean drama?
3
+ ",Q3: Do you have a favorite Korean actor or actress?,"Do you cry easily?
4
+ ","Are you an easy-going person?
5
+ ",Do you have friends who are really into K-drama?,How do you feel about subtitles?,"Do you prefer shows with many episodes or shorter series?
6
+ ","Are you interested in K-dramas that showcase Korean culture or modern Korean society?
7
+ ","Are you okay with a slow-paced drama focusing on character development?
8
+ ",Which type of storyline appeals to you more?
9
+ 9/2/2024 13:46:05,D: Y10,"C: No, never","C: No, I don’t like anyone",C: Depends,C: Maybe,"A: Yes, a lot of my friends love it",C: I'd like a mix of both.,A: A lot of episodes (20+),B: Not necessarily; I just want a good story.,"A: Yes, I love character-driven stories.",F: A crime/mystery story with investigative elements.
10
+ 9/2/2024 13:50:36,D: Y10,"C: No, never","C: No, I don’t like anyone",C: Depends,C: Maybe,"B: Yes, but only a few friends",C: I'd like a mix of both.,A: A lot of episodes (20+),B: Not necessarily; I just want a good story.,"B: No, I prefer a faster-paced plot.","C: A fun, light-hearted comedy."
11
+ 9/2/2024 19:23:25,D: Y10,"C: No, never","D: No, but I heard someone before",B: No,A: Yes,"A: Yes, a lot of my friends love it",C: I'd like a mix of both.,B: Shorter series (12-16 episodes),B: Not necessarily; I just want a good story.,"B: No, I prefer a faster-paced plot.",D: A story set in a different historical or fantasy world.
12
+ 9/5/2024 10:33:20,D: Y10,"C: No, never","C: No, I don’t like anyone",C: Depends,C: Maybe,"B: Yes, but only a few friends",A: I love them. I don't mind reading while watching.,A: A lot of episodes (20+),B: Not necessarily; I just want a good story.,"A: Yes, I love character-driven stories.",A: A heartwarming romance with emotional depth.
13
+ 9/17/2024 15:58:28,D: Y10,"C: No, never","C: No, I don’t like anyone",B: No,B: No,"B: Yes, but only a few friends",B: I prefer dubbed versions.,B: Shorter series (12-16 episodes),B: Not necessarily; I just want a good story.,"A: Yes, I love character-driven stories.",E: A drama centered on family and friendships.
14
+ 9/17/2024 18:44:50,D: Y10,"B: Yes, I watched a lot","A: Yes, I watched their series",A: Yes,C: Maybe,"B: Yes, but only a few friends",A: I love them. I don't mind reading while watching.,B: Shorter series (12-16 episodes),B: Not necessarily; I just want a good story.,"B: No, I prefer a faster-paced plot.",F: A crime/mystery story with investigative elements.
15
+ 9/17/2024 19:34:50,D: Y10,"A: Yes, but not much","D: No, but I heard someone before",C: Depends,A: Yes,"B: Yes, but only a few friends",A: I love them. I don't mind reading while watching.,A: A lot of episodes (20+),B: Not necessarily; I just want a good story.,"A: Yes, I love character-driven stories.",B: A fast-paced thriller with lots of twists.
16
+ 9/17/2024 22:30:03,D: Y10,"A: Yes, but not much","D: No, but I heard someone before",B: No,C: Maybe,"A: Yes, a lot of my friends love it",A: I love them. I don't mind reading while watching.,B: Shorter series (12-16 episodes),C: A little bit of both.,"A: Yes, I love character-driven stories.",F: A crime/mystery story with investigative elements.
info.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 😌 [Edit info.md - Your app's title here]
2
+
3
+ ### 🧐 Problem Statement and Research Summary
4
+ [add info about your problem statement and your research here!]
5
+
6
+ ### 🎣 Data Collection Plan
7
+ [Edit info.md - add info about what data you collected and why here!]
8
+
9
+ ### 💥 Ethical Considerations (Data Privacy and Bias)
10
+ * Data privacy: [Edit info.md - add info about how you considered users' privacy here!]
11
+ * Bias: [Edit info.md - add info about how you considered bias here!]
12
+
13
+ ### 👻 Our Team
14
+ [Edit info.md - add info about your team members here!]
15
+
16
+ ![aiEDU logo](https://images.squarespace-cdn.com/content/v1/5e4efdef6d10420691f02bc1/5db5a8a3-1761-4fce-a096-bd5f2515162f/aiEDU+_black+logo+stacked.png?format=100w)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pip>=23.2.1
2
+ pandas==1.3.4
3
+ scikit-learn>=1.0.1
4
+ numpy==1.21.4