Spaces:
Runtime error
Runtime error
File size: 4,299 Bytes
42193eb e0adc3b 42193eb 086c1bf 42193eb 086c1bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
### ----------------------------- ###
### libraries ###
### ----------------------------- ###
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from reader import get_article
### ------------------------------ ###
### data transformation ###
### ------------------------------ ###
# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (always first column)
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}

# the last column is the one the model will learn to predict
final_colname = uncleaned_data.columns[-1]

# for each column...
# NOTE: DataFrame.iteritems() was removed in pandas 2.0; items() yields the
# same (column name, Series) pairs and is also available in older releases.
for colname, colval in uncleaned_data.items():

    # check if col is already a number; if so, add col directly
    # to new dataframe and skip to next column
    # (only the first value of the column is inspected)
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # structure: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints

    # if not, give each distinct item the next free integer code
    # (codes are assigned in order of first appearance) and record
    # the code for every row
    for item in colval.values:
        code = new_dict.setdefault(item, len(new_dict))
        transformed_col_vals.append(code)

    # reverse dictionary only for final col (0, 1) => (vals), so that a
    # predicted class code can be turned back into its original label
    if colname == final_colname:
        new_dict = {value: key for (key, value) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
### -------------------------------- ###
### model training ###
### -------------------------------- ###
# choose features and prediction target; the last column is always
# treated as the prediction target, everything before it as features
cols = len(data.columns)
num_features = cols - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# hold out a quarter of the rows for testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# fit a logistic regression with default parameters; the single-column
# target frame is flattened to 1-D before fitting
# (fit() returns the estimator itself, so the call can be chained)
model = LogisticRegression().fit(x_train, y_train.values.ravel())

# predictions for the held-out rows
y_pred = model.predict(x_test)
### -------------------------------- ###
### article generation ###
### -------------------------------- ###
# the file-reading function get_article is borrowed from reader.py
def get_feat():
    """Return the name of the column whose coefficient is largest in magnitude.

    Only the first row of ``model.coef_`` is inspected. When several
    coefficients share the largest magnitude, the earliest column wins.
    """
    magnitudes = [abs(weight) for weight in model.coef_[0]]
    # position of the largest magnitude (first one on ties)
    strongest = max(range(len(magnitudes)), key=magnitudes.__getitem__)
    return data.columns[strongest]
# accuracy on the held-out rows, as a percentage rounded to one decimal;
# the trailing "**" presumably closes bold markup opened inside the
# article text -- TODO confirm against reader.get_article
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)!s}%**"

# name of the most influential feature, with the same "**" suffix
most_imp_feat = get_feat() + "**"

# hand both strings to get_article (imported from reader)
info = get_article(acc, most_imp_feat)
### ------------------------------- ###
### interface creation ###
### ------------------------------- ###
# predictor for generic number of features
def general_predictor(*args):
    """Predict the target value for a single datapoint.

    Args:
        *args: one value per feature column, in the order of
            ``data.columns``. Values for categorical columns must be one
            of the categories recorded in ``cat_value_dicts``.

    Returns:
        The original label of the predicted class when the target column
        was categorical; otherwise the raw prediction converted to text.

    Raises:
        KeyError: if a categorical argument is not a known category.
    """
    features = []

    # transform categorical input into the numeric codes used for training;
    # zip stops at the shorter sequence, so the trailing target column is
    # never paired with an argument
    for colname, arg in zip(data.columns, args):
        if colname in cat_value_dicts:
            features.append(cat_value_dicts[colname][arg])
        else:
            features.append(arg)

    # predict single datapoint
    prediction = model.predict([features])[0]

    # a numeric target column never gets a mapping stored for it, so
    # there is nothing to translate back in that case
    label_lookup = cat_value_dicts.get(final_colname)
    if label_lookup is None:
        return str(prediction)
    return label_lookup[prediction]
# one labelled input widget per feature column (the labels replace the
# names lost via star-args in the predictor); the final column is the
# prediction target and gets no widget
inputls = [
    # categorical columns offer their known categories as radio choices;
    # every other column takes a number input
    gr.inputs.Radio(
        choices=list(cat_value_dicts[colname]),
        type="value",
        label=colname,
    )
    if colname in cat_value_dicts
    else gr.inputs.Number(label=colname)
    for colname in data.columns
    if colname != final_colname
]

# generate gradio interface; title, description, article and css all
# come from the dict returned by get_article
interface = gr.Interface(
    general_predictor,
    inputs=inputls,
    outputs="text",
    title=info['title'],
    description=info['description'],
    article=info['article'],
    css=info['css'],
    theme="grass",
    allow_flagging='never',
)

# show the interface
interface.launch()