|
from fastapi import FastAPI |
|
from sklearn.model_selection import train_test_split |
|
from request_body import request_body |
|
from utilities import * |
|
from classifier import Classifier |
|
|
|
|
|
# Download the NLTK 'punkt' resource up front (presumably required by the
# text helpers called below -- confirm against utilities).
nltk.download("punkt")

# Read the training CSV and separate it into sentences and labels.
filename = "airline_sentiment_analysis.csv"
raw_data = get_data_for_training(filename)
sentences, labels = get_data_and_labels(raw_data)

# From here on `sentences` holds the output of get_word_embeddings, not the
# values returned by get_data_and_labels.
sentences = get_word_embeddings(sentences)

# Hold out half of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
(
    airline_train_data,
    airline_test_data,
    airline_train_labels,
    airline_test_labels,
) = train_test_split(
    sentences,
    labels,
    test_size=0.5,
    random_state=42,
)
|
|
|
|
|
# Vectorise both splits with the same size argument (20000) so the train and
# test inputs are produced identically.
x_train = vectorize_sequence(airline_train_data, 20000)
x_test = vectorize_sequence(airline_test_data, 20000)

# Labels become float32 NumPy arrays.
y_train = np.asarray(airline_train_labels).astype("float32")
y_test = np.asarray(airline_test_labels).astype("float32")

# Print the four shapes, one per line, as a quick sanity check.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep="\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build and fit the classifier used by the /predict endpoint.
classifier_lr = Classifier("Logistic Regression")
classifier_lr.train(x_train, y_train)

# Report the training shape and the scores on the test split and the
# training split (test is scored first, then train).
print("LOGISTIC REGRESSION")
print(f"train shape: {x_train.shape!s}")
print(f"score on test: {classifier_lr.score(x_test, y_test)!s}")
print(f"score on train: {classifier_lr.score(x_train, y_train)!s}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Application object on which the route below is registered.
app = FastAPI()


@app.post('/predict')
def predict(data: request_body):
    """Classify the sentiment of the text in the request body.

    The text is turned into a sequence with ``get_sequence`` and vectorised
    with ``vectorize_sequence`` using the same size argument (20000) that
    the training data is vectorised with earlier in this module. The first
    element returned by ``classifier_lr.classify`` is taken as the label.

    Args:
        data: Request body; only its ``text`` attribute is read.

    Returns:
        ``{'sentiment': "positive"}`` when the label converts to the integer
        1, otherwise ``{'sentiment': "negative"}``.
    """
    features = vectorize_sequence(get_sequence(data.text), 20000)
    print(features)

    raw_label = classifier_lr.classify(features)[0]
    print(raw_label)

    # Any label other than 1 is reported as negative.
    sentiment = "positive" if int(raw_label) == 1 else "negative"
    return {'sentiment': sentiment}
|
|