Spaces:
Runtime error
Runtime error
File size: 4,272 Bytes
d0a9720 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import streamlit as st
import numpy as np
import time
import pickle
import torch
import pandas as pd
from gensim.models import KeyedVectors
from transformers import BertTokenizer, BertModel
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from function.lstm_preprocessing import (
clean,
tokin,
predict_ml_class,
predict_sentence,
predict_single_string,
LSTMClassifier
)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
stemmer = SnowballStemmer('russian')
sw = stopwords.words('russian')
EMBEDDING_DIM = 32
HIDDEN_DIM = 32
SEQ_LEN = 200
VOCAB_SIZE = 196906
EMBEDDING_DIM = 32
wv = KeyedVectors.load("file/wv.wordvectors", mmap='r')
with open('file/vocab_to_int.txt', 'rb') as f:
vocab_to_int = pickle.load(f)
embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))
for word, i in vocab_to_int.items():
try:
embedding_vector = wv[word]
embedding_matrix[i] = embedding_vector
except KeyError as e:
pass
embedding_layer = torch.nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
model = LSTMClassifier(embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_DIM, embedding=embedding_layer).to(DEVICE)
model.load_state_dict(torch.load('models/LTSM_model_epoch_7.pt', map_location='cpu'))
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model_BERT = BertModel.from_pretrained("bert-base-multilingual-cased")
loaded_model = pickle.load(open('models/LogReg.pickle', "rb"))
loaded_classifier = pickle.load(open('models/trained_model.pkl', "rb"))
loaded_vectorizer = pickle.load(open('models/vectorizer.pkl', "rb"))
def main():
st.title("Классификация отзыва на поликлиники")
user_input = st.text_area("Введите ваш отзыв:", "")
return user_input
user_input = main()
def predict_lstm(user_input):
start_time = time.time()
prediction = predict_sentence(user_input, model, SEQ_LEN, vocab_to_int)
end_time = time.time()
return prediction, round((end_time - start_time), 4)
def predict_bert(user_input):
start_time = time.time()
prediction = predict_single_string(user_input, model_BERT, loaded_model)
end_time = time.time()
return prediction, round((end_time - start_time), 4)
def predict_ML(user_input):
start_time = time.time()
prediction = predict_ml_class(user_input, loaded_vectorizer, loaded_classifier)
end_time = time.time()
return prediction, round((end_time - start_time), 4)
if user_input:
prediction_rnn, time_taken_rnn = predict_ML(user_input)
st.write("### Bag-of-Words + LogReg")
st.write("Предсказанный класс:", prediction_rnn)
st.write("Время предсказания:", time_taken_rnn, "сек.")
prediction_rnn, time_taken_rnn = predict_lstm(user_input)
st.write("### LSTM модель")
st.write("Предсказанный класс:", prediction_rnn)
st.write("Время предсказания:", time_taken_rnn, "сек.")
prediction_rnn, time_taken_rnn = predict_bert(user_input)
st.write("### BERT модель + LogReg")
st.write("Предсказанный класс:", prediction_rnn)
st.write("Время предсказания:", time_taken_rnn, "сек.")
st.sidebar.image('images/polyclinic.jpeg', use_column_width=True)
f1_score_classic_ml = 0.87
f1_score_rnn = 0.88
f1_score_bert = 0.83
f1_score_classic_ml_valid = 0.89
f1_score_rnn_valid = 0.92
f1_score_bert_valid = 0.82
# Создание DataFrame для сравнения результатов
st.sidebar.write("### Сравнительная таблица по метрике f1-macro")
results = {
"Модель": ["Классический ML", "LSTM", "BERT-based"],
"train": [f1_score_classic_ml, f1_score_rnn, f1_score_bert],
"valid": [f1_score_classic_ml_valid, f1_score_rnn_valid, f1_score_bert_valid]
}
results_df = pd.DataFrame(results)
st.sidebar.dataframe(results_df) |