# lstm_mindspore / app.py
# Uploaded by lwdragon (Hugging Face Space, commit 3a02bb3, 3.75 kB).
# NOTE: the lines above were web-page residue from the file listing
# ("raw / history / blame"); commented out so the module parses.
import math
import zipfile
import numpy as np
import gradio as gr
import mindspore
import mindspore.nn as nn
import mindspore.numpy as mnp
import mindspore.ops as ops
import mindspore.dataset as dataset
from mindspore import Tensor
from mindspore import load_checkpoint, load_param_into_net
from mindspore.common.initializer import Uniform, HeUniform
def load_glove():
    """Load GloVe 6B 100-d word vectors and build the matching vocabulary.

    Reads ``./lstm/glove.6B.100d.txt`` (one "word v1 ... v100" line per
    token) and appends two special rows at the end of the table: a random
    vector for ``<unk>`` and an all-zero vector for ``<pad>``
    (``special_first=False`` keeps them last in the vocab as well).

    Returns:
        tuple: ``(vocab, embeddings)`` — a ``mindspore.dataset.text.Vocab``
        and a float32 array of shape ``(num_tokens + 2, 100)``.
    """
    embeddings = []
    tokens = []
    with open("./lstm/glove.6B.100d.txt", encoding='utf-8') as gf:
        for glove in gf:
            word, embedding = glove.split(maxsplit=1)
            tokens.append(word)
            # Fix: np.fromstring on text is deprecated; split + asarray is
            # the supported equivalent and parses the same values.
            embeddings.append(np.asarray(embedding.split(), dtype=np.float32))
    # Embeddings for the two special placeholders <unk> and <pad>.
    embeddings.append(np.random.rand(100))
    embeddings.append(np.zeros((100,), np.float32))
    vocab = dataset.text.Vocab.from_list(tokens, special_tokens=["<unk>", "<pad>"], special_first=False)
    embeddings = np.array(embeddings).astype(np.float32)
    return vocab, embeddings
class RNN(nn.Cell):
    """Bidirectional LSTM sentiment classifier.

    Looks up token ids in a GloVe embedding table, runs a multi-layer
    bidirectional LSTM, concatenates the last layer's final forward and
    backward hidden states, and maps them through a dense layer + sigmoid
    to a score in (0, 1).
    """
    def __init__(self, embeddings, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, pad_idx):
        # embeddings: pre-trained table of shape (vocab_size, embedding_dim);
        # pad_idx: vocab id whose embedding stays zero (padding token).
        super().__init__()
        vocab_size, embedding_dim = embeddings.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim, embedding_table=Tensor(embeddings), padding_idx=pad_idx)
        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           dropout=dropout,
                           batch_first=True)
        weight_init = HeUniform(math.sqrt(5))
        bias_init = Uniform(1 / math.sqrt(hidden_dim * 2))
        # hidden_dim * 2 because forward + backward hidden states are
        # concatenated in construct(); assumes bidirectional=True.
        self.fc = nn.Dense(hidden_dim * 2, output_dim, weight_init=weight_init, bias_init=bias_init)
        # NOTE(review): older MindSpore nn.Dropout takes keep_prob, hence
        # 1 - dropout; newer versions take the drop probability p directly —
        # confirm against the installed MindSpore version.
        self.dropout = nn.Dropout(1 - dropout)
        self.sigmoid = ops.Sigmoid()
    def construct(self, inputs):
        # inputs: int token-id tensor, batch-first (batch, seq_len).
        embedded = self.dropout(self.embedding(inputs))
        _, (hidden, _) = self.rnn(embedded)
        # hidden[-2] / hidden[-1]: last layer's forward / backward states.
        hidden = self.dropout(mnp.concatenate((hidden[-2, :, :], hidden[-1, :, :]), axis=1))
        output = self.fc(hidden)
        return self.sigmoid(output)
# Class index -> human-readable sentiment label.
# NOTE(review): not referenced elsewhere in this file.
score_map = {0: "Negative", 1: "Positive"}
def predict_sentiment(model, vocab, sentence):
    """Score one sentence with the trained model.

    Lower-cases and whitespace-tokenizes *sentence*, maps tokens to ids via
    *vocab*, and returns the model's raw sigmoid output as a numpy array.
    """
    model.set_train(False)
    token_ids = vocab.tokens_to_ids(sentence.lower().split())
    # Add a leading batch dimension of size 1 before inference.
    batch = mindspore.Tensor(token_ids, mindspore.int32).expand_dims(0)
    return model(batch).asnumpy()
def prefict_emotion(sentence):
    """Classify the sentiment of *sentence* for the gradio "label" output.

    Builds the network, loads the trained checkpoint, and returns a
    ``{label: probability}`` dict. (The misspelled name is kept because the
    gradio Interface below references it.)
    """
    # Network hyperparameters — must match the trained checkpoint.
    hidden_size = 256
    output_size = 1
    num_layers = 2
    bidirectional = True
    dropout = 0.5
    vocab, embeddings = load_glove()
    pad_idx = vocab.tokens_to_ids('<pad>')
    net = RNN(embeddings, hidden_size, output_size, num_layers, bidirectional, dropout, pad_idx)
    # Load the checkpoint's parameter dict into the network.
    param_dict = load_checkpoint("./lstm/sentiment-analysis.ckpt")
    load_param_into_net(net, param_dict)
    # Bug fix: Model was used without ever being imported (NameError).
    from mindspore import Model
    model = Model(net)
    # predict_sentiment returns a (1, 1) array; collapse it to a plain
    # float so gradio's label output receives scalar probabilities.
    pred = float(predict_sentiment(model, vocab, sentence))
    return {
        "Positive 🙂": pred,
        "Negative 🙃": 1 - pred,
    }
# Build the gradio demo and serve it with a public share link.
demo = gr.Interface(
    fn=prefict_emotion,
    inputs=gr.inputs.Textbox(
        lines=3,
        placeholder="Type a phrase that has some emotion",
        label="Input Text",
    ),
    outputs="label",
    title="Sentiment Analysis",
    examples=["This film is terrible", "This film is great"],
)
demo.launch(share=True)