lwdragon committed on
Commit
ba49cb7
1 Parent(s): 5f426e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -49
app.py CHANGED
@@ -1,39 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import math
2
  import numpy as np
3
 
4
  import gradio as gr
5
  import mindspore
6
- import mindspore.nn as nn
7
  import mindspore.numpy as mnp
8
- import mindspore.ops as ops
9
- import mindspore.dataset as dataset
10
- from mindspore import Tensor
11
- from mindspore import load_checkpoint, load_param_into_net
12
  from mindspore.common.initializer import Uniform, HeUniform
13
 
14
 
15
  def load_glove():
16
  embeddings = []
17
  tokens = []
18
- with open("./glove.6B.100d.txt", encoding='utf-8') as gf:
19
- for glove in gf:
20
  word, embedding = glove.split(maxsplit=1)
21
  tokens.append(word)
22
- embeddings.append(np.fromstring(embedding, dtype=np.float32, sep=' '))
 
 
23
  # 添加 <unk>, <pad> 两个特殊占位符对应的embedding
24
  embeddings.append(np.random.rand(100))
25
  embeddings.append(np.zeros((100,), np.float32))
26
 
27
- vocab = dataset.text.Vocab.from_list(tokens, special_tokens=["<unk>", "<pad>"], special_first=False)
 
 
28
  embeddings = np.array(embeddings).astype(np.float32)
29
  return vocab, embeddings
30
 
 
31
  class RNN(nn.Cell):
32
  def __init__(self, embeddings, hidden_dim, output_dim, n_layers,
33
  bidirectional, dropout, pad_idx):
34
  super().__init__()
35
  vocab_size, embedding_dim = embeddings.shape
36
- self.embedding = nn.Embedding(vocab_size, embedding_dim, embedding_table=Tensor(embeddings), padding_idx=pad_idx)
 
 
37
  self.rnn = nn.LSTM(embedding_dim,
38
  hidden_dim,
39
  num_layers=n_layers,
@@ -42,17 +63,22 @@ class RNN(nn.Cell):
42
  batch_first=True)
43
  weight_init = HeUniform(math.sqrt(5))
44
  bias_init = Uniform(1 / math.sqrt(hidden_dim * 2))
45
- self.fc = nn.Dense(hidden_dim * 2, output_dim, weight_init=weight_init, bias_init=bias_init)
 
 
46
  self.dropout = nn.Dropout(1 - dropout)
47
  self.sigmoid = ops.Sigmoid()
48
 
49
  def construct(self, inputs):
50
  embedded = self.dropout(self.embedding(inputs))
51
  _, (hidden, _) = self.rnn(embedded)
52
- hidden = self.dropout(mnp.concatenate((hidden[-2, :, :], hidden[-1, :, :]), axis=1))
53
- output = self.fc(hidden)
 
 
54
  return self.sigmoid(output)
55
 
 
56
  def predict_sentiment(model, vocab, sentence):
57
  model.set_train(False)
58
  tokenized = sentence.lower().split()
@@ -62,44 +88,44 @@ def predict_sentiment(model, vocab, sentence):
62
  prediction = model(tensor)
63
  return prediction.asnumpy()
64
 
65
- def prefict_emotion(sentence):
66
- # 加载网路
67
- hidden_size = 256
68
- output_size = 1
69
- num_layers = 2
70
- bidirectional = True
71
- dropout = 0.5
72
- lr = 0.00
73
-
74
- vocab, embeddings = load_glove()
75
- pad_idx = vocab.tokens_to_ids('<pad>')
76
- net = RNN(embeddings, hidden_size, output_size, num_layers, bidirectional, dropout, pad_idx)
77
-
78
- # 将模型参数存入parameter的字典中
79
- param_dict = load_checkpoint("./sentiment-analysis.ckpt")
80
-
81
- # 将参数加载到网络中
82
- load_param_into_net(net, param_dict)
83
-
84
  # 预测
85
  pred = predict_sentiment(net, vocab, sentence).item()
86
  result = {
87
- "Positive 🙂": pred,
88
- "Negative 🙃": 1-pred,
89
- }
90
  return result
91
-
 
92
  gr.Interface(
93
- fn=prefict_emotion,
94
- inputs=gr.inputs.Textbox(
95
- lines=3,
96
- placeholder="Type a phrase that has some emotion",
97
- label="Input Text",
98
- ),
99
- outputs="label",
100
- title="Sentiment Analysis",
101
- examples=[
102
- "This film is terrible",
103
- "This film is great",
104
- ],
105
- ).launch(share=True)
 
1
+ # Copyright 2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """ LSTM inference """
16
+
17
+
18
  import math
19
  import numpy as np
20
 
21
  import gradio as gr
22
  import mindspore
 
23
  import mindspore.numpy as mnp
24
+ from mindspore import Tensor, nn, \
25
+ load_checkpoint, load_param_into_net, ops, dataset
 
 
26
  from mindspore.common.initializer import Uniform, HeUniform
27
 
28
 
29
def load_glove():
    """Load pretrained GloVe 100-d word vectors and build the vocabulary.

    Reads "./glove.6B.100d.txt" (one token followed by 100 floats per line).

    Returns:
        tuple: ``(vocab, embeddings)`` where ``vocab`` is a
        ``mindspore.dataset.text.Vocab`` over the GloVe tokens plus the two
        special tokens ``"<unk>"`` and ``"<pad>"`` appended last
        (``special_first=False``), and ``embeddings`` is a float32 ndarray
        of shape ``(len(vocab), 100)`` whose row order matches the vocab ids.
    """
    embeddings = []
    tokens = []
    with open("./glove.6B.100d.txt", encoding='utf-8') as file:
        for glove in file:
            word, embedding = glove.split(maxsplit=1)
            tokens.append(word)
            # np.fromstring(text, sep=' ') is deprecated in NumPy;
            # parse the whitespace-separated floats explicitly instead.
            embeddings.append(np.array(embedding.split(), dtype=np.float32))
    # Embeddings for the two special placeholder tokens <unk> and <pad>.
    embeddings.append(np.random.rand(100))           # <unk>: random vector
    embeddings.append(np.zeros((100,), np.float32))  # <pad>: all zeros

    vocab = dataset.text.Vocab.from_list(tokens,
                                         special_tokens=["<unk>", "<pad>"],
                                         special_first=False)
    embeddings = np.array(embeddings).astype(np.float32)
    return vocab, embeddings
48
 
49
+
50
  class RNN(nn.Cell):
51
  def __init__(self, embeddings, hidden_dim, output_dim, n_layers,
52
  bidirectional, dropout, pad_idx):
53
  super().__init__()
54
  vocab_size, embedding_dim = embeddings.shape
55
+ self.embedding = nn.Embedding(vocab_size, embedding_dim,
56
+ embedding_table=Tensor(embeddings),
57
+ padding_idx=pad_idx)
58
  self.rnn = nn.LSTM(embedding_dim,
59
  hidden_dim,
60
  num_layers=n_layers,
 
63
  batch_first=True)
64
  weight_init = HeUniform(math.sqrt(5))
65
  bias_init = Uniform(1 / math.sqrt(hidden_dim * 2))
66
+ self.fc_layer = nn.Dense(hidden_dim * 2, output_dim,
67
+ weight_init=weight_init,
68
+ bias_init=bias_init)
69
  self.dropout = nn.Dropout(1 - dropout)
70
  self.sigmoid = ops.Sigmoid()
71
 
72
  def construct(self, inputs):
73
  embedded = self.dropout(self.embedding(inputs))
74
  _, (hidden, _) = self.rnn(embedded)
75
+ hidden = self.dropout(mnp.concatenate((hidden[-2, :, :],
76
+ hidden[-1, :, :]),
77
+ axis=1))
78
+ output = self.fc_layer(hidden)
79
  return self.sigmoid(output)
80
 
81
+
82
  def predict_sentiment(model, vocab, sentence):
83
  model.set_train(False)
84
  tokenized = sentence.lower().split()
 
88
  prediction = model(tensor)
89
  return prediction.asnumpy()
90
 
91
+
92
# --- Module-level setup: build the network and restore trained weights. ---
vocab, embeddings = load_glove()

pad_token_id = vocab.tokens_to_ids('<pad>')
net = RNN(embeddings,
          hidden_dim=256,
          output_dim=1,
          n_layers=2,
          bidirectional=True,
          dropout=0.5,
          pad_idx=pad_token_id)

# Read the trained parameters into a name -> Parameter dictionary ...
param_dict = load_checkpoint("./sentiment-analysis.ckpt")
# ... and load them into the network in place.
load_param_into_net(net, param_dict)
106
+
107
+
108
def predict_emotion(sentence):
    """Classify *sentence* and return a Gradio label mapping.

    Args:
        sentence (str): free-form input text from the UI.

    Returns:
        dict: confidence per class; the model emits the positive-class
        probability and the negative share is its complement.
    """
    # Run inference; .item() unwraps the scalar prediction.
    positive = predict_sentiment(net, vocab, sentence).item()
    return {
        "Positive 🙂": positive,
        "Negative 🙃": 1 - positive,
    }
116
+
117
+
118
# Build the Gradio demo around the classifier, then launch it.
demo = gr.Interface(
    fn=predict_emotion,
    inputs=gr.inputs.Textbox(
        lines=3,
        placeholder="Type a phrase that has some emotion",
        label="Input Text",
    ),
    outputs="label",
    title="基于LSTM的文本情感分类任务",
    examples=[
        "This film is terrible",
        "This film is great",
    ],
)
demo.launch(share=True)