geetu040 committed on
Commit d08668b
1 Parent(s): 2b4a01c

Initial Upload

Files changed (8)
  1. Dockerfile +14 -0
  2. dumps/model.pt +3 -0
  3. dumps/params.json +1 -0
  4. dumps/vocab.pt +3 -0
  5. main.py +31 -0
  6. requirements.txt +4 -0
  7. utils/model.py +106 -0
  8. utils/preprocess.py +19 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
dumps/model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d5d911575429b382c886c2b9764ba3226128fe1d1368ec77bbcca6925014db1
+ size 4465302
dumps/params.json ADDED
@@ -0,0 +1 @@
+ [8000, 128, 0, 64, 1, 2, [64, 64], 0.4]
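
For reference, get_model in utils/model.py unpacks this list positionally into the ClassifierAttention constructor, so the values line up with its arguments roughly as sketched below (a reading of the committed code, not a separate spec):

    # How dumps/params.json maps onto ClassifierAttention(*params),
    # inferred from the constructor signature in utils/model.py.
    params = [8000, 128, 0, 64, 1, 2, [64, 64], 0.4]
    (vocab_size, emb_dim, padding_idx, hidden_size,
     n_layers, attention_heads, hidden_layer_units, dropout) = params
    # vocab_size=8000, emb_dim=128, padding_idx=0, hidden_size=64,
    # n_layers=1, attention_heads=2, hidden_layer_units=[64, 64], dropout=0.4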
dumps/vocab.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75ea0d5a228a81d16d25cda6f207ce443f1469431497cdcf914384d2e642907b
+ size 131115
main.py ADDED
@@ -0,0 +1,31 @@
+ # ======= PREPARING THE PIPELINE =======
+
+ import torch
+ import os
+ from utils.preprocess import get_preprocess
+ from utils.model import get_model
+
+ dump_path = "./dumps/"
+ vocab_path = os.path.join(dump_path, "vocab.pt")
+ model_path = os.path.join(dump_path, "model.pt")
+ params_path = os.path.join(dump_path, "params.json")
+
+ preprocess = get_preprocess(vocab_path)
+ model = get_model(model_path, params_path)
+
+ def predict(text):
+     x = preprocess(text)  # raw text -> list of vocab indices
+     x = torch.tensor([x])  # add a batch dimension
+     y = model(x)
+     y = y.detach().numpy().tolist()[0]  # single sigmoid score as a Python float
+     return y
+
+ # ======= CREATING APP =======
+
+ from fastapi import FastAPI
+
+ app = FastAPI()
+
+ @app.get("/")
+ def main(text: str):
+     return predict(text)
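
With the image running, uvicorn serves the app on port 7860 (see the Dockerfile), and the single GET endpoint reads its input from a text query parameter. A minimal client sketch, assuming the service is reachable on localhost and using the requests library, which is not among this repo's requirements:

    # Query the running service; the URL and example sentence are illustrative.
    import requests

    resp = requests.get(
        "http://localhost:7860/",
        params={"text": "An example sentence to classify."},
    )
    print(resp.json())  # a single float, the model's sigmoid output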
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ torch
+ torchtext
utils/model.py ADDED
@@ -0,0 +1,106 @@
+ import torch
+ import torch.nn as nn
+ import json
+
+ def attention(Q, K, V):  # scaled dot-product attention
+     d = K.shape[-1]
+     QK = Q @ K.transpose(-2, -1)
+     QK_d = QK / (d ** 0.5)
+     weights = torch.softmax(QK_d, dim=-1)
+     outputs = weights @ V
+     return outputs
+
+ class Attention(torch.nn.Module):
+     def __init__(self, emb_dim, n_heads):
+         super(Attention, self).__init__()
+
+         self.emb_dim = emb_dim
+         self.n_heads = n_heads
+
+     def forward(self, X):
+
+         batch_size, seq_len, emb_dim = X.size()  # (batch_size, seq_len, emb_dim)
+         n_heads = self.n_heads
+         emb_dim_per_head = emb_dim // n_heads
+
+         assert emb_dim == self.emb_dim
+         assert emb_dim_per_head * n_heads == emb_dim
+
+         X = X.view(batch_size, seq_len, n_heads, emb_dim_per_head).transpose(1, 2)  # split into heads
+         output = attention(X, X, X)  # (batch_size, n_heads, seq_len, emb_dim_per_head)
+         output = output.transpose(1, 2)  # (batch_size, seq_len, n_heads, emb_dim_per_head)
+         output = output.contiguous().view(batch_size, seq_len, emb_dim)  # (batch_size, seq_len, emb_dim)
+
+         return output
+
+ class ClassifierAttention(nn.Module):
+     def __init__(self, vocab_size, emb_dim, padding_idx, hidden_size, n_layers, attention_heads, hidden_layer_units, dropout):
+         super(ClassifierAttention, self).__init__()
+
+         self.embedding = nn.Embedding(
+             num_embeddings = vocab_size,
+             embedding_dim = emb_dim,
+             padding_idx = padding_idx
+         )
+
+         self.rnn_1 = nn.LSTM(
+             emb_dim,
+             hidden_size,
+             n_layers,
+             bidirectional = False,
+             batch_first = True,
+         )
+
+         self.attention = Attention(hidden_size, attention_heads)
+
+         self.rnn_2 = nn.LSTM(
+             hidden_size,
+             hidden_size,
+             n_layers,
+             bidirectional = False,
+             batch_first = True,
+         )
+
+         self.dropout = nn.Dropout(dropout)
+
+         hidden_layer_units = [hidden_size, *hidden_layer_units]
+         self.hidden_layers = nn.ModuleList([])
+         for in_unit, out_unit in zip(hidden_layer_units[:-1], hidden_layer_units[1:]):
+             self.hidden_layers.append(nn.Linear(in_unit, out_unit))
+             self.hidden_layers.append(nn.ReLU())
+             self.hidden_layers.append(self.dropout)
+         self.hidden_layers.append(nn.Linear(hidden_layer_units[-1], 1))
+
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, x):
+         # x: (batch_size, seq_len)
+
+         out = self.embedding(x)  # (batch_size, seq_len, emb_dim)
+         out, (hidden_state, cell_state) = self.rnn_1(out)  # (batch_size, seq_len, hidden_size)
+         out = self.attention(out)  # (batch_size, seq_len, hidden_size)
+         out = self.dropout(out)
+         output, (hidden_state, cell_state) = self.rnn_2(out)
+         out = hidden_state[-1]  # (batch_size, hidden_size)
+         out = self.dropout(out)
+         # output:       (batch_size, seq_len, hidden_size)
+         # hidden_state: (n_layers*n_direction, batch_size, hidden_size)
+         # cell_state:   (n_layers*n_direction, batch_size, hidden_size)
+
+         for layer in self.hidden_layers:
+             out = layer(out)
+
+         out = self.sigmoid(out)  # (batch_size, 1)
+         out = out.squeeze(-1)  # (batch_size)
+
+         return out
+
+ def get_model(model_path, params_path):
+     with open(params_path, 'rb') as f:
+         params = json.load(f)
+
+     model = ClassifierAttention(*params)
+     model.load_state_dict(torch.load(model_path))
+     model.eval()
+
+     return model
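
As a quick sanity check of the architecture, ClassifierAttention can be instantiated with the values from dumps/params.json and fed a batch of random token IDs. A minimal sketch, assuming it is run from the repository root; the batch size, sequence length, and input values are illustrative only:

    # Build the classifier with the committed hyperparameters and check the output shape.
    import torch
    from utils.model import ClassifierAttention

    params = [8000, 128, 0, 64, 1, 2, [64, 64], 0.4]  # contents of dumps/params.json
    model = ClassifierAttention(*params)
    model.eval()

    x = torch.randint(low=1, high=8000, size=(2, 16))  # (batch_size=2, seq_len=16)
    with torch.no_grad():
        y = model(x)
    print(y.shape)  # expected: torch.Size([2]), one sigmoid score per example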
utils/preprocess.py ADDED
@@ -0,0 +1,19 @@
+ import torch
+ import torchtext
+ import re
+
+ def clean_text(text):
+     # Remove leading/trailing whitespace
+     text = text.strip()
+     # Collapse runs of whitespace into single spaces
+     text = re.sub(r'\s+', ' ', text)
+     # Lowercase the text
+     text = text.lower()
+     # Remove punctuation marks
+     text = re.sub(r'[^\w\s]', '', text)
+     return text
+
+ def get_preprocess(vocab_path):
+     tokenizer = torchtext.data.utils.get_tokenizer('basic_english')
+     vocab = torch.load(vocab_path)
+     return lambda text: vocab(tokenizer(clean_text(text)))
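
For illustration, get_preprocess returns a callable that maps raw text to a list of vocabulary indices. A minimal sketch, assuming it is run from the repository root so that dumps/vocab.pt is available; the example sentence is arbitrary and the printed indices depend on the saved vocab:

    # Clean, tokenize, and numericalize a sample sentence.
    from utils.preprocess import clean_text, get_preprocess

    print(clean_text("  Hello,   World! "))  # -> "hello world"

    preprocess = get_preprocess("./dumps/vocab.pt")
    ids = preprocess("Hello, World!")  # list of token IDs from the saved vocab
    print(ids)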