miteshkotak7 commited on
Commit
e058246
·
1 Parent(s): 666394c

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /code

# Copy only the requirements first so the dependency layer is cached
# independently of application code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# Set home to the user's home directory.
# GRADIO_SERVER_NAME / GRADIO_SERVER_PORT make Gradio listen on all interfaces
# on port 7860; app.py calls demo.launch() without an explicit host/port, so
# without these it would only bind to 127.0.0.1 inside the container.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

EXPOSE 7860

# app.py is a click CLI that defines no --server.port / --server.address
# options (those are Streamlit-style flags); passing them makes click abort
# with "No such option", so the app is started without extra arguments.
CMD ["python", "app.py"]
__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Apache Software License 2.0
2
+ #
3
+ # Copyright (c) ZenML GmbH 2023. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
__pycache__/__init__.cpython-38.pyc ADDED
Binary file (169 Bytes). View file
 
__pycache__/aws_helper.cpython-38.pyc ADDED
Binary file (965 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Apache Software License 2.0
2
+ #
3
+ # Copyright (c) ZenML GmbH 2023. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import os
17
+ from os.path import dirname
18
+ from typing import Optional
19
+
20
+ import click
21
+ import numpy as np
22
+ import sagemaker
23
+ from aws_helper import get_sagemaker_session
24
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
25
+ from zenml.client import Client
26
+
27
+ import gradio as gr
28
+
29
+
30
@click.command()
@click.option(
    "--tokenizer_name_or_path",
    default=None,
    help="Name or the path of the tokenizer.",
)
@click.option(
    "--model_name_or_path", default=None, help="Name or the path of the model."
)
@click.option(
    "--labels", default="Negative,Positive", help="Comma-separated list of labels."
)
@click.option(
    "--title", default="ZenML NLP Use-Case", help="Title of the Gradio interface."
)
@click.option(
    "--description",
    default="Text Classification - Sentiment Analysis - ZenML - Gradio",
    help="Description of the Gradio interface.",
)
@click.option(
    "--interpretation",
    default="default",
    help="Interpretation mode for the Gradio interface.",
)
@click.option(
    "--examples",
    default="This is an awesome journey, I love it!",
    help="Comma-separated list of examples to show in the Gradio interface.",
)
@click.option(
    "--pipeline_version",
    default=3,
    help="Which version of the deploy pipeline should be deployed.",
    type=int,
)
def sentiment_analysis(
    tokenizer_name_or_path: Optional[str],
    model_name_or_path: Optional[str],
    labels: Optional[str],
    title: Optional[str],
    description: Optional[str],
    interpretation: Optional[str],
    pipeline_version: int,
    examples: Optional[str],
):
    """Launch a Gradio interface for sentiment analysis.

    The interface offers two inference modes, selected per request:

    * ``local``: a tokenizer and a sequence-classification model are loaded
      from disk (once, on the first local request) and run in-process.
    * anything else (``sagemaker`` in the UI): the SageMaker endpoint name is
      read from the latest run of the ZenML deploy pipeline and the text is
      sent to that endpoint.

    Args:
        tokenizer_name_or_path: Tokenizer directory, resolved relative to the
            directory containing this file. When empty, that directory itself
            is used.
        model_name_or_path: Model directory, resolved the same way as the
            tokenizer path.
        labels: Comma-separated list of labels, in the order of the model's
            output logits. Only used for local inference.
        title: Title of the Gradio interface.
        description: Description of the Gradio interface.
        interpretation: Interpretation mode for the Gradio interface. Only
            forwarded when the installed Gradio version still accepts it.
        pipeline_version: Which version of the deploy pipeline to use.
        examples: Accepted for CLI compatibility; currently not passed to the
            Gradio interface.
    """
    # Local import: only needed for the Gradio compatibility check below.
    import inspect

    labels = [label.strip() for label in labels.split(",")]
    app_dir = os.path.abspath(dirname(__file__))
    # Holds the tokenizer/model once loaded so they are not re-read from disk
    # on every request.
    local_artifacts = {}

    def preprocess(text: str) -> str:
        """Replace user mentions with ``@user`` and links with ``http``.

        Args:
            text: Input text.

        Returns:
            The text with every space-separated token that starts with ``@``
            (and is longer than one character) replaced by ``@user`` and every
            token that starts with ``http`` replaced by ``http``.
        """
        new_text = []
        for t in text.split(" "):
            t = "@user" if t.startswith("@") and len(t) > 1 else t
            t = "http" if t.startswith("http") else t
            new_text.append(t)
        return " ".join(new_text)

    def softmax(x):
        """Return the softmax of ``x`` along axis 0."""
        # Subtracting the max keeps np.exp from overflowing.
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)

    def load_local_artifacts():
        """Return ``(tokenizer, model)``, loading them on first use."""
        if not local_artifacts:
            model_path, tokenizer_path = app_dir, app_dir
            # Build sub-paths from the absolute app directory so the result
            # does not depend on how the script was invoked.
            if model_name_or_path:
                model_path = os.path.join(app_dir, model_name_or_path)
            print(f"Loading model from {model_path}")
            if tokenizer_name_or_path:
                tokenizer_path = os.path.join(app_dir, tokenizer_name_or_path)
            print(f"Loading tokenizer from {tokenizer_path}")
            tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
            model = AutoModelForSequenceClassification.from_pretrained(
                model_path
            )
            # Store both only after both loaded, so a failed load is retried
            # cleanly on the next request.
            local_artifacts["tokenizer"] = tokenizer
            local_artifacts["model"] = model
        return local_artifacts["tokenizer"], local_artifacts["model"]

    def analyze_text(inference_type, text):
        """Return a ``{label: score}`` mapping for ``text``."""
        if inference_type == "local":
            tokenizer, model = load_local_artifacts()

            text = preprocess(text)
            # Truncate to the tokenizer's maximum length so long inputs do
            # not exceed the model's position embeddings.
            encoded_input = tokenizer(
                text, return_tensors="pt", truncation=True
            )
            output = model(**encoded_input)
            logits = output[0][0].detach().numpy()
            probabilities = softmax(logits)
            scores = {
                label: float(score)
                for label, score in zip(labels, probabilities)
            }
        else:
            client = Client()
            # The pipeline name is a runtime identifier and must match the
            # registered pipeline exactly, spelling included.
            latest_run = client.get_pipeline(
                "sentinment_analysis_deploy_pipeline", version=pipeline_version
            ).runs[0]
            endpoint_name = (
                latest_run.steps["deploy_hf_to_sagemaker"]
                .outputs["sagemaker_endpoint_name"]
                .load()
            )

            predictor = sagemaker.Predictor(
                endpoint_name=endpoint_name,
                sagemaker_session=get_sagemaker_session(),
                serializer=sagemaker.serializers.JSONSerializer(),
                deserializer=sagemaker.deserializers.JSONDeserializer(),
            )
            res = predictor.predict({"inputs": text})
            # The endpoint reports only the winning label and its score; the
            # other class gets the complement.
            if res[0]["label"] == "LABEL_1":
                scores = {
                    "Negative": 1 - res[0]["score"],
                    "Positive": res[0]["score"],
                }
            else:
                scores = {
                    "Negative": res[0]["score"],
                    "Positive": 1 - res[0]["score"],
                }

        return scores

    interface_kwargs = {}
    # Forward ``interpretation`` only if this Gradio version's Interface
    # still declares it, to avoid a TypeError on versions that dropped it.
    if "interpretation" in inspect.signature(gr.Interface.__init__).parameters:
        interface_kwargs["interpretation"] = interpretation

    demo = gr.Interface(
        fn=analyze_text,
        inputs=[
            gr.Dropdown(
                ["local", "sagemaker"],
                label="Select inference type",
                value="sagemaker",
            ),
            gr.TextArea("Write your text or tweet here", label="Analyze Text"),
        ],
        outputs=["label"],
        title=title,
        description=description,
        **interface_kwargs,
    )

    demo.launch(share=True, debug=True)
173
+
174
+
175
+ if __name__ == "__main__":
176
+ sentiment_analysis()
aws_helper.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import boto3
4
+ import sagemaker
5
+
6
# Resolve the AWS region: prefer AWS_REGION, then an already configured
# AWS_DEFAULT_REGION, and only fall back to "us-east-1" when neither is set
# (or set to an empty string).
REGION_NAME = (
    os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") or "us-east-1"
)
# Assign default value if env variable not found
ROLE_NAME = os.getenv("AWS_ROLE_NAME", "hamza_connector")
# Export the resolved region through AWS_DEFAULT_REGION for the rest of the
# process.
os.environ["AWS_DEFAULT_REGION"] = REGION_NAME

# Keyword arguments shared by the boto3 client/session constructors below.
# Credentials that are not present in the environment are passed as None.
auth_arguments = {
    "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID", None),
    "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY", None),
    "aws_session_token": os.getenv("AWS_SESSION_TOKEN", None),
    "region_name": REGION_NAME,
}
17
+
18
+
19
def get_sagemaker_role():
    """Return the ARN of the IAM role named by ``ROLE_NAME``.

    An IAM client is created with the module-level ``auth_arguments`` and the
    role is looked up by name.
    """
    iam_client = boto3.client("iam", **auth_arguments)
    response = iam_client.get_role(RoleName=ROLE_NAME)
    return response["Role"]["Arn"]
23
+
24
+
25
def get_sagemaker_session():
    """Return a ``sagemaker.Session`` built on a boto3 session.

    The boto3 session is created from the module-level ``auth_arguments``.
    """
    boto_session = boto3.Session(**auth_arguments)
    return sagemaker.Session(boto_session)
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/var/folders/hr/bpgg5x394sq1qt_z8k7_dl740000gn/T/tmp3d28qn75",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "output_past": true,
17
+ "pad_token_id": 0,
18
+ "problem_type": "single_label_classification",
19
+ "qa_dropout": 0.1,
20
+ "seq_classif_dropout": 0.2,
21
+ "sinusoidal_pos_embds": false,
22
+ "tie_weights_": true,
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.31.0",
25
+ "vocab_size": 28996
26
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297383ad805bd5d73b7f98729631fe936bc1725faa2143b31eae299e6b8e5bf1
3
+ size 263166698
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
nltk
torch
torchvision
torchaudio
gradio
datasets==2.12.0
numpy==1.22.4
pandas==1.5.3
session_info==1.0.0
scikit-learn==1.2.2
transformers==4.28.1
IPython==7.34.0
# Imported directly by app.py / aws_helper.py; the Docker image installs only
# this file, so they must be listed here.
click
boto3
sagemaker
zenml
serve.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Task name (optional), used for display purposes.
# Kept to letters, digits and dashes: the previous value had a stray "}" and
# spaces.
name: zenml-nlp-project

resources:
  cloud: gcp  # The cloud to use (optional).

# Working directory (optional), synced to ~/sky_workdir on the remote cluster
# each time launch or exec is run with the yaml file.
#
# Commands in "setup" and "run" will be executed under it.
#
# If a .gitignore file (or a .git/info/exclude file) exists in the working
# directory, files and directories listed in it will be excluded from syncing.
workdir: ./gradio

setup: |
  echo "Begin setup."
  pip install -r requirements.txt
  echo "Setup complete."

run: |
  echo 'Starting gradio app...'
  python app.py
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff