Build error
Build error
Duplicate from os1187/gpt2-chatbot
Browse filesCo-authored-by: Oleg Seifert <>
- .gitattributes +34 -0
- .gitignore +2 -0
- Pipfile +178 -0
- Pipfile.lock +0 -0
- +14 -0
- +139 -0
- requirements.txt +9 -0
@@ -0,0 +1,34 @@
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
5 |
*.ckpt filter=lfs diff=lfs merge=lfs -text
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
11 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
12 |
*.model filter=lfs diff=lfs merge=lfs -text
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
14 |
*.npy filter=lfs diff=lfs merge=lfs -text
15 |
*.npz filter=lfs diff=lfs merge=lfs -text
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
20 |
*.pickle filter=lfs diff=lfs merge=lfs -text
21 |
*.pkl filter=lfs diff=lfs merge=lfs -text
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
28 |
*.tflite filter=lfs diff=lfs merge=lfs -text
29 |
*.tgz filter=lfs diff=lfs merge=lfs -text
30 |
*.wasm filter=lfs diff=lfs merge=lfs -text
31 |
*.xz filter=lfs diff=lfs merge=lfs -text
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,2 @@
1 |
2 |
@@ -0,0 +1,178 @@
1 |
2 |
url = ""
3 |
verify_ssl = true
4 |
name = "pypi"
5 |
6 |
7 |
gradio = "==3.10.1"
8 |
tensorflow = "==2.11.0"
9 |
transformers = "==4.24.0"
10 |
absl-py = "==1.3.0"
11 |
aiohttp = "==3.8.3"
12 |
aiosignal = "==1.3.1"
13 |
antlr4-python3-runtime = "==4.8"
14 |
anyio = "==3.6.2"
15 |
appdirs = "==1.4.4"
16 |
astunparse = "==1.6.3"
17 |
async-timeout = "==4.0.2"
18 |
attrs = "==22.1.0"
19 |
audioread = "==3.0.0"
20 |
autoflake = "==2.0.0"
21 |
bcrypt = "==4.0.1"
22 |
bitarray = "==2.6.0"
23 |
blis = "==0.7.9"
24 |
cachetools = "==5.2.0"
25 |
catalogue = "==2.0.8"
26 |
certifi = "==2022.9.24"
27 |
cffi = "==1.15.1"
28 |
charset-normalizer = "==2.1.1"
29 |
ci-sdr = "==0.0.2"
30 |
click = "==8.1.3"
31 |
colorama = "==0.4.6"
32 |
confection = "==0.0.3"
33 |
configargparse = "==1.5.3"
34 |
contourpy = "==1.0.6"
35 |
cryptography = "==38.0.3"
36 |
ctc-segmentation = "==1.7.4"
37 |
cycler = "==0.11.0"
38 |
cymem = "==2.0.7"
39 |
cython = "==0.29.32"
40 |
decorator = "==5.1.1"
41 |
distance = "==0.1.3"
42 |
einops = "==0.6.0"
43 |
en-core-web-sm = {file = ""}
44 |
espnet = "==202209"
45 |
espnet-tts-frontend = "==0.0.3"
46 |
fairseq = "==0.12.2"
47 |
fast-bss-eval = "==0.1.3"
48 |
fastapi = "==0.76.0"
49 |
ffmpy = "==0.3.0"
50 |
filelock = "==3.8.0"
51 |
flatbuffers = "==22.10.26"
52 |
fonttools = "==4.38.0"
53 |
frozenlist = "==1.3.3"
54 |
fsspec = "==2022.11.0"
55 |
g2p-en = "==2.1.0"
56 |
gast = "==0.4.0"
57 |
google-auth = "==2.14.1"
58 |
google-auth-oauthlib = "==0.4.6"
59 |
google-pasta = "==0.2.0"
60 |
grpcio = "==1.34.1"
61 |
h11 = "==0.12.0"
62 |
h5py = "==3.1.0"
63 |
httpcore = "==0.15.0"
64 |
httpx = "==0.23.1"
65 |
huggingface-hub = "==0.11.0"
66 |
humanfriendly = "==10.0"
67 |
hydra-core = "==1.0.7"
68 |
idna = "==3.4"
69 |
importlib-metadata = "==4.13.0"
70 |
inflect = "==6.0.2"
71 |
jaconv = "==0.3"
72 |
jamo = "==0.4.1"
73 |
jinja2 = "==3.1.2"
74 |
joblib = "==1.2.0"
75 |
kaldiio = "==2.17.2"
76 |
keras = "==2.11.0"
77 |
keras-nightly = "==2.5.0.dev2021032900"
78 |
keras-preprocessing = "==1.1.2"
79 |
kiwisolver = "==1.4.4"
80 |
langcodes = "==3.3.0"
81 |
libclang = "==14.0.6"
82 |
librosa = "==0.9.2"
83 |
linkify-it-py = "==1.0.3"
84 |
llvmlite = "==0.39.1"
85 |
lxml = "==4.9.1"
86 |
markdown = "==3.4.1"
87 |
markdown-it-py = "==2.1.0"
88 |
markupsafe = "==2.1.1"
89 |
matplotlib = "==3.6.2"
90 |
mdit-py-plugins = "==0.3.1"
91 |
mdurl = "==0.1.2"
92 |
multidict = "==6.0.2"
93 |
murmurhash = "==1.0.9"
94 |
nltk = "==3.7"
95 |
numba = "==0.56.4"
96 |
numpy = "==1.23.5"
97 |
oauthlib = "==3.2.2"
98 |
omegaconf = "==2.0.6"
99 |
opt-einsum = "==3.3.0"
100 |
orjson = "==3.8.2"
101 |
pandas = "==1.4.4"
102 |
paramiko = "==2.12.0"
103 |
pathy = "==0.10.0"
104 |
pillow = "==9.3.0"
105 |
plotly = "==5.11.0"
106 |
pooch = "==1.6.0"
107 |
portalocker = "==2.6.0"
108 |
preshed = "==3.0.8"
109 |
protobuf = "==3.19.6"
110 |
pyasn1 = "==0.4.8"
111 |
pyasn1-modules = "==0.2.8"
112 |
pycparser = "==2.21"
113 |
pycryptodome = "==3.15.0"
114 |
pydantic = "==1.9.2"
115 |
pydub = "==0.25.1"
116 |
pyflakes = "==3.0.1"
117 |
pynacl = "==1.5.0"
118 |
pyparsing = "==3.0.9"
119 |
pypinyin = "==0.44.0"
120 |
python-dateutil = "==2.8.2"
121 |
python-multipart = "==0.0.5"
122 |
pytorch-wpe = "==0.0.1"
123 |
pytz = "==2022.6"
124 |
pyworld = "==0.3.2"
125 |
pyyaml = "==6.0"
126 |
regex = "==2022.10.31"
127 |
requests = "==2.28.1"
128 |
requests-oauthlib = "==1.3.1"
129 |
resampy = "==0.4.2"
130 |
rfc3986 = "==1.5.0"
131 |
rsa = "==4.9"
132 |
sacrebleu = "==2.3.1"
133 |
scikit-learn = "==1.1.3"
134 |
scipy = "==1.9.3"
135 |
sentencepiece = "==0.1.97"
136 |
six = "==1.15.0"
137 |
smart-open = "==5.2.1"
138 |
sniffio = "==1.3.0"
139 |
soundfile = "==0.11.0"
140 |
spacy = "==3.4.3"
141 |
spacy-legacy = "==3.0.10"
142 |
spacy-loggers = "==1.0.3"
143 |
srsly = "==2.4.5"
144 |
starlette = "==0.18.0"
145 |
tabulate = "==0.9.0"
146 |
tenacity = "==8.1.0"
147 |
tensorboard = "==2.11.0"
148 |
tensorboard-data-server = "==0.6.1"
149 |
tensorboard-plugin-wit = "==1.8.1"
150 |
tensorflow-estimator = "==2.11.0"
151 |
tensorflow-io-gcs-filesystem = "==0.28.0"
152 |
termcolor = "==1.1.0"
153 |
thinc = "==8.1.5"
154 |
threadpoolctl = "==3.1.0"
155 |
tokenizers = "==0.13.2"
156 |
tomli = "==2.0.1"
157 |
torch = "==1.13.0"
158 |
torch-complex = "==0.4.3"
159 |
torchaudio = "==0.13.0"
160 |
tqdm = "==4.64.1"
161 |
typeguard = "==2.13.3"
162 |
typer = "==0.7.0"
163 |
typing-extensions = "==4.4.0"
164 |
uc-micro-py = "==1.0.1"
165 |
unidecode = "==1.3.6"
166 |
urllib3 = "==1.26.12"
167 |
uvicorn = "==0.20.0"
168 |
wasabi = "==0.10.1"
169 |
websockets = "==10.4"
170 |
werkzeug = "==2.2.2"
171 |
wrapt = "==1.12.1"
172 |
yarl = "==1.8.1"
173 |
zipp = "==3.10.0"
174 |
175 |
176 |
177 |
178 |
python_version = "3.9"
The diff for this file is too large to render.
See raw diff
@@ -0,0 +1,14 @@
1 |
2 |
title: Funny Chatbot
3 |
emoji: 🌖
4 |
colorFrom: yellow
5 |
colorTo: yellow
6 |
sdk: gradio
7 |
sdk_version: 3.9.1
8 |
9 |
pinned: false
10 |
license: cc-by-nc-sa-4.0
11 |
duplicated_from: os1187/gpt2-chatbot
12 |
13 |
14 |
Check out the configuration reference at
@@ -0,0 +1,139 @@
1 |
from transformers import TFAutoModelForCausalLM, AutoTokenizer
2 |
import tensorflow as tf
3 |
import gradio as gr
4 |
import spacy
5 |
from spacy import displacy
6 |
from transformers import TFAutoModelForSequenceClassification
7 |
from transformers import AutoTokenizer
8 |
from scipy.special import softmax
9 |
import plotly.express as px
10 |
import plotly.io as pio
11 |
12 |
# configuration params
13 |
pio.templates.default = "plotly_dark"
14 |
15 |
# setting up the text in the page
16 |
TITLE = "<center><h1>Talk with an AI</h1></center>"
17 |
DESCRIPTION = r"""<center>This application allows you to talk with a machine/robot with state-of-the-art technology!!<br>
18 |
In the back-end is using the GPT2 model from OpenAI. One of the best models in text generation and comprehension.<br>
19 |
Language processing is done using RoBERTa for sentiment-analysis and spaCy for named-entity recognition and dependency plotting.<br>
20 |
The AI thinks he is a human, so please treat him as such, else he might get angry!<br>
21 |
22 |
23 |
["What is your favorite videogame?"],
24 |
["What gets you really sad?"],
25 |
["How can I make you really angry? "],
26 |
["What do you do for work?"],
27 |
["What are your hobbies?"],
28 |
["What is your favorite food?"],
29 |
30 |
ARTICLE = r"""<center>
31 |
Done by dr. Gabriel Lopez<br>
32 |
For more please visit: <a href=''>My Page</a><br>
33 |
For info about the chat-bot model you can also see the <a href="">ArXiv paper</a><br>
34 |
35 |
36 |
# Loading necessary NLP models
37 |
# dialog
38 |
checkpoint = "microsoft/DialoGPT-medium" # tf
39 |
model_gtp2 = TFAutoModelForCausalLM.from_pretrained(checkpoint)
40 |
tokenizer_gtp2 = AutoTokenizer.from_pretrained(checkpoint)
41 |
# sentiment
42 |
checkpoint = f"cardiffnlp/twitter-roberta-base-emotion"
43 |
model_roberta = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
44 |
tokenizer_roberta = AutoTokenizer.from_pretrained(checkpoint)
45 |
# NER & Dependency
46 |
nlp = spacy.load("en_core_web_sm")
47 |
48 |
# text-to-text : chatting function -- GPT2
49 |
def chat_with_bot(user_input, chat_history_and_input=[]):
50 |
"""Text generation using GPT2"""
51 |
emb_user_input = tokenizer_gtp2.encode(
52 |
user_input + tokenizer_gtp2.eos_token, return_tensors="tf"
53 |
54 |
if chat_history_and_input == []:
55 |
bot_input_ids = emb_user_input # first iteration
56 |
57 |
bot_input_ids = tf.concat(
58 |
[chat_history_and_input, emb_user_input], axis=-1
59 |
) # other iterations
60 |
chat_history_and_input = model_gtp2.generate(
61 |
bot_input_ids, max_length=1000, pad_token_id=tokenizer_gtp2.eos_token_id
62 |
63 |
# print
64 |
bot_response = tokenizer_gtp2.decode(
65 |
chat_history_and_input[:, bot_input_ids.shape[-1] :][0],
66 |
67 |
68 |
return bot_response, chat_history_and_input
69 |
70 |
71 |
# text-to-sentiment
72 |
def text_to_sentiment(text_input):
    """Plot the emotion scores the RoBERTa classifier assigns to a text.

    The text is tokenized with ``tokenizer_roberta``, run through
    ``model_roberta``, and the first row of the model's first output is
    normalized with ``scipy.special.softmax``.

    Args:
        text_input: Text to classify.

    Returns:
        A Plotly histogram figure (height 200) with one bar per emotion
        label, weighted by the softmax score.
    """
    # NOTE(review): the label order is hard-coded; it is assumed to match
    # the class order of the loaded checkpoint -- confirm against the model.
    emotion_names = ["anger", "joy", "optimism", "sadness"]
    tokens = tokenizer_roberta(text_input, return_tensors="tf")
    # First output of the model, first (only) item of the batch.
    raw_scores = model_roberta(tokens)[0][0].numpy()
    probabilities = softmax(raw_scores)
    return px.histogram(x=emotion_names, y=probabilities, height=200)
80 |
81 |
82 |
# text_to_semantics
83 |
def text_to_semantics(text_input):
84 |
"""NER and Dependency plot using Spacy"""
85 |
processed_text = nlp(text_input)
86 |
# Dependency
87 |
html_dep = displacy.render(
88 |
89 |
90 |
options={"compact": True, "color": "white", "bg": "light-black"},
91 |
92 |
93 |
html_dep = "" + html_dep + ""
94 |
95 |
pos_tokens = []
96 |
for token in processed_text:
97 |
pos_tokens.extend([(token.text, token.pos_), (" ", None)])
98 |
# html_ner = ("" + html_ner + "")s
99 |
return pos_tokens, html_dep
100 |
101 |
102 |
# gradio interface
103 |
blocks = gr.Blocks()
104 |
with blocks:
105 |
# physical elements
106 |
session_state = gr.State([])
107 |
108 |
109 |
with gr.Row():
110 |
with gr.Column():
111 |
in_text = gr.Textbox(value="How was the class?", label="Start chatting!")
112 |
submit_button = gr.Button("Submit")
113 |
gr.Examples(inputs=in_text, examples=EXAMPLES)
114 |
with gr.Column():
115 |
response_text = gr.Textbox(value="", label="GPT2 response:")
116 |
sentiment_plot = gr.Plot(
117 |
label="How is GPT2 feeling about your conversation?:", visible=True
118 |
119 |
ner_response = gr.Highlight(
120 |
label="Named Entity Recognition (NER) over response"
121 |
122 |
dependency_plot = gr.HTML(label="Dependency plot of response")
123 |
124 |
# event listeners
125 |
126 |
inputs=[in_text, session_state],
127 |
outputs=[response_text, session_state],
128 |
129 |
130 |
131 |
inputs=response_text, outputs=sentiment_plot, fn=text_to_sentiment
132 |
133 |
134 |
135 |
outputs=[ner_response, dependency_plot],
136 |
137 |
138 |
139 |
@@ -0,0 +1,9 @@
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
# spacy internal nlp model
9 |