Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -1,54 +1,54 @@
|
|
1 |
import streamlit as st
|
2 |
import time
|
3 |
-
import torch
|
4 |
|
5 |
from better_transformer import *
|
6 |
|
7 |
-
|
8 |
def main():
|
9 |
|
10 |
# Enable CUDA if available and load in tokenizer
|
11 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
12 |
tokenizer, EMPTY_TOKENS = load_tokenizer(device)
|
13 |
|
14 |
-
st.title("
|
15 |
st.subheader("UCLA DSU Project, Fall 2023")
|
16 |
-
st.markdown("Daniel Mendelevitch
|
17 |
|
18 |
-
st.header("
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
|
26 |
st.header("Let's make some stories! π")
|
27 |
|
28 |
# Input from user
|
29 |
-
user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own or leave it empty for a random story!").strip()
|
30 |
|
31 |
if st.checkbox("Show Prompting Tips"):
|
32 |
-
st.markdown("
|
33 |
st.markdown(
|
34 |
"""
|
35 |
- Use simple vocabulary - words and themes that would appear in a children's story
|
36 |
- Avoid using idioms - for example, instead of "hit the gym", say "went to the gym"
|
37 |
- Include plenty of descriptive adjectives
|
38 |
-
- The model often struggles with names
|
39 |
"""
|
40 |
)
|
41 |
## Default values for advanced settings
|
42 |
-
user_seed = None #
|
43 |
generation_method = "top-k"
|
44 |
specified_k = 5
|
45 |
specified_nucleus = 0.5
|
46 |
specified_temperature = 0.9
|
47 |
-
max_tokens =
|
48 |
|
49 |
if st.checkbox("Show Advanced Settings"):
|
50 |
user_seed = st.number_input("Randomness Seed:", value = None, step = 1, placeholder="Use to replicate response", min_value = 1)
|
51 |
-
generation_method = st.selectbox("Method of Generation:", ("top-k", "
|
52 |
|
53 |
if generation_method == "top-k":
|
54 |
specified_k = st.number_input("Value for k:", value = 5, step = 1)
|
@@ -59,7 +59,7 @@ def main():
|
|
59 |
if generation_method == "temperature":
|
60 |
specified_temperature = st.number_input("Value for temperature:", value = 0.9, step = 0.05, min_value = 0.0, max_value = 1.0)
|
61 |
|
62 |
-
max_tokens = st.slider('Max Tokens Generated:',
|
63 |
|
64 |
|
65 |
|
@@ -72,7 +72,6 @@ def main():
|
|
72 |
model.cuda()
|
73 |
|
74 |
|
75 |
-
|
76 |
if st.button('Write my story!'):
|
77 |
placeholder = st.empty()
|
78 |
# if model_version == 'smoll':
|
@@ -114,7 +113,8 @@ def main():
|
|
114 |
if st.button('Clear Output'):
|
115 |
placeholder = st.empty()
|
116 |
|
117 |
-
|
|
|
118 |
|
119 |
|
120 |
if __name__ == "__main__":
|
|
|
1 |
import streamlit as st
|
2 |
import time
|
|
|
3 |
|
4 |
from better_transformer import *
|
5 |
|
|
|
6 |
def main():
|
7 |
|
8 |
# Enable CUDA if available and load in tokenizer
|
9 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
10 |
tokenizer, EMPTY_TOKENS = load_tokenizer(device)
|
11 |
|
12 |
+
st.title("Short Story Transformer Demo")
|
13 |
st.subheader("UCLA DSU Project, Fall 2023")
|
14 |
+
st.markdown("By Daniel Mendelevitch, Terry Ming, Casey Tattersall, Sean Tjoa")
|
15 |
|
16 |
+
st.header("Data and Training")
|
17 |
|
18 |
+
st.markdown("""We used the dataset from the [TinyStories Research Paper](https://arxiv.org/pdf/2305.07759.pdf) (Ronen Eldan and Yuanzhi Li, Microsoft),
|
19 |
+
which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
|
20 |
+
st.markdown("""Our final model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,256) and consists of 8 transformer blocks,
|
21 |
+
16 attention heads, and an embedding dimension of 768, for a total of 133M parameters. The model was trained on 8 H100 GPUs for ~7 hours, and has a cross-entropy validation loss of 1.16,
|
22 |
+
which is superior to any model in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
|
23 |
+
st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
|
24 |
+
quite effective at generating new short stories. **Try it out below!**""")
|
25 |
|
26 |
st.header("Let's make some stories! π")
|
27 |
|
28 |
# Input from user
|
29 |
+
user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own, or leave it empty for a random story!").strip()
|
30 |
|
31 |
if st.checkbox("Show Prompting Tips"):
|
32 |
+
st.markdown("The model can struggle with some prompts, especially those outside of its limited domain. If a response isn't satisfactory, try repeating the generation, or make the following modifications:")
|
33 |
st.markdown(
|
34 |
"""
|
35 |
- Use simple vocabulary - words and themes that would appear in a children's story
|
36 |
- Avoid using idioms - for example, instead of "hit the gym", say "went to the gym"
|
37 |
- Include plenty of descriptive adjectives
|
38 |
+
- The model often struggles with names. **Using common names and sticking with first names only can help.**
|
39 |
"""
|
40 |
)
|
41 |
## Default values for advanced settings
|
42 |
+
user_seed = None # Remove if we're not rigging the "random" demo
|
43 |
generation_method = "top-k"
|
44 |
specified_k = 5
|
45 |
specified_nucleus = 0.5
|
46 |
specified_temperature = 0.9
|
47 |
+
max_tokens = 750
|
48 |
|
49 |
if st.checkbox("Show Advanced Settings"):
|
50 |
user_seed = st.number_input("Randomness Seed:", value = None, step = 1, placeholder="Use to replicate response", min_value = 1)
|
51 |
+
generation_method = st.selectbox("Method of Generation:", ("top-k", "nucleus", "temperature", "multinomial", "greedy"), index = 0).strip()
|
52 |
|
53 |
if generation_method == "top-k":
|
54 |
specified_k = st.number_input("Value for k:", value = 5, step = 1)
|
|
|
59 |
if generation_method == "temperature":
|
60 |
specified_temperature = st.number_input("Value for temperature:", value = 0.9, step = 0.05, min_value = 0.0, max_value = 1.0)
|
61 |
|
62 |
+
max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)
|
63 |
|
64 |
|
65 |
|
|
|
72 |
model.cuda()
|
73 |
|
74 |
|
|
|
75 |
if st.button('Write my story!'):
|
76 |
placeholder = st.empty()
|
77 |
# if model_version == 'smoll':
|
|
|
113 |
if st.button('Clear Output'):
|
114 |
placeholder = st.empty()
|
115 |
|
116 |
+
st.markdown('####')
|
117 |
+
st.caption(r'Data Attribution: Tinystories (License: CDLA-Sharing-1.0) https://arxiv.org/abs/2305.07759')
|
118 |
|
119 |
|
120 |
if __name__ == "__main__":
|