import time

import streamlit as st
import torch

from better_transformer import *
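
# ---------------------------------------------------------------------------
# Reference sketch (not used by the app): roughly how the decoding methods
# offered in the UI below pick each next token. The real implementation lives
# in better_transformer's generate(); this function's name and exact cutoff
# conventions are illustrative assumptions, not the library's API.
# ---------------------------------------------------------------------------
def _sample_next_token_sketch(logits, method="top-k", k=5, p_nucleus=0.5, temp=0.4):
    """Pick one token id from a 1-D tensor of vocabulary logits."""
    if method == "greedy":
        # Deterministic: always take the single most likely token.
        return int(torch.argmax(logits))
    if method == "temperature":
        # Rescale logits before sampling: temp < 1 sharpens the distribution,
        # temp > 1 flattens it.
        logits = logits / max(temp, 1e-8)
    elif method == "top-k":
        # Keep only the k most likely tokens; mask out everything else.
        kth_best = torch.topk(logits, k).values[-1]
        logits = logits.masked_fill(logits < kth_best, float("-inf"))
    elif method == "nucleus":
        # Top-p: keep the smallest set of tokens whose cumulative probability
        # exceeds p_nucleus (always at least the single best token).
        sorted_logits, sorted_idx = torch.sort(logits, descending=True)
        cum_probs = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
        keep = int((cum_probs < p_nucleus).sum()) + 1
        mask = torch.full_like(logits, float("-inf"))
        mask[sorted_idx[:keep]] = 0.0
        logits = logits + mask
    # "multinomial" falls through and samples from the unmodified distribution.
    probs = torch.softmax(logits, dim=-1)
    return int(torch.multinomial(probs, num_samples=1))
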
def main():
    # Use CUDA if available, and load the tokenizer
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer, EMPTY_TOKENS = load_tokenizer(device)
    st.title("TinyStories Transformer Demo 🤖")

    st.subheader("Data and Training")

    st.markdown("""We used the dataset from Microsoft Research's [TinyStories paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
                which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a PyTorch Transformer LLM.""")

    st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
                16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained overnight on 8 H100 GPUs, achieving a lower cross-entropy
                validation loss than any of the models in the TinyStories paper (likely due to a larger vocab size).""")
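
    # (For orientation: the description above corresponds to a config along the
    # lines of n_layers=8, n_heads=16, d_model=768, vocab_size=50257; these
    # names are illustrative, not better_transformer's actual API.)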
st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
quite effective at generating new short stories. **Try it out below!**""")
st.subheader("How Do I Prompt?")
st.markdown(
"""
Instead of generating a new story from scratch, you can "prompt" the model by writing the first few sentences of a story, and let it finish from there. It can even jump in mid-sentence!
The model can struggle with some prompts, especially those outside of its limited domain. If a response isn't satisfactory, try repeating the generation, or make the following modifications:
"""
)
st.markdown(
"""
- **Use simple vocabulary and syntax** - words, structures, and themes you'd see in a children's story.
- Use common first names only - the model can struggle with longer or uncommon names.
`SAMPLE PROMPT: Once upon a time, there was a little girl named Lily. She loved to play at the park with her friend Timmy. One day, Lily and Timmy`
"""
)
st.subheader("Let's make some stories! 📖")
# Input from user
user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own, or leave it empty for a random story!").strip()
## Default values for advanced settings
user_seed = None # Remove if we're not rigging the "random" demo
generation_method = "top-k"
specified_k = 5
specified_nucleus = 0.5
specified_temperature = 0.4
max_tokens = 1000
    if st.checkbox("Show Advanced Settings"):
        user_seed = st.number_input("Randomness Seed:", value=None, step=1,
                                    placeholder="Use to replicate response", min_value=1)
        generation_method = st.selectbox(
            "Method of Generation:",
            ("top-k", "nucleus", "temperature", "multinomial", "greedy"),
            index=0,
        ).strip()

        if generation_method == "top-k":
            specified_k = st.number_input("Value for k:", value=5, step=1)
        if generation_method == "nucleus":
            specified_nucleus = st.number_input("Nucleus Cutoff:", value=0.5, step=0.05,
                                                min_value=0.0, max_value=1.0)
        if generation_method == "temperature":
            specified_temperature = st.number_input("Value for temperature:", value=0.4, step=0.05,
                                                    min_value=0.0, max_value=1.0)

        max_tokens = st.slider('Max Tokens Generated:', 50, 1000, 1000)
    # model_version = st.radio("Which model would you like to use?", ["smoll", "beeg"])
    # small_model = load_casey_model(tokenizer, device)
    model = load_big_model(tokenizer, device)
    model.to(device)  # move to GPU if available, otherwise stay on CPU
    if st.button('Write my story!'):
        placeholder = st.empty()
        # if model_version == 'smoll':
        #     model = load_casey_model(tokenizer, device)
        # elif model_version == 'beeg':
        #     model = load_big_model(tokenizer, device)
        # with placeholder.container():
        #     st.write("Model Loaded! Preparing to Generate...")

        with st.spinner(""):
            result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
                              p_nucleus=specified_nucleus, temp=specified_temperature,
                              max_new_tokens=max_tokens, cond=user_input, deterministic=user_seed)

        # Vertical spacing before the streamed story
        st.markdown("\n")
        st.markdown("\n")
        st.markdown("\n")
        st.markdown("\n")
streamed_input = ""
for word in user_input.split(' '):
streamed_input += word
with placeholder.container():
st.markdown(f"**{streamed_input}**")
streamed_input += " "
time.sleep(0.1)
if user_input != "": ##conditional
result = result[len(user_input) + 3 :]
streamed_result = f"**{streamed_input[:-1]}**"
time.sleep(1)
else: ##unconditional
streamed_result = ""
for word in result.split(' '):
streamed_result += word + ' '
with placeholder.container():
st.markdown(f"{streamed_result}")
time.sleep(0.1)
    if st.button('Clear Output'):
        placeholder = st.empty()

    st.markdown('####')
    st.caption('UCLA DSU Project Fall 2023: Daniel Mendelevitch, Terry Ming, Casey Tattersall, Sean Tjoa')
    st.caption(r'Data Attribution: TinyStories (License: CDLA-Sharing-1.0) https://arxiv.org/abs/2305.07759')


if __name__ == "__main__":
    main()
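
# To try the demo locally (assumed filename; adjust to match this file):
#   streamlit run app.py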