Final (hopefully) spacing and text adjustments
app.py
CHANGED
@@ -15,11 +15,11 @@ def main():

    st.header("Data and Training")

-   st.markdown("""We used the dataset from
+   st.markdown("""We used the dataset from Microsoft Research's [TinyStories Paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
    which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
-   st.markdown("""Our
-   16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for
-   which is superior to
+   st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
+   16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for 7 hours, achieving a cross-entropy validation loss of 1.16,
+   which is superior to all models in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
    st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
    quite effective at generating new short stories. **Try it out below!**""")

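As a quick back-of-envelope check on the figures quoted in this hunk (assuming a standard GPT-style block with 4·d² attention weights and an 8·d² feed-forward, which the text does not spell out):

```python
# Rough parameter estimate for the architecture described above.
# Assumes a vanilla GPT-style block (4*d^2 attention + 8*d^2 MLP weights);
# biases and LayerNorms add comparatively little.
d_model, n_layers, vocab = 768, 8, 50257

per_block = 4 * d_model**2 + 8 * d_model**2
non_embedding = n_layers * per_block
embedding = vocab * d_model

print(f"non-embedding: {non_embedding / 1e6:.1f}M")  # ~56.6M -> the quoted ~56M
print(f"embedding:     {embedding / 1e6:.1f}M")      # ~38.6M, excluded from the count
```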
@@ -43,7 +43,7 @@ def main():
    generation_method = "top-k"
    specified_k = 5
    specified_nucleus = 0.5
-   specified_temperature = 0.
+   specified_temperature = 0.4
    max_tokens = 750

    if st.checkbox("Show Advanced Settings"):
@@ -57,7 +57,7 @@ def main():
        specified_nucleus = st.number_input("Nucleus Cutoff:", value = 0.5, step = 0.05, min_value = 0.0, max_value = 1.0)

    if generation_method == "temperature":
-       specified_temperature = st.number_input("Value for temperature:", value = 0.
+       specified_temperature = st.number_input("Value for temperature:", value = 0.4, step = 0.05, min_value = 0.0, max_value = 1.0)

    max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)

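For readers unfamiliar with the three decoding modes these settings configure: top-k keeps only the k most likely tokens, nucleus keeps the smallest set of tokens whose probability mass reaches the cutoff, and temperature rescales the logits (values below 1, like the new 0.4 default, sharpen the distribution). The app's `generate()` helper is not part of this diff, so the sketch below is a hypothetical single-step sampler in PyTorch, not the Space's actual implementation; only the parameter names and defaults are taken from the code above.

```python
import torch
import torch.nn.functional as F

def sample_next_token(logits, method="top-k", k=5, p_nucleus=0.5, temp=0.4):
    """Hypothetical one-step sampler over a 1-D logits tensor (vocab_size,)."""
    if method == "top-k":
        # Restrict sampling to the k highest-probability tokens.
        vals, idx = torch.topk(logits, k)
        return idx[torch.multinomial(F.softmax(vals, dim=-1), 1)]
    if method == "nucleus":
        # Keep the smallest prefix of the sorted distribution whose
        # cumulative probability reaches p_nucleus, then renormalize.
        probs = F.softmax(logits, dim=-1)
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        mask = torch.cumsum(sorted_probs, dim=-1) > p_nucleus
        mask[1:] = mask[:-1].clone()  # shift right so the boundary token stays
        mask[0] = False               # always keep the most likely token
        sorted_probs[mask] = 0.0
        sorted_probs /= sorted_probs.sum()
        return sorted_idx[torch.multinomial(sorted_probs, 1)]
    # "temperature": divide logits by temp before softmax; temp < 1 sharpens
    # the distribution, temp > 1 flattens it toward uniform.
    return torch.multinomial(F.softmax(logits / temp, dim=-1), 1)
```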
@@ -88,12 +88,16 @@ def main():
    result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
                      p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
                      cond=user_input, deterministic=user_seed)
+   st.markdown("\n")
+   st.markdown("\n")
+   st.markdown("\n")
+   st.markdown("\n")

    streamed_input = ""
    for word in user_input.split(' '):
        streamed_input += word
        with placeholder.container():
-           st.markdown(f"**{streamed_input}**
+           st.markdown(f"**{streamed_input}**")
        streamed_input += " "
        time.sleep(0.1)

@@ -108,7 +112,7 @@ def main():
    for word in result.split(' '):
        streamed_result += word + ' '
        with placeholder.container():
-           st.markdown(f"{streamed_result}
+           st.markdown(f"{streamed_result}")
        time.sleep(0.1)
    if st.button('Clear Output'):
        placeholder = st.empty()
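Both streaming loops above rely on the same Streamlit pattern: `st.empty()` reserves a single slot, and each iteration redraws it via `placeholder.container()`, which produces the word-by-word effect (and is why "Clear Output" simply swaps in a fresh empty slot). A minimal standalone version of the pattern:

```python
import time
import streamlit as st

placeholder = st.empty()  # one slot whose contents are replaced on each redraw

streamed = ""
for word in "Once upon a time".split(' '):
    streamed += word + " "
    with placeholder.container():   # overwrite the slot with the text so far
        st.markdown(f"**{streamed}**")
    time.sleep(0.1)
```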