Final (hopefully) spacing and text adjustments
app.py
CHANGED
@@ -15,11 +15,11 @@ def main():

    st.header("Data and Training")

-   st.markdown("""We used the dataset from
+   st.markdown("""We used the dataset from Microsoft Research's [TinyStories Paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
    which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
-   st.markdown("""Our
-   16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for
-   which is superior to
+   st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
+   16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for 7 hours, achieving a cross-entropy validation loss of 1.16,
+   which is superior to all models in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
    st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
    quite effective at generating new short stories. **Try it out below!**""")

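As a quick back-of-envelope check on the figures quoted in this hunk (assuming a standard GPT-style block with 4·d² attention weights and an 8·d² feed-forward, which the text does not spell out):

```python
# Rough parameter estimate for the architecture described above.
# Assumes a vanilla GPT-style block (4*d^2 attention + 8*d^2 MLP weights);
# biases and LayerNorms add comparatively little.
d_model, n_layers, vocab = 768, 8, 50257

per_block = 4 * d_model**2 + 8 * d_model**2
non_embedding = n_layers * per_block
embedding = vocab * d_model

print(f"non-embedding: {non_embedding / 1e6:.1f}M")  # ~56.6M -> the quoted ~56M
print(f"embedding:     {embedding / 1e6:.1f}M")      # ~38.6M, excluded from the count
```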
@@ -43,7 +43,7 @@ def main():
    generation_method = "top-k"
    specified_k = 5
    specified_nucleus = 0.5
-   specified_temperature = 0.
+   specified_temperature = 0.4
    max_tokens = 750

    if st.checkbox("Show Advanced Settings"):
@@ -57,7 +57,7 @@ def main():
        specified_nucleus = st.number_input("Nucleus Cutoff:", value = 0.5, step = 0.05, min_value = 0.0, max_value = 1.0)

    if generation_method == "temperature":
-       specified_temperature = st.number_input("Value for temperature:", value = 0.
+       specified_temperature = st.number_input("Value for temperature:", value = 0.4, step = 0.05, min_value = 0.0, max_value = 1.0)

    max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)

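For readers unfamiliar with the three decoding modes these settings configure: top-k keeps only the k most likely tokens, nucleus keeps the smallest set of tokens whose probability mass reaches the cutoff, and temperature rescales the logits (values below 1, like the new 0.4 default, sharpen the distribution). The app's `generate()` helper is not part of this diff, so the sketch below is a hypothetical single-step sampler in PyTorch, not the Space's actual implementation; only the parameter names and defaults are taken from the code above.

```python
import torch
import torch.nn.functional as F

def sample_next_token(logits, method="top-k", k=5, p_nucleus=0.5, temp=0.4):
    """Hypothetical one-step sampler over a 1-D logits tensor (vocab_size,)."""
    if method == "top-k":
        # Restrict sampling to the k highest-probability tokens.
        vals, idx = torch.topk(logits, k)
        return idx[torch.multinomial(F.softmax(vals, dim=-1), 1)]
    if method == "nucleus":
        # Keep the smallest prefix of the sorted distribution whose
        # cumulative probability reaches p_nucleus, then renormalize.
        probs = F.softmax(logits, dim=-1)
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        mask = torch.cumsum(sorted_probs, dim=-1) > p_nucleus
        mask[1:] = mask[:-1].clone()  # shift right so the boundary token stays
        mask[0] = False               # always keep the most likely token
        sorted_probs[mask] = 0.0
        sorted_probs /= sorted_probs.sum()
        return sorted_idx[torch.multinomial(sorted_probs, 1)]
    # "temperature": divide logits by temp before softmax; temp < 1 sharpens
    # the distribution, temp > 1 flattens it toward uniform.
    return torch.multinomial(F.softmax(logits / temp, dim=-1), 1)
```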
@@ -88,12 +88,16 @@ def main():
    result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
                      p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
                      cond=user_input, deterministic=user_seed)
+   st.markdown("\n")
+   st.markdown("\n")
+   st.markdown("\n")
+   st.markdown("\n")

    streamed_input = ""
    for word in user_input.split(' '):
        streamed_input += word
        with placeholder.container():
-           st.markdown(f"**{streamed_input}**
+           st.markdown(f"**{streamed_input}**")
        streamed_input += " "
        time.sleep(0.1)

@@ -108,7 +112,7 @@ def main():
    for word in result.split(' '):
        streamed_result += word + ' '
        with placeholder.container():
-           st.markdown(f"{streamed_result}
+           st.markdown(f"{streamed_result}")
        time.sleep(0.1)
    if st.button('Clear Output'):
        placeholder = st.empty()
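Both streaming loops above rely on the same Streamlit pattern: `st.empty()` reserves a single slot, and each iteration redraws it via `placeholder.container()`, which produces the word-by-word effect (and is why "Clear Output" simply swaps in a fresh empty slot). A minimal standalone version of the pattern:

```python
import time
import streamlit as st

placeholder = st.empty()  # one slot whose contents are replaced on each redraw

streamed = ""
for word in "Once upon a time".split(' '):
    streamed += word + " "
    with placeholder.container():   # overwrite the slot with the text so far
        st.markdown(f"**{streamed}**")
    time.sleep(0.1)
```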