Spaces:
Runtime error
Runtime error
Stefan Dumitrescu
committed on
Commit
•
19c9e19
1
Parent(s):
2f0ed55
Update
Browse files
app.py
CHANGED
@@ -84,13 +84,10 @@ with col1:
|
|
84 |
|
85 |
st.markdown("**Step 2: Adjust specific text generation parameters**")
|
86 |
|
87 |
-
tab_greedy,
|
88 |
|
89 |
with tab_greedy:
|
90 |
-
st.
|
91 |
-
|
92 |
-
with tab_beamsearch:
|
93 |
-
num_beams = st.slider("Num beams", min_value=1, max_value=30, step=5, value=5)
|
94 |
|
95 |
with tab_sampling:
|
96 |
top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, step=0.05, value=0.9)
|
@@ -104,12 +101,13 @@ with col1:
|
|
104 |
st.markdown("**Step 3: Adjust common text generation parameters**")
|
105 |
|
106 |
no_repeat_ngrams = st.slider("No repeat n-grams", value=2, min_value=0, max_value=3)
|
107 |
-
temperature = st.slider("Temperature", value=1.0, min_value=0.
|
108 |
max_length = st.slider("Number of tokens to generate", value=50, min_value=10, max_value=256)
|
109 |
|
110 |
st.markdown("**Step 4: Select a prompt or input your own text, and click generate in the left panel**")
|
111 |
|
112 |
|
|
|
113 |
def update_prompt():
|
114 |
st.session_state['text'] = prompt
|
115 |
|
@@ -138,7 +136,7 @@ if button_greedy or button_sampling or button_typical:
|
|
138 |
if len(tokenized_text.input_ids[0]) + max_length > 512: # need to keep less words
|
139 |
keep_last = 512 - max_length
|
140 |
print(f"keep last: {keep_last}")
|
141 |
-
input_ids, attention_mask = tokenized_text.input_ids[0][
|
142 |
previous_ids = tokenized_text.input_ids[0][:keep_last]
|
143 |
st.warning(f"kept last {keep_last}")
|
144 |
else:
|
@@ -149,7 +147,9 @@ if button_greedy or button_sampling or button_typical:
|
|
149 |
output = greedy_search(model, input_ids.unsqueeze(dim=0), attention_mask.unsqueeze(dim=0), no_repeat_ngrams, length)
|
150 |
|
151 |
if previous_ids is not None:
|
152 |
-
|
|
|
|
|
153 |
else:
|
154 |
new_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
155 |
|
@@ -199,8 +199,8 @@ text_element = col2.text_area('Text:', height=400, key="text")
|
|
199 |
col2.markdown("""---""")
|
200 |
col2.text("Statistics and details:")
|
201 |
if details != "":
|
202 |
-
col2.caption("
|
203 |
if tokenized_text is None:
|
204 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
205 |
tt = tokenizer(text_element, add_special_tokens=False, return_tensors="pt")
|
206 |
-
col2.caption(f"
|
|
|
84 |
|
85 |
st.markdown("**Step 2: Adjust specific text generation parameters**")
|
86 |
|
87 |
+
tab_greedy, tab_sampling, tab_typical = st.tabs(["Greedy", "Sampling", "Typical Sampling"])
|
88 |
|
89 |
with tab_greedy:
|
90 |
+
st.caption("Greedy decoding does not have any special parameters.")
|
|
|
|
|
|
|
91 |
|
92 |
with tab_sampling:
|
93 |
top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, step=0.05, value=0.9)
|
|
|
101 |
st.markdown("**Step 3: Adjust common text generation parameters**")
|
102 |
|
103 |
no_repeat_ngrams = st.slider("No repeat n-grams", value=2, min_value=0, max_value=3)
|
104 |
+
temperature = st.slider("Temperature", value=1.0, min_value=0.1, max_value=1.0, step=0.1)
|
105 |
max_length = st.slider("Number of tokens to generate", value=50, min_value=10, max_value=256)
|
106 |
|
107 |
st.markdown("**Step 4: Select a prompt or input your own text, and click generate in the left panel**")
|
108 |
|
109 |
|
110 |
+
|
111 |
def update_prompt():
|
112 |
st.session_state['text'] = prompt
|
113 |
|
|
|
136 |
if len(tokenized_text.input_ids[0]) + max_length > 512: # need to keep less words
|
137 |
keep_last = 512 - max_length
|
138 |
print(f"keep last: {keep_last}")
|
139 |
+
input_ids, attention_mask = tokenized_text.input_ids[0][-keep_last:], tokenized_text.attention_mask[0][-keep_last:]
|
140 |
previous_ids = tokenized_text.input_ids[0][:keep_last]
|
141 |
st.warning(f"kept last {keep_last}")
|
142 |
else:
|
|
|
147 |
output = greedy_search(model, input_ids.unsqueeze(dim=0), attention_mask.unsqueeze(dim=0), no_repeat_ngrams, length)
|
148 |
|
149 |
if previous_ids is not None:
|
150 |
+
print(f"\nConcat prev id: "+tokenizer.decode(previous_ids, skip_special_tokens=True))
|
151 |
+
print(f"\nWith current decode: " + tokenizer.decode(output[0], skip_special_tokens=True))
|
152 |
+
new_text = tokenizer.decode(torch.cat([previous_ids, output[0]], dim=-1), skip_special_tokens=True)
|
153 |
else:
|
154 |
new_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
155 |
|
|
|
199 |
col2.markdown("""---""")
|
200 |
col2.text("Statistics and details:")
|
201 |
if details != "":
|
202 |
+
col2.caption(" Generation details: " + details)
|
203 |
if tokenized_text is None:
|
204 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
205 |
tt = tokenizer(text_element, add_special_tokens=False, return_tensors="pt")
|
206 |
+
col2.caption(f" Text length is {len(text_element)} characters, {len(tt.input_ids[0])} tokens.")
|