Update app.py
app.py CHANGED
@@ -300,8 +300,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
         token_scores,
     ) = ([], [], [], [], [], [])
 
-
-    for beam_ix in range(len(beam_trees)):
+    for beam_ix in range(len(beam_trees)): # not range(n_beams) since there might be more ongoing trees.
         current_beam = beam_trees[beam_ix]
 
         # skip if the beam is already final
@@ -310,18 +309,17 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
 
         # Get top cumulative scores for the current beam
        current_top_token_indexes = list(
-            np.array(scores[step][
+            np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
         )
         top_token_indexes += current_top_token_indexes
-        token_scores += list(np.array(scores[step][
+        token_scores += list(np.array(scores[step][beam_ix][current_top_token_indexes]))
         top_cumulative_scores += list(
-            np.array(scores[step][
+            np.array(scores[step][beam_ix][current_top_token_indexes])
             + current_beam.cumulative_score
         )
         beam_indexes += [beam_ix] * n_beams
         current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
         top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
-        score_idx += 1
 
     top_df = pd.DataFrame.from_dict(
         {
@@ -358,7 +356,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             break
     top_df_selected_filtered = top_df_selected.iloc[:beams_to_keep]
 
-    # Write the scores table
+    # Write the scores table in each beam tree
     score_idx = 0
     for beam_ix in range(len(beam_trees)):
         current_beam = beam_trees[beam_ix]
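For readers following the second hunk: for every ongoing beam it takes the n_beams highest-scoring next tokens at the current step (the argsort()[-n_beams:][::-1] pattern), records their step scores and their cumulative scores (step score plus the beam's running total), and flattens everything into one DataFrame. The snippet below is a minimal, self-contained sketch of that pattern on toy data; FakeBeam, the toy scores array, the hard-coded vocabulary, and the column names are illustrative stand-ins for the Space's real model outputs, tokenizer, and table layout, not the actual app.py code.

import numpy as np
import pandas as pd

# Toy stand-ins (assumptions): 2 ongoing beams, a 5-token vocabulary,
# and per-step log-prob scores shaped (n_beams, vocab_size).
n_beams = 2
vocab = ["the", "cat", "sat", "on", "mat"]
scores_step = np.log(np.array([
    [0.10, 0.40, 0.30, 0.15, 0.05],   # beam 0
    [0.05, 0.10, 0.20, 0.60, 0.05],   # beam 1
]))

class FakeBeam:
    def __init__(self, sequence, cumulative_score):
        self.current_sequence = sequence
        self.cumulative_score = cumulative_score

beam_trees = [FakeBeam("the cat", -1.2), FakeBeam("the dog", -1.5)]

top_token_indexes, token_scores, top_cumulative_scores = [], [], []
beam_indexes, current_sequence, top_tokens = [], [], []

for beam_ix in range(len(beam_trees)):
    current_beam = beam_trees[beam_ix]

    # Top n_beams candidate tokens for this beam, best first
    # (same argsort()[-n_beams:][::-1] pattern as in the diff).
    current_top = list(scores_step[beam_ix].argsort()[-n_beams:][::-1])

    top_token_indexes += current_top
    token_scores += list(scores_step[beam_ix][current_top])
    top_cumulative_scores += list(
        scores_step[beam_ix][current_top] + current_beam.cumulative_score
    )
    beam_indexes += [beam_ix] * n_beams
    current_sequence += [current_beam.current_sequence] * n_beams
    top_tokens += [vocab[ix] for ix in current_top]

top_df = pd.DataFrame.from_dict(
    {
        "token_index": top_token_indexes,
        "token_score": token_scores,
        "cumulative_score": top_cumulative_scores,
        "beam_index": beam_indexes,
        "current_sequence": current_sequence,
        "token": top_tokens,
    }
)
print(top_df.sort_values("cumulative_score", ascending=False))

Sorting by the cumulative score and keeping only the best rows roughly mirrors the top_df_selected.iloc[:beams_to_keep] step that appears as context in the third hunk.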