File size: 1,824 Bytes
c968fc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from typing import List
import torch
def basic_greedy_search(
model: torch.nn.Module,
encoder_out: torch.Tensor,
encoder_out_lens: torch.Tensor,
n_steps: int = 64,
) -> List[List[int]]:
# fake padding
padding = torch.zeros(1, 1).to(encoder_out.device)
# sos
pred_input_step = torch.tensor([model.blank]).reshape(1, 1)
cache = model.predictor.init_state(1, method="zero", device=encoder_out.device)
new_cache: List[torch.Tensor] = []
t = 0
hyps = []
prev_out_nblk = True
pred_out_step = None
per_frame_max_noblk = n_steps
per_frame_noblk = 0
while t < encoder_out_lens:
encoder_out_step = encoder_out[:, t : t + 1, :] # [1, 1, E]
if prev_out_nblk:
step_outs = model.predictor.forward_step(
pred_input_step, padding, cache
) # [1, 1, P]
pred_out_step, new_cache = step_outs[0], step_outs[1]
joint_out_step = model.joint(encoder_out_step, pred_out_step) # [1,1,v]
joint_out_probs = joint_out_step.log_softmax(dim=-1)
joint_out_max = joint_out_probs.argmax(dim=-1).squeeze() # []
if joint_out_max != model.blank:
hyps.append(joint_out_max.item())
prev_out_nblk = True
per_frame_noblk = per_frame_noblk + 1
pred_input_step = joint_out_max.reshape(1, 1)
# state_m, state_c = clstate_out_m, state_out_c
cache = new_cache
if joint_out_max == model.blank or per_frame_noblk >= per_frame_max_noblk:
if joint_out_max == model.blank:
prev_out_nblk = False
# TODO(Mddct): make t in chunk for streamming
# or t should't be too lang to predict none blank
t = t + 1
per_frame_noblk = 0
return [hyps]
|