patrickvonplaten committed on
Commit
165889d
1 Parent(s): 15af85e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -9
README.md CHANGED
@@ -51,7 +51,7 @@ To transcribe audio files the model can be used as a standalone acoustic model a
51
  ds = ds.map(map_to_array)
52
 
53
  # tokenize
54
- input_values = processor(ds["speech"][:2], return_tensors="pt", padding="longest").input_values # Batch size 1
55
 
56
  # retrieve logits
57
  logits = model(input_values).logits
@@ -78,15 +78,8 @@ librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
78
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-100h").to("cuda")
79
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-100h")
80
 
81
- def map_to_array(batch):
82
- speech, _ = sf.read(batch["file"])
83
- batch["speech"] = speech
84
- return batch
85
-
86
- librispeech_eval = librispeech_eval.map(map_to_array)
87
-
88
  def map_to_pred(batch):
89
- input_values = processor(batch["speech"], return_tensors="pt", padding="longest").input_values
90
  with torch.no_grad():
91
  logits = model(input_values.to("cuda")).logits
92
 
 
51
  ds = ds.map(map_to_array)
52
 
53
  # tokenize
54
+ input_values = processor(ds[0]["audio"]["array"], return_tensors="pt", padding="longest").input_values # Batch size 1
55
 
56
  # retrieve logits
57
  logits = model(input_values).logits
 
78
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-100h").to("cuda")
79
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-100h")
80
 
 
 
 
 
 
 
 
81
  def map_to_pred(batch):
82
+ input_values = processor(batch["audio"]["array"], return_tensors="pt", padding="longest").input_values
83
  with torch.no_grad():
84
  logits = model(input_values.to("cuda")).logits
85