Harveenchadha committed on
Commit
7738eb6
1 Parent(s): 4689bf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -11,15 +11,7 @@ def convert(inputfile, outfile):
11
  file_type="wav", channels=1, encoding="signed-integer", rate=16000, bits=16
12
  )
13
  sox_tfm.build(inputfile, outfile)
14
- def parse_transcription(wav_file):
15
- filename = wav_file.name.split('.')[0]
16
- convert(wav_file.name, filename + "16k.wav")
17
- speech, _ = sf.read(filename + "16k.wav")
18
- input_values = processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
19
- logits = model(input_values).logits
20
- predicted_ids = torch.argmax(logits, dim=-1)
21
- transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
22
- return transcription
23
 
24
 
25
  model_translate = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
@@ -37,6 +29,18 @@ def translate(text):
37
  processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
38
  model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
39
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  output1 = gr.outputs.Textbox(label="Hindi Output from ASR")
41
  output2 = gr.outputs.Textbox(label="English Translated Output")
42
 
 
11
  file_type="wav", channels=1, encoding="signed-integer", rate=16000, bits=16
12
  )
13
  sox_tfm.build(inputfile, outfile)
14
+
 
 
 
 
 
 
 
 
15
 
16
 
17
  model_translate = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
 
29
  processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
30
  model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
31
 
32
+ def parse_transcription(wav_file):
33
+ filename = wav_file.name.split('.')[0]
34
+ convert(wav_file.name, filename + "16k.wav")
35
+ speech, _ = sf.read(filename + "16k.wav")
36
+ input_values = processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
37
+ logits = model(input_values).logits
38
+ predicted_ids = torch.argmax(logits, dim=-1)
39
+ transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
40
+ return transcription, translate(transcription)
41
+
42
+
43
+
44
  output1 = gr.outputs.Textbox(label="Hindi Output from ASR")
45
  output2 = gr.outputs.Textbox(label="English Translated Output")
46