barto17 committed on
Commit
6769f1b
1 Parent(s): ec1a913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -7,6 +7,7 @@ from transformers.models.whisper.tokenization_whisper import LANGUAGES
7
  from transformers.pipelines.audio_utils import ffmpeg_read
8
 
9
  model_id = "openai/whisper-large-v2"
 
10
 
11
 
12
  LANGUANGE_MAP = {
@@ -58,19 +59,9 @@ LANGUANGE_MAP = {
58
  }
59
 
60
 
61
- processor = WhisperProcessor.from_pretrained(model_id)
62
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
63
- model.eval()
64
- model.to(device)
65
-
66
- sampling_rate = processor.feature_extractor.sampling_rate
67
 
68
- bos_token_id = processor.tokenizer.all_special_ids[-106]
69
- decoder_input_ids = torch.tensor([bos_token_id]).to(device)
70
 
71
 
72
- device = "cuda" if torch.cuda.is_available() else "CPU"
73
-
74
  model_ckpt = "barto17/language-detection-fine-tuned-on-xlm-roberta-base"
75
  model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
76
  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
@@ -92,6 +83,16 @@ def process_audio_file(file):
92
  return audio
93
 
94
  def transcribe(Microphone, File_Upload):
 
 
 
 
 
 
 
 
 
 
95
  warn_output = ""
96
  if (Microphone is not None) and (File_Upload is not None):
97
  warn_output = "WARNING: You've uploaded an audio file and used the microphone. " \
 
7
  from transformers.pipelines.audio_utils import ffmpeg_read
8
 
9
  model_id = "openai/whisper-large-v2"
10
+ device = "cuda" if torch.cuda.is_available() else "CPU"
11
 
12
 
13
  LANGUANGE_MAP = {
 
59
  }
60
 
61
 
 
 
 
 
 
 
62
 
 
 
63
 
64
 
 
 
65
  model_ckpt = "barto17/language-detection-fine-tuned-on-xlm-roberta-base"
66
  model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
67
  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
 
83
  return audio
84
 
85
  def transcribe(Microphone, File_Upload):
86
+ processor = WhisperProcessor.from_pretrained(model_id)
87
+ model = WhisperForConditionalGeneration.from_pretrained(model_id)
88
+ model.eval()
89
+ model.to(device)
90
+
91
+ sampling_rate = processor.feature_extractor.sampling_rate
92
+
93
+ bos_token_id = processor.tokenizer.all_special_ids[-106]
94
+ decoder_input_ids = torch.tensor([bos_token_id]).to(device)
95
+
96
  warn_output = ""
97
  if (Microphone is not None) and (File_Upload is not None):
98
  warn_output = "WARNING: You've uploaded an audio file and used the microphone. " \