lang_id_testing

Runtime error

barto17 committed on Sep 25, 2023

Commit

425758f

•

1 Parent(s): 6467c3c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,16 +1,60 @@
 import torch
-import torch.nn.functional as F
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
-from transformers.models.whisper.tokenization_whisper import LANGUAGES
 from transformers.pipelines.audio_utils import ffmpeg_read
 import gradio as gr
 device = "cuda" if torch.cuda.is_available() else "CPU"
-model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
 model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
 tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from transformers.pipelines.audio_utils import ffmpeg_read
 import gradio as gr
+LANGUANGE_MAP = {
+    0: 'Arabic',
+    1: 'Basque',
+    2: 'Breton',
+    3: 'Catalan',
+    4: 'Chinese_China',
+    5: 'Chinese_Hongkong',
+    6: 'Chinese_Taiwan',
+    7: 'Chuvash',
+    8: 'Czech',
+    9: 'Dhivehi',
+    10: 'Dutch',
+    11: 'English',
+    12: 'Esperanto',
+    13: 'Estonian',
+    14: 'French',
+    15: 'Frisian',
+    16: 'Georgian',
+    17: 'German',
+    18: 'Greek',
+    19: 'Hakha_Chin',
+    20: 'Indonesian',
+    21: 'Interlingua',
+    22: 'Italian',
+    23: 'Japanese',
+    24: 'Kabyle',
+    25: 'Kinyarwanda',
+    26: 'Kyrgyz',
+    27: 'Latvian',
+    28: 'Maltese',
+    29: 'Mongolian',
+    30: 'Persian',
+    31: 'Polish',
+    32: 'Portuguese',
+    33: 'Romanian',
+    34: 'Romansh_Sursilvan',
+    35: 'Russian',
+    36: 'Sakha',
+    37: 'Slovenian',
+    38: 'Spanish',
+    39: 'Swedish',
+    40: 'Tamil',
+    41: 'Tatar',
+    42: 'Turkish',
+    43: 'Ukranian',
+    44: 'Welsh'
+ }
 device = "cuda" if torch.cuda.is_available() else "CPU"
+model_ckpt = "barto17/language-detection-fine-tuned-on-xlm-roberta-base"
 model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
 tokenizer = AutoTokenizer.from_pretrained(model_ckpt)