barto17 committed on
Commit
425758f
1 Parent(s): 6467c3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -6
app.py CHANGED
@@ -1,16 +1,60 @@
1
  import torch
2
- import torch.nn.functional as F
3
-
4
- from transformers import WhisperForConditionalGeneration, WhisperProcessor
5
- from transformers.models.whisper.tokenization_whisper import LANGUAGES
6
  from transformers.pipelines.audio_utils import ffmpeg_read
7
-
8
  import gradio as gr
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  device = "cuda" if torch.cuda.is_available() else "CPU"
12
 
13
- model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
14
  model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
15
  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
16
 
 
1
  import torch
2
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 
 
3
  from transformers.pipelines.audio_utils import ffmpeg_read
 
4
  import gradio as gr
5
 
6
+ LANGUANGE_MAP = {
7
+ 0: 'Arabic',
8
+ 1: 'Basque',
9
+ 2: 'Breton',
10
+ 3: 'Catalan',
11
+ 4: 'Chinese_China',
12
+ 5: 'Chinese_Hongkong',
13
+ 6: 'Chinese_Taiwan',
14
+ 7: 'Chuvash',
15
+ 8: 'Czech',
16
+ 9: 'Dhivehi',
17
+ 10: 'Dutch',
18
+ 11: 'English',
19
+ 12: 'Esperanto',
20
+ 13: 'Estonian',
21
+ 14: 'French',
22
+ 15: 'Frisian',
23
+ 16: 'Georgian',
24
+ 17: 'German',
25
+ 18: 'Greek',
26
+ 19: 'Hakha_Chin',
27
+ 20: 'Indonesian',
28
+ 21: 'Interlingua',
29
+ 22: 'Italian',
30
+ 23: 'Japanese',
31
+ 24: 'Kabyle',
32
+ 25: 'Kinyarwanda',
33
+ 26: 'Kyrgyz',
34
+ 27: 'Latvian',
35
+ 28: 'Maltese',
36
+ 29: 'Mongolian',
37
+ 30: 'Persian',
38
+ 31: 'Polish',
39
+ 32: 'Portuguese',
40
+ 33: 'Romanian',
41
+ 34: 'Romansh_Sursilvan',
42
+ 35: 'Russian',
43
+ 36: 'Sakha',
44
+ 37: 'Slovenian',
45
+ 38: 'Spanish',
46
+ 39: 'Swedish',
47
+ 40: 'Tamil',
48
+ 41: 'Tatar',
49
+ 42: 'Turkish',
50
+ 43: 'Ukranian',
51
+ 44: 'Welsh'
52
+ }
53
+
54
 
55
  device = "cuda" if torch.cuda.is_available() else "CPU"
56
 
57
+ model_ckpt = "barto17/language-detection-fine-tuned-on-xlm-roberta-base"
58
  model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
59
  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
60