Update README.md
Browse files
README.md
CHANGED
@@ -4,27 +4,28 @@ library_name: peft
|
|
4 |
---
|
5 |
|
6 |
## Config
|
7 |
-
model_name_or_path = "openai/whisper-large-v2"
|
8 |
-
language = "Marathi"
|
9 |
-
language_abbr = "mr"
|
10 |
-
task = "transcribe"
|
11 |
-
dataset_name = "mozilla-foundation/common_voice_11_0"
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
## Training procedure
|
|
|
4 |
---
|
5 |
|
6 |
## Config
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
model_name_or_path = "openai/whisper-large-v2"
|
9 |
+
language = "Marathi"
|
10 |
+
language_abbr = "mr"
|
11 |
+
task = "transcribe"
|
12 |
+
dataset_name = "mozilla-foundation/common_voice_11_0"
|
13 |
+
|
14 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
|
15 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
|
16 |
+
processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
|
17 |
+
|
18 |
+
|
19 |
+
common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
|
20 |
+
common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
|
21 |
+
|
22 |
+
|
23 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
|
24 |
+
config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
25 |
+
|
26 |
+
model = get_peft_model(model, config)
|
27 |
+
model.print_trainable_parameters()
|
28 |
+
# Prints: "trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
|
29 |
|
30 |
|
31 |
## Training procedure
|