Update README.md
Browse files
README.md
CHANGED
@@ -4,30 +4,30 @@ library_name: peft
|
|
4 |
---
|
5 |
|
6 |
## Config
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
task = "transcribe"
|
12 |
-
dataset_name = "mozilla-foundation/common_voice_11_0"
|
13 |
-
|
14 |
-
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
|
15 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
|
16 |
-
processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
|
17 |
-
|
18 |
-
|
19 |
-
common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
|
20 |
-
common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
|
21 |
-
|
22 |
-
|
23 |
-
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
|
24 |
-
config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
25 |
-
|
26 |
-
model = get_peft_model(model, config)
|
27 |
-
model.print_trainable_parameters()
|
28 |
-
#"trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
|
29 |
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
## Training procedure
|
32 |
|
33 |
|
|
|
4 |
---
|
5 |
|
6 |
## Config
|
7 |
+
```python
|
8 |
+
model_name_or_path = "openai/whisper-large-v2"
|
9 |
+
language = "Marathi"
|
10 |
+
language_abbr = "mr"
|
11 |
+
task = "transcribe"
|
12 |
+
dataset_name = "mozilla-foundation/common_voice_11_0"
|
13 |
|
14 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
|
15 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
|
16 |
+
processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
+
common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
|
20 |
+
common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
|
21 |
+
|
22 |
+
|
23 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
|
24 |
+
config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
25 |
+
|
26 |
+
model = get_peft_model(model, config)
|
27 |
+
model.print_trainable_parameters()
|
28 |
+
#"trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
|
29 |
+
```
|
30 |
+
|
31 |
## Training procedure
|
32 |
|
33 |
|