language:
- kn
base_model:
- openai/whisper-small
---
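
The following example shows how to transcribe a Kannada audio file with this model using `transformers` and `soundfile`. The tokenizer is loaded from the `openai/whisper-small` base model, while the feature extractor comes from this fine-tuned checkpoint; the two are combined into a processor manually.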
```python
import torch
import soundfile as sf
from transformers import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
    WhisperTokenizer,
    WhisperFeatureExtractor,
)

model_id = "ARTPARK-IISc/whisper-small-vaani-kannada"

# Load the feature extractor and tokenizer individually
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_id)
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="Kannada", task="transcribe")

# Create the processor manually
processor = WhisperProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)

# Load the audio file (expects a mono recording)
audio_file_path = "Sample_Audio.wav"  # replace with your audio file path
audio_data, sample_rate = sf.read(audio_file_path)

# Resample to 16 kHz if necessary (Whisper expects 16 kHz audio)
if sample_rate != 16000:
    import torchaudio
    resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
    audio_data = resampler(torch.tensor(audio_data, dtype=torch.float32).unsqueeze(0)).squeeze().numpy()

# Use the processor to prepare the input features
input_features = processor(audio_data, sampling_rate=16000, return_tensors="pt").input_features.to(device)

# Generate the transcription (no gradients needed during inference)
with torch.no_grad():
    predicted_ids = model.generate(input_features)

# Decode the generated token IDs into text
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

print(transcription)
```
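
For quick experiments, the same components can also be wrapped in the high-level `pipeline` API. This is a minimal sketch under the same assumptions as above (tokenizer taken from the base model, `Sample_Audio.wav` as a placeholder path); it additionally assumes `ffmpeg` is available, since the pipeline uses it to decode and resample audio files internally:

```python
from transformers import WhisperFeatureExtractor, WhisperTokenizer, pipeline

model_id = "ARTPARK-IISc/whisper-small-vaani-kannada"

# Same tokenizer/feature-extractor setup as in the example above
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_id)
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="Kannada", task="transcribe")

asr = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
)

# The pipeline loads and resamples the audio file internally
print(asr("Sample_Audio.wav")["text"])
```

For recordings longer than Whisper's 30-second window, passing `chunk_length_s=30` to the pipeline call enables chunked long-form transcription.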