setup whisper base fa
Browse files- .idea/.gitignore +8 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/najva.iml +8 -0
- .idea/vcs.xml +6 -0
- main.py +41 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
4 |
+
# Editor-based HTTP Client requests
|
5 |
+
/httpRequests/
|
6 |
+
# Datasource local storage ignored files
|
7 |
+
/dataSources/
|
8 |
+
/dataSources.local.xml
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="Black">
|
4 |
+
<option name="sdkName" value="Python 3.11" />
|
5 |
+
</component>
|
6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
|
7 |
+
</project>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/najva.iml" filepath="$PROJECT_DIR$/.idea/najva.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/najva.iml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="inheritedJdk" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
</module>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="" vcs="Git" />
|
5 |
+
</component>
|
6 |
+
</project>
|
main.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
3 |
+
import librosa
|
4 |
+
|
5 |
+
# Hugging Face Hub checkpoint shared by the processor and the model, so the
# two can never drift apart.
_CHECKPOINT = "Neurai/NeuraSpeech_WhisperBase"

# Loaded once at import time; every request served by the app reuses them.
processor = WhisperProcessor.from_pretrained(_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_CHECKPOINT)

# Decoder prompt ids requesting Persian ("fa") transcription.
forced_decoder_ids = processor.get_decoder_prompt_ids(
    language="fa",
    task="transcribe",
)
|
8 |
+
|
9 |
+
|
10 |
+
def reverse_audio(audio):
    """Transcribe a Gradio audio recording with the Whisper model.

    Despite its name, this function does not reverse anything: it converts
    the recording to 16 kHz mono float audio, runs it through the
    module-level ``processor`` and ``model``, and returns the decoded text.

    Args:
        audio: The value Gradio passes for a numpy-typed ``gr.Audio`` input,
            i.e. a ``(sample_rate, samples)`` tuple, or ``None`` when nothing
            was recorded. ``samples`` is expected to be shaped ``(n,)`` or
            ``(n, channels)``.

    Returns:
        A list of transcription strings (one per batch item) with special
        tokens stripped.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Local import keeps this block self-contained; numpy is already a
    # dependency of librosa.
    import numpy as np

    if audio is None:
        raise gr.Error("No audio was recorded.")

    # Gradio delivers (sample_rate, data) -- the opposite order of
    # librosa.load(), which returns (data, sample_rate).
    sample_rate, samples = audio
    sr = 16000  # sampling rate the Whisper feature extractor is called with

    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM -> float32 in [-1, 1]; librosa only accepts
        # floating-point audio.
        scale = np.iinfo(samples.dtype).max
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # (n, channels) -> (channels, n): librosa averages over the leading
        # axes and treats the last axis as time.
        samples = samples.T

    samples = librosa.to_mono(samples)
    samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=sr)
    input_features = processor(
        samples, sampling_rate=sr, return_tensors="pt"
    ).input_features

    # Generate token ids.
    # NOTE(review): the module-level ``forced_decoder_ids`` is not passed
    # here, so language/task come from the checkpoint's own generation
    # config -- confirm that is intended.
    predicted_ids = model.generate(input_features)

    # Decode token ids to text, dropping special tokens.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)
|
23 |
+
|
24 |
+
|
25 |
+
# Appearance and controls of the waveform shown by the recorder.
_waveform_options = gr.WaveformOptions(
    waveform_color="#01C6FF",
    waveform_progress_color="#0066B4",
    skip_length=2,
    show_controls=True,
)

# Microphone-only audio input.
input_audio = gr.Audio(
    sources=["microphone"],
    waveform_options=_waveform_options,
)

# Single-function app: recorded audio in, text out.
demo = gr.Interface(fn=reverse_audio, inputs=input_audio, outputs="text")

if __name__ == "__main__":
    demo.launch()
|