File size: 2,356 Bytes
fbeec50
 
 
4c622ed
 
 
8c3ddb5
fbeec50
 
c41dac2
 
 
 
 
 
 
 
 
 
 
5569679
c41dac2
 
 
 
07b98ad
c41dac2
 
fbeec50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c41dac2
 
07b98ad
 
 
c41dac2
 
 
 
 
 
 
 
 
 
 
fbeec50
 
 
 
 
c41dac2
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import gradio as gr
from transformers import pipeline
from transformers import pipeline
import gradio as gr

# Install an extra wheel with pip every time the script starts.
# NOTE(review): nothing in the visible code imports `es_pipeline` -- confirm
# this install is still required before keeping the start-up cost.
os.system('pip install https://huggingface.co/Armandoliv/es_pipeline/resolve/main/es_pipeline-any-py3-none-any.whl')

# Shared transformers pipeline used by both transcription functions below.
# Only the model id is given; no task is passed explicitly.
pipe = pipeline(model="irena/whisper-small-sv-SE") 
# URL prefix a link must contain for its video id to be recognised.
_YOUTUBE_WATCH_PREFIX = "https://www.youtube.com/watch?v="
# Video transcribed when the supplied link is not a recognisable watch URL.
_DEFAULT_YOUTUBE_ID = "xOZM-1p-jAk"


def _extract_youtube_id(url):
    """Return the video id contained in a YouTube watch URL, or the default id."""
    import re

    _, _, tail = (url or "").partition(_YOUTUBE_WATCH_PREFIX)
    # Keep only the leading run of id characters so trailing query
    # parameters ("&t=10s", "&list=...") and any shell metacharacters are
    # dropped instead of ending up in the file name and the command line.
    match = re.match(r"[A-Za-z0-9_-]+", tail)
    return match.group(0) if match else _DEFAULT_YOUTUBE_ID


def main_generator(youtube_id: str):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        youtube_id: Despite the name, a full watch URL of the form
            ``https://www.youtube.com/watch?v=<id>``. When the prefix is
            missing or no id follows it, a built-in default video is used.

    Returns:
        The ``"text"`` field of the result produced by the module-level
        ``pipe`` for the downloaded audio file.

    Raises:
        subprocess.CalledProcessError: If ``youtube-dl`` exits with a
            non-zero status.
        FileNotFoundError: If the ``youtube-dl`` executable is not found.
    """
    import subprocess

    video_id = _extract_youtube_id(youtube_id)
    output_file = f"test_audio_youtube_{video_id}.m4a"

    # An argument list (no shell) keeps user-supplied text from being
    # interpreted as shell syntax. The "--" terminator lets ids that begin
    # with "-" be treated as the video id rather than as an option.
    subprocess.run(
        [
            "youtube-dl",
            "-o", output_file,
            "--extract-audio",
            "--restrict-filenames",
            "-f", "bestaudio[ext=m4a]",
            "--",
            video_id,
        ],
        check=True,  # fail here instead of transcribing a missing/stale file
    )

    return pipe(output_file)["text"]



def transcribe(audio):
    """Transcribe an audio recording with the module-level ``pipe``.

    Args:
        audio: The audio input handed to ``pipe`` (presumably a file path
            when called from the microphone interface -- confirm against
            the caller). May be ``None`` or empty when nothing was
            recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was supplied.
    """
    # Submitting without a recording yields no input; return an empty
    # transcript instead of letting the pipeline raise.
    if not audio:
        return ""
    return pipe(audio)["text"]

# Container that will host both interfaces as tabs.
demo = gr.Blocks()


# --- Microphone tab: record speech and transcribe it ----------------------
microphone_input = gr.Audio(source="microphone", type="filepath")
iface = gr.Interface(
    fn=transcribe,
    inputs=microphone_input,
    outputs="text",
    title="Whisper Small Swedish-Microphone",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. An audio for recognize.",
)

# --- YouTube tab: paste a link and transcribe the video's audio -----------
inputs = [gr.Textbox(lines=1, placeholder="Link of youtube video here...", label="Input")]
# NOTE(review): main_generator returns a plain string; confirm that
# HighlightedText is the intended output component for it.
outputs = gr.HighlightedText()
title = "Transcription of Swedish videos"
description = "This demo uses small Whisper to  transcribe what is spoken in a swedish video"
examples = ['https://www.youtube.com/watch?v=6eWhV7xYH-Q']

# Gradient styling for the primary button, handed to Gradio unchanged.
button_css = """.gr-button-primary { background: -webkit-linear-gradient( 
                    90deg, #355764 0%, #55a8a1 100% ) !important;     background: #355764;
                        background: linear-gradient( 
                    90deg, #355764 0%, #55a8a1 100% ) !important;
                        background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important;
                        background: -webkit-linear-gradient( 
                    90deg, #355764 0%, #55a8a1 100% ) !important;
                    color:white !important}"""

io = gr.Interface(
    fn=main_generator,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    css=button_css,
)


# Render both interfaces as tabs inside the Blocks container.
with demo:
    # `io` is the YouTube-link interface; the tab labels follow the same
    # order as the interface list.
    gr.TabbedInterface([iface, io], ["Transcribe Audio", "Transcribe YouTube"])

# Start the web app with request queuing enabled.
demo.launch(enable_queue=True)