antonbol committed on
Commit
0fc26e1
·
1 Parent(s): 0bd7f19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -8
app.py CHANGED
@@ -5,14 +5,8 @@ from PIL import Image
5
  pipe = pipeline(model="fimster/whisper-small-sv-SE") # change to "your-username/the-name-you-picked"
6
  images = ["katt", "melon", "hund", "banan"]
7
  image = random.choice(images)
8
- # query_image = Image.open("./images/" + image + ".jpeg")
9
 
10
- with gr.Blocks() as demo:
11
- with gr.Row():
12
- gr.Label("Vad är detta? Spela in ditt svar med inspelningsknappen!")
13
- input_img = gr.Image("./images/" + image + ".jpeg")
14
- demo.launch()
15
- def transcribe(audio):
16
  text = pipe(audio)["text"]
17
  returntext = ""
18
  if text.lower() != image.lower():
@@ -23,7 +17,7 @@ def transcribe(audio):
23
 
24
  iface = gr.Interface(
25
  fn=transcribe,
26
- inputs=gr.Audio(source="microphone", type="filepath"),
27
  outputs="text",
28
  title="Whisper Small Swedish",
29
  description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
 
5
  pipe = pipeline(model="fimster/whisper-small-sv-SE") # change to "your-username/the-name-you-picked"
6
  images = ["katt", "melon", "hund", "banan"]
7
  image = random.choice(images)
 
8
 
9
+ def transcribe(audio, img):
 
 
 
 
 
10
  text = pipe(audio)["text"]
11
  returntext = ""
12
  if text.lower() != image.lower():
 
17
 
18
  iface = gr.Interface(
19
  fn=transcribe,
20
+ inputs=[gr.Audio(source="microphone", type="filepath"), gr.Image("./images/" + image + ".jpeg")],
21
  outputs="text",
22
  title="Whisper Small Swedish",
23
  description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",