import gradio as gr
from huggingface_hub import snapshot_download
from threading import Thread
import os
import time
import base64
import numpy as np
import requests
from server import serve
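
# Download the mini-omni checkpoint from the Hugging Face Hub into ./checkpoint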
repo_id = "gpt-omni/mini-omni"
snapshot_download(repo_id, local_dir="./checkpoint", revision="main")
IP='0.0.0.0'
PORT=60808
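
# Run the inference server (from server.py) in a background daemon thread
# so the Gradio app can start in the same process.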
thread = Thread(target=serve, daemon=True)
thread.start()
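
# Streaming chat endpoint and output audio format:
# 24 kHz, mono, 16-bit PCM, read from the response in 4096-byte chunks.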
API_URL = "http://0.0.0.0:60808/chat"
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1
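
# Encode the recorded audio file as base64, POST it to the chat endpoint,
# and stream the raw PCM response back to Gradio as (sample_rate, int16 array) chunks.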
def process_audio(audio):
    filepath = audio
    print(f"filepath: {filepath}")
    if filepath is None:
        return

    cnt = 0
    with open(filepath, "rb") as f:
        data = f.read()
        base64_encoded = str(base64.b64encode(data), encoding="utf-8")
        files = {"audio": base64_encoded}
        tik = time.time()
        with requests.post(API_URL, json=files, stream=True) as response:
            try:
                for chunk in response.iter_content(chunk_size=OUT_CHUNK):
                    if chunk:
                        # Convert chunk to numpy array
                        if cnt == 0:
                            print(f"first chunk time cost: {time.time() - tik:.3f}")
                        cnt += 1
                        audio_data = np.frombuffer(chunk, dtype=np.int16)
                        audio_data = audio_data.reshape(-1, OUT_CHANNELS)
                        yield OUT_RATE, audio_data.astype(np.int16)
            except Exception as e:
                print(f"error: {e}")
def greet(name):
    return "Hello " + name + "!!"
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()
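
# Note: process_audio is defined above but the launched demo only exposes the
# text-only greet interface. A minimal sketch (an assumption, not part of this
# app) of wiring process_audio into a streaming audio interface could look like:
#
#   demo = gr.Interface(
#       fn=process_audio,
#       inputs=gr.Audio(type="filepath"),
#       outputs=gr.Audio(streaming=True, autoplay=True),
#   )
#   demo.launch()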