OpenGPT-4o

Running

App Files Files Community

KingNish committed on Jun 4

Commit

59ab711

•

1 Parent(s): 795e26d

Update voice_chat.py

Browse files

Files changed (1) hide show

voice_chat.py +49 -51

voice_chat.py CHANGED Viewed

@@ -1,31 +1,25 @@
-import os
 import asyncio
 import tempfile
-import random
-import edge_tts
-from streaming_stt_nemo import Model as nemo
-import gradio as gr
-from transformers import pipeline
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import AutoModel
-from huggingface_hub import InferenceClient
 import torch
-# Set default language for speech recognition
-default_lang = "en"
-# Initialize speech recognition engine
-engines = {default_lang: nemo(default_lang)}
-# Load pre-trained models for language modeling
-model3 = AutoModel.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
-# Define a function for speech-to-text transcription
-def transcribe(audio):
-    lang = "en"
-    model = engines[lang]
-    text = model.stt_file(audio)[0]
-    return text
 # Get Hugging Face API token
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -46,41 +40,45 @@ def client_fn(model):
     else:
         return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-# Define a function to generate a random seed
-def randomize_seed_fn(seed: int) -> int:
-    seed = random.randint(0, 999999)
-    return seed
-# System instructions for the language model
-system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
-# Define a function for language modeling
-def models(text, model="Mixtral 8x7B", seed=42):
-    seed = int(randomize_seed_fn(seed))
-    generator = torch.Generator().manual_seed(seed)
-    client = client_fn(model)
-    generate_kwargs = dict(
-        max_new_tokens=512,
-        seed=seed,
-    )
     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-    stream = client.text_generation(
-        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
-    )
-    output = ""
-    for response in stream:
-        if not response.token.text == "</s>":
-            output += response.token.text
-    return output
-# Define an asynchronous function to handle voice input and generate responses
-async def respond(audio, model, seed):
     user = transcribe(audio)
-    reply = models(user, model, seed)
     communicate = edge_tts.Communicate(reply)
-    # Save the generated speech to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
         tmp_path = tmp_file.name
         await communicate.save(tmp_path)
-    yield tmp_path

+import os
+import gradio as gr
+import edge_tts
 import asyncio
 import tempfile
+import numpy as np
+import soxr
+from pydub import AudioSegment
 import torch
+import sentencepiece as spm
+import onnxruntime as ort
+from huggingface_hub import hf_hub_download, InferenceClient
+# Speech recognition model configuration: Hugging Face Hub repo id the STT artifacts are downloaded from
+model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
+sample_rate = 16000  # target rate that resample() passes to soxr.resample()
+# Download preprocessor, encoder and tokenizer from the repo's "onnx" subfolder (these statements run at module import)
+preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
+encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
+tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
+# Language model configuration
 # Get Hugging Face API token from the HF_TOKEN environment variable (None when it is unset)
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
     else:
         return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# System prompt: model() prepends this to the user's text and appends "[OpenGPT 4o]" to form the generation prompt
+system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
+def resample(audio_fp32, sr):
+    return soxr.resample(audio_fp32, sr, sample_rate)
+def to_float32(audio_buffer):
+    return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
+def transcribe(audio_path):
+    audio_file = AudioSegment.from_file(audio_path)
+    sr = audio_file.frame_rate
+    audio_buffer = np.array(audio_file.get_array_of_samples())
+    audio_fp32 = to_float32(audio_buffer)
+    audio_16k = resample(audio_fp32, sr)
+    input_signal = torch.tensor(audio_16k).unsqueeze(0)
+    length = torch.tensor(len(audio_16k)).unsqueeze(0)
+    processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
+    logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
+    blank_id = tokenizer.vocab_size()
+    decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
+    text = tokenizer.decode_ids(decoded_prediction)
+    return text
+def model(text, model="Mixtral 8x7B"):
+    client1 = client_fn(model)
     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
+    stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+    return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+async def respond(audio, model):
     user = transcribe(audio)
+    reply = model(user, model)
     communicate = edge_tts.Communicate(reply)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
         tmp_path = tmp_file.name
         await communicate.save(tmp_path)
+    return tmp_path