alex buz committed
Commit 68ba2e8
1 Parent(s): e6868fd
3t_dropdown copy 4.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ from openai import OpenAI
+
+ def predict(message, history, character, api_key, progress=gr.Progress()):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in progress.tqdm(response, desc="Generating"):
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def reset(character):
+     return [], []
+
+ # Gradio app
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>My Chatbot</h1>")
+     bot = gr.Chatbot(render=False)
+     dropdown = gr.Dropdown(
+         ["Character 1", "Character 2", "Character 3", "Character 4", "Character 5", "Character 6", "Character 7", "Character 8", "Character 9", "Character 10", "Character 11", "Character 12", "Character 13"],
+         label="Characters",
+         info="Select the character that you'd like to speak to",
+         value="Character 1"
+     )
+     chat = gr.ChatInterface(
+         fn=predict,
+         chatbot=bot,
+         additional_inputs=[dropdown, gr.Textbox(label="API Key")],
+     )
+     dropdown.change(fn=reset, inputs=dropdown, outputs=[bot, chat.chatbot_state])
+
+ demo.queue()
+ demo.launch()
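
Note: `gr.ChatInterface` treats a generator `fn` as a streaming handler; every `yield` replaces the in-progress bot reply. A minimal sketch of that contract, using a toy `slow_echo` handler instead of the OpenAI client (names here are illustrative, not part of the commit):

import time
import gradio as gr

def slow_echo(message, history):
    # Each yield overwrites the pending assistant message, giving a typing effect.
    partial = ""
    for char in message:
        partial += char
        time.sleep(0.05)
        yield partial

gr.ChatInterface(fn=slow_echo).queue().launch()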
3t_dropdown copy 5.py ADDED
@@ -0,0 +1,63 @@
+ import gradio as gr
+ from openai import OpenAI
+ import threading
+
+ pause_event = threading.Event()
+
+ def predict(message, history, character, api_key, progress=gr.Progress()):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in progress.tqdm(response, desc="Generating"):
+         if pause_event.is_set():
+             break
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def pause():
+     pause_event.set()
+
+ def resume():
+     pause_event.clear()
+
+ def reset(character):
+     return [], []
+
+ # Gradio app
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>My Chatbot</h1>")
+     bot = gr.Chatbot(render=False)
+     dropdown = gr.Dropdown(
+         ["Character 1", "Character 2", "Character 3", "Character 4", "Character 5", "Character 6", "Character 7", "Character 8", "Character 9", "Character 10", "Character 11", "Character 12", "Character 13"],
+         label="Characters",
+         info="Select the character that you'd like to speak to",
+         value="Character 1"
+     )
+     chat = gr.ChatInterface(
+         fn=predict,
+         chatbot=bot,
+         additional_inputs=[dropdown, gr.Textbox(label="API Key")],
+     )
+     dropdown.change(fn=reset, inputs=dropdown, outputs=[bot, chat.chatbot_state])
+
+     pause_button = gr.Button("Pause")
+     resume_button = gr.Button("Resume")
+
+     pause_button.click(fn=pause, inputs=None, outputs=None)
+     resume_button.click(fn=resume, inputs=None, outputs=None)
+
+ demo.queue()
+ demo.launch()
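
Because `pause_event` lives at module level, it is shared by every connected session: one user's Pause button breaks the stream for all users. For reference, the `threading.Event` calls this file relies on (standard library, no Gradio involved):

import threading

pause_event = threading.Event()   # starts cleared: is_set() -> False

pause_event.set()                 # streaming loops that test is_set() will now break
print(pause_event.is_set())       # True; a non-blocking check
pause_event.clear()               # reset so the next generation runs normally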
3t_dropdown copy 6.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ from openai import OpenAI
+ import threading
+
+ pause_event = threading.Event()
+ resume_event = threading.Event()
+
+ def predict(message, history, character, api_key, progress=gr.Progress()):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4o',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in progress.tqdm(response, desc="Generating"):
+         while pause_event.is_set():
+             resume_event.wait()
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def pause():
+     pause_event.set()
+     resume_event.clear()
+
+ def resume():
+     pause_event.clear()
+     resume_event.set()
+
+ def reset(character):
+     return [], []
+
+ # Gradio app
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>My Chatbot</h1>")
+     bot = gr.Chatbot(render=False)
+     dropdown = gr.Dropdown(
+         ["Character 1", "Character 2", "Character 3", "Character 4", "Character 5", "Character 6", "Character 7", "Character 8", "Character 9", "Character 10", "Character 11", "Character 12", "Character 13"],
+         label="Characters",
+         info="Select the character that you'd like to speak to",
+         value="Character 1"
+     )
+     api_key_input = gr.Textbox(label="API Key")
+
+     chat = gr.ChatInterface(
+         fn=predict,
+         chatbot=bot,
+         additional_inputs=[dropdown, api_key_input],
+     )
+     dropdown.change(fn=reset, inputs=dropdown, outputs=[bot, chat.chatbot_state])
+
+     pause_button = gr.Button("Pause")
+     resume_button = gr.Button("Resume")
+
+     pause_button.click(fn=pause, inputs=None, outputs=None)
+     resume_button.click(fn=resume, inputs=None, outputs=None)
+
+ demo.queue()
+ demo.launch()
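
The two-flag dance above (`pause_event` plus `resume_event`) can be collapsed into a single Event whose `wait()` blocks while paused. A sketch of that variant, not the committed code:

import threading

run_event = threading.Event()
run_event.set()  # start in the running state

def pause():
    run_event.clear()   # wait() now blocks callers

def resume():
    run_event.set()     # wakes any stream blocked in wait()

# In the streaming loop, one line replaces the while/wait pair:
#     run_event.wait()  # returns immediately while running, blocks while paused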
3t_dropdown copy 7.py ADDED
@@ -0,0 +1,71 @@
+ import gradio as gr
+ from openai import OpenAI
+ import threading
+
+ pause_event = threading.Event()
+ resume_event = threading.Event()
+
+ def predict(message, history, character, api_key, progress=gr.Progress()):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in progress.tqdm(response, desc="Generating"):
+         while pause_event.is_set():
+             resume_event.wait()
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def pause():
+     pause_event.set()
+     resume_event.clear()
+     return "Paused"
+
+ def resume():
+     pause_event.clear()
+     resume_event.set()
+     return "Resumed"
+
+ def reset(character):
+     return [], []
+
+ # Gradio app
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>My Chatbot</h1>")
+     bot = gr.Chatbot(render=False)
+     dropdown = gr.Dropdown(
+         ["Character 1", "Character 2", "Character 3", "Character 4", "Character 5", "Character 6", "Character 7", "Character 8", "Character 9", "Character 10", "Character 11", "Character 12", "Character 13"],
+         label="Characters",
+         info="Select the character that you'd like to speak to",
+         value="Character 1"
+     )
+     api_key_input = gr.Textbox(label="API Key")
+
+     chat = gr.ChatInterface(
+         fn=predict,
+         chatbot=bot,
+         additional_inputs=[dropdown, api_key_input],
+     )
+     dropdown.change(fn=reset, inputs=dropdown, outputs=[bot, chat.chatbot_state])
+
+     pause_button = gr.Button("Pause")
+     resume_button = gr.Button("Resume")
+     status = gr.Textbox(label="Status", interactive=False)
+
+     pause_button.click(fn=pause, inputs=None, outputs=status)
+     resume_button.click(fn=resume, inputs=None, outputs=status)
+
+ demo.queue()
+ demo.launch()
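
The only change from the previous copy: `pause()` and `resume()` now return a string, and `.click(..., outputs=status)` routes that return value into the `status` Textbox. The general rule is that a callback's return values map positionally onto its `outputs` components, e.g.:

import gradio as gr

def report():
    return "Paused"          # one return value -> one output component

with gr.Blocks() as demo:
    status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Pause").click(fn=report, inputs=None, outputs=status)

demo.launch()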
app copy 6.py ADDED
@@ -0,0 +1,125 @@
+ import time
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+ from openai import OpenAI
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ def predict(message, history, api_key, is_paused):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4o',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in response:
+         print(is_paused)
+         if is_paused[0]:  # Check if paused
+
+             while is_paused[0]:
+                 print('paused')
+                 time.sleep(0.1)
+             print('not paused')
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def chat_with_api_key(api_key, message, history, is_paused):
+     accumulated_message = ""
+     for partial_message in predict(message, history, api_key, is_paused):
+         if is_paused[0]:  # Check if paused
+             break
+         accumulated_message = partial_message
+         history.append((message, accumulated_message))
+         yield message, [[message, accumulated_message]]
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     context = "You are a chatbot answering general questions"
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", []
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, [[transcription, answer_result]]
+
+ def update_output(api_key, audio_input, state, is_paused):
+     if is_paused[0]:  # Check if paused
+         yield "", state  # Return current state without making changes
+     else:
+         message = transcribe(audio_input)
+         responses = chat_with_api_key(api_key, message, state, is_paused)
+         accumulated_response = ""
+         for response, updated_state in responses:
+             if is_paused[0]:  # Check if paused
+                 break
+             accumulated_response = response
+             yield accumulated_response, updated_state
+
+ def clear_all():
+     return None, "", []
+
+ def toggle_pause(is_paused):
+     is_paused[0] = not is_paused[0]
+     return is_paused
+
+ def update_button_label(is_paused):
+     return "Resume" if is_paused[0] else "Pause"
+
+ with gr.Blocks() as demo:
+     answer_output = gr.Chatbot(label="Answer Result")
+     with gr.Row():
+         audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+         with gr.Column():
+             api_key = gr.Textbox(label="API Key", placeholder="Enter your API key", type="password")
+             transcription_output = gr.Textbox(label="Transcription")
+             clear_button = gr.Button("Clear")
+             pause_button = gr.Button("Pause")
+
+     state = gr.State([])
+     is_paused = gr.State([False])  # Using a list to hold the mutable pause state
+
+     audio_input.stop_recording(
+         fn=update_output,
+         inputs=[api_key, audio_input, state, is_paused],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+     pause_button.click(
+         fn=toggle_pause,
+         inputs=[is_paused],
+         outputs=[is_paused]
+     ).then(
+         fn=update_button_label,
+         inputs=[is_paused],
+         outputs=[pause_button]
+     )
+
+ demo.launch()
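
On the `is_paused = gr.State([False])` trick: Gradio hands callbacks the same list object, so `toggle_pause` can flip `is_paused[0]` in place and a generator already looping in another event sees the change on its next check. A bare boolean would be rebound rather than mutated, and the running loop would never notice. Stripped of Gradio, the mechanism is just shared mutable state (illustrative names):

flag = [False]               # one-element list as a mutable cell

def toggle(flag):
    flag[0] = not flag[0]    # in-place mutation, visible to every holder
    return flag

snapshot = flag              # another reference to the same list
toggle(flag)
print(snapshot[0])           # True: both names see the mutation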
app copy 7.py ADDED
@@ -0,0 +1,125 @@
+ import time
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+ from openai import OpenAI
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ def predict(message, history, api_key, is_paused):
+     client = OpenAI(api_key=api_key)
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = client.chat.completions.create(
+         model='gpt-4o',
+         messages=history_openai_format,
+         temperature=1.0,
+         stream=True
+     )
+
+     partial_message = ""
+     for chunk in response:
+         print(is_paused)
+         if is_paused[0]:  # Check if paused
+
+             while is_paused[0]:
+                 print('paused')
+                 time.sleep(0.1)
+             print('not paused')
+         if chunk.choices[0].delta.content:
+             partial_message += chunk.choices[0].delta.content
+             yield partial_message
+
+ def chat_with_api_key(api_key, message, history, is_paused):
+     accumulated_message = ""
+     for partial_message in predict(message, history, api_key, is_paused):
+         if is_paused[0]:  # Check if paused
+             break
+         accumulated_message = partial_message
+         history.append((message, accumulated_message))
+         yield message, [[message, accumulated_message]]
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     context = "You are a chatbot answering general questions"
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", []
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, [[transcription, answer_result]]
+
+ def update_output(api_key, audio_input, state, is_paused):
+     if is_paused[0]:  # Check if paused
+         yield "", state  # Return current state without making changes
+     else:
+         message = transcribe(audio_input)
+         responses = chat_with_api_key(api_key, message, state, is_paused)
+         accumulated_response = ""
+         for response, updated_state in responses:
+             if is_paused[0]:  # Check if paused
+                 break
+             accumulated_response = response
+             yield accumulated_response, updated_state
+
+ def clear_all():
+     return None, "", []
+
+ def toggle_pause(is_paused):
+     is_paused[0] = not is_paused[0]
+     return is_paused
+
+ def update_button_label(is_paused):
+     return "Resume" if is_paused[0] else "Pause"
+
+ with gr.Blocks() as demo:
+     answer_output = gr.Chatbot(label="Answer Result")
+     with gr.Row():
+         audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+         with gr.Column():
+             api_key = gr.Textbox(label="API Key", placeholder="Enter your API key", type="password")
+             transcription_output = gr.Textbox(label="Transcription")
+             clear_button = gr.Button("Clear")
+             pause_button = gr.Button("Pause")
+
+     state = gr.State([])
+     is_paused = gr.State([False])  # Using a list to hold the mutable pause state
+
+     audio_input.stop_recording(
+         fn=update_output,
+         inputs=[api_key, audio_input, state, is_paused],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+     pause_button.click(
+         fn=toggle_pause,
+         inputs=[is_paused],
+         outputs=[is_paused]
+     ).then(
+         fn=update_button_label,
+         inputs=[is_paused],
+         outputs=[pause_button]
+     )
+
+ demo.launch()
app copy 8.py ADDED
@@ -0,0 +1,146 @@
+ import time
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+ from openai import OpenAI
+ import threading
+ import queue
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ class PubSub:
+     def __init__(self):
+         self.subscribers = []
+
+     def subscribe(self, callback):
+         self.subscribers.append(callback)
+
+     def publish(self, message):
+         for subscriber in self.subscribers:
+             subscriber(message)
+
+ def predict(message, history, api_key, is_paused, pubsub):
+     def run_prediction():
+         client = OpenAI(api_key=api_key)
+         history_openai_format = []
+         for human, assistant in history:
+             history_openai_format.append({"role": "user", "content": human})
+             history_openai_format.append({"role": "assistant", "content": assistant})
+         history_openai_format.append({"role": "user", "content": message})
+
+         response = client.chat.completions.create(
+             model='gpt-4o',
+             messages=history_openai_format,
+             temperature=1.0,
+             stream=True
+         )
+
+         partial_message = ""
+         for chunk in response:
+             if is_paused[0]:
+                 while is_paused[0]:
+                     time.sleep(0.1)
+             if chunk.choices[0].delta.content:
+                 partial_message += chunk.choices[0].delta.content
+                 pubsub.publish(partial_message)
+
+     thread = threading.Thread(target=run_prediction)
+     thread.start()
+
+ def chat_with_api_key(api_key, message, history, is_paused):
+     pubsub = PubSub()
+     result_queue = queue.Queue()
+
+     def update_message(partial_message):
+         result_queue.put(partial_message)
+
+     pubsub.subscribe(update_message)
+     predict(message, history, api_key, is_paused, pubsub)
+
+     while True:
+         try:
+             accumulated_message = result_queue.get(timeout=0.1)
+             history.append((message, accumulated_message))
+             yield message, [[message, accumulated_message]]
+         except queue.Empty:
+             if not any(thread.is_alive() for thread in threading.enumerate() if thread != threading.current_thread()):
+                 break
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     context = "You are a chatbot answering general questions"
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", []
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, [[transcription, answer_result]]
+
+ def update_output(api_key, audio_input, state, is_paused):
+     if is_paused[0]:
+         yield "", state
+     else:
+         message = transcribe(audio_input)
+         responses = chat_with_api_key(api_key, message, state, is_paused)
+         for response, updated_state in responses:
+             if is_paused[0]:
+                 break
+             yield response, updated_state
+
+ def clear_all():
+     return None, "", []
+
+ def toggle_pause(is_paused):
+     is_paused[0] = not is_paused[0]
+     return is_paused
+
+ def update_button_label(is_paused):
+     return "Resume" if is_paused[0] else "Pause"
+
+ with gr.Blocks() as demo:
+     answer_output = gr.Chatbot(label="Answer Result")
+     with gr.Row():
+         audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+         with gr.Column():
+             api_key = gr.Textbox(label="API Key", placeholder="Enter your API key", type="password")
+             transcription_output = gr.Textbox(label="Transcription")
+             clear_button = gr.Button("Clear")
+             pause_button = gr.Button("Pause")
+
+     state = gr.State([])
+     is_paused = gr.State([False])
+
+     audio_input.stop_recording(
+         fn=update_output,
+         inputs=[api_key, audio_input, state, is_paused],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+     pause_button.click(
+         fn=toggle_pause,
+         inputs=[is_paused],
+         outputs=[is_paused]
+     ).then(
+         fn=update_button_label,
+         inputs=[is_paused],
+         outputs=[pause_button]
+     )
+
+ demo.launch()
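
`app copy 8.py` replaces the in-generator sleep loop with a worker thread that publishes chunks through `PubSub` into a `queue.Queue`, which the generator then drains. Reduced to its core (toy producer, illustrative names), the thread-to-generator bridge looks like:

import threading
import queue

def produce(q):
    # Stands in for the OpenAI streaming loop publishing partial messages.
    for i in range(3):
        q.put(f"chunk {i}")

q = queue.Queue()
worker = threading.Thread(target=produce, args=(q,))
worker.start()

# Drain until the worker has exited and the queue is empty.
while worker.is_alive() or not q.empty():
    try:
        print(q.get(timeout=0.1))
    except queue.Empty:
        pass

Note that the committed version instead scans `threading.enumerate()` for any live non-current thread, which is fragile: an unrelated background thread (Gradio's own workers, for instance) can keep that loop polling indefinitely.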
app copy 9.py ADDED
@@ -0,0 +1,176 @@
+ import time
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+ from openai import OpenAI
+ import threading
+ import queue
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ class PubSub:
+     def __init__(self):
+         self.subscribers = []
+
+     def subscribe(self, callback):
+         self.subscribers.append(callback)
+
+     def publish(self, message):
+         for subscriber in self.subscribers:
+             subscriber(message)
+
+ def predict(message, history, api_key, is_paused, pubsub):
+     def run_prediction():
+         client = OpenAI(api_key=api_key)
+         history_openai_format = []
+         for human, assistant in history:
+             history_openai_format.append({"role": "user", "content": human})
+             history_openai_format.append({"role": "assistant", "content": assistant})
+         history_openai_format.append({"role": "user", "content": message})
+
+         response = client.chat.completions.create(
+             model='gpt-4o',
+             messages=history_openai_format,
+             temperature=1.0,
+             stream=True
+         )
+
+         partial_message = ""
+         for chunk in response:
+             if is_paused[0]:
+                 while is_paused[0]:
+                     time.sleep(0.1)
+             if chunk.choices[0].delta.content:
+                 partial_message += chunk.choices[0].delta.content
+                 pubsub.publish(partial_message)
+
+     thread = threading.Thread(target=run_prediction)
+     thread.start()
+
+ def chat_with_api_key(api_key, message, history, is_paused):
+     pubsub = PubSub()
+     result_queue = queue.Queue()
+
+     def update_message(partial_message):
+         result_queue.put(partial_message)
+
+     pubsub.subscribe(update_message)
+     predict(message, history, api_key, is_paused, pubsub)
+
+     while True:
+         try:
+             accumulated_message = result_queue.get(timeout=0.1)
+             history.append((message, accumulated_message))
+             yield message, [[message, accumulated_message]]
+         except queue.Empty:
+             if not any(thread.is_alive() for thread in threading.enumerate() if thread != threading.current_thread()):
+                 break
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     context = "You are a chatbot answering general questions"
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", []
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, [[transcription, answer_result]]
+
+ def update_output(api_key, audio_input, state, is_paused):
+     if is_paused[0]:
+         yield "", state
+     else:
+         message = transcribe(audio_input)
+         responses = chat_with_api_key(api_key, message, state, is_paused)
+         for response, updated_state in responses:
+             if is_paused[0]:
+                 break
+             yield response, updated_state
+
+ def clear_all():
+     return None, "", []
+
+ def toggle_pause(is_paused):
+     is_paused[0] = not is_paused[0]
+     return is_paused
+
+ def update_button_label(is_paused):
+     return "Resume" if is_paused[0] else "Pause"
+
+ with gr.Blocks() as demo:
+     gr.HTML("""
+     <script>
+     function ensureScrollable() {
+         var chatbox = document.querySelector('.chatbot');
+         if (chatbox) {
+             chatbox.style.overflowY = 'auto';
+             chatbox.style.maxHeight = '300px';
+         }
+     }
+
+     function scrollToBottom() {
+         var chatbox = document.querySelector('.chatbot');
+         if (chatbox) {
+             chatbox.scrollTop = chatbox.scrollHeight;
+         }
+     }
+
+     function setupScrolling() {
+         ensureScrollable();
+         setInterval(scrollToBottom, 100);
+     }
+
+     if (document.readyState === 'loading') {
+         document.addEventListener('DOMContentLoaded', setupScrolling);
+     } else {
+         setupScrolling();
+     }
+     </script>
+     """)
+
+     answer_output = gr.Chatbot(label="Answer Result", height=300)
+     with gr.Row():
+         audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+         with gr.Column():
+             api_key = gr.Textbox(label="API Key", placeholder="Enter your API key", type="password")
+             transcription_output = gr.Textbox(label="Transcription")
+             clear_button = gr.Button("Clear")
+             pause_button = gr.Button("Pause")
+
+     state = gr.State([])
+     is_paused = gr.State([False])
+
+     audio_input.stop_recording(
+         fn=update_output,
+         inputs=[api_key, audio_input, state, is_paused],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+     pause_button.click(
+         fn=toggle_pause,
+         inputs=[is_paused],
+         outputs=[is_paused]
+     ).then(
+         fn=update_button_label,
+         inputs=[is_paused],
+         outputs=[pause_button]
+     )
+
+ demo.launch()
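
A caveat on the `gr.HTML` block: browsers do not execute `<script>` tags injected via innerHTML, so this auto-scroll script may never run, depending on how the Gradio version renders the component. Gradio 4.x exposes a `js` parameter on `gr.Blocks` that runs once on app load; a sketch of the same auto-scroll idea through that route (assuming the `.chatbot` selector still matches):

import gradio as gr

scroll_js = """
() => {
    const chatbox = document.querySelector('.chatbot');
    if (chatbox) {
        setInterval(() => { chatbox.scrollTop = chatbox.scrollHeight; }, 100);
    }
}
"""

with gr.Blocks(js=scroll_js) as demo:  # js runs once when the app loads
    chatbot = gr.Chatbot(label="Answer Result", height=300)

demo.launch()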
app.py CHANGED
@@ -1,3 +1,4 @@
+ import time
  import gradio as gr
  from transformers import pipeline
  import numpy as np
@@ -6,8 +7,7 @@ from openai import OpenAI
  transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
  qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

- def predict(message, history, api_key):
-     print('in predict')
+ def predict(message, history, api_key, is_paused):
      client = OpenAI(api_key=api_key)
      history_openai_format = []
      for human, assistant in history:
@@ -24,19 +24,25 @@ def predict(message, history, api_key):

      partial_message = ""
      for chunk in response:
+         print(is_paused)
+         if is_paused[0]:  # Check if paused
+
+             while is_paused[0]:
+                 print('paused')
+                 time.sleep(0.1)
+             print('not paused')
          if chunk.choices[0].delta.content:
-             print(111, chunk.choices[0].delta.content)
              partial_message += chunk.choices[0].delta.content
              yield partial_message

- def chat_with_api_key(api_key, message, history):
-     print('in chat_with_api_key')
+ def chat_with_api_key(api_key, message, history, is_paused):
      accumulated_message = ""
-     for partial_message in predict(message, history, api_key):
+     for partial_message in predict(message, history, api_key, is_paused):
+         if is_paused[0]:  # Check if paused
+             break
          accumulated_message = partial_message
          history.append((message, accumulated_message))
-         # yield accumulated_message, history
-         yield message,[[message, accumulated_message]]
+         yield message, [[message, accumulated_message]]

  def transcribe(audio):
      if audio is None:
@@ -44,7 +50,6 @@ def transcribe(audio):
      sr, y = audio
      y = y.astype(np.float32)
      y /= np.max(np.abs(y))
-
      return transcriber({"sampling_rate": sr, "raw": y})["text"]

  def answer(transcription):
@@ -59,18 +64,29 @@ def process_audio(audio):
      answer_result = answer(transcription)
      return transcription, [[transcription, answer_result]]

- def update_output(api_key, audio_input, state):
-     print('in update_output')
-     message = transcribe(audio_input)
-     responses = chat_with_api_key(api_key, message, state)
-     accumulated_response = ""
-     for response, updated_state in responses:
-         accumulated_response = response
-         yield accumulated_response, updated_state
+ def update_output(api_key, audio_input, state, is_paused):
+     if is_paused[0]:  # Check if paused
+         yield "", state  # Return current state without making changes
+     else:
+         message = transcribe(audio_input)
+         responses = chat_with_api_key(api_key, message, state, is_paused)
+         accumulated_response = ""
+         for response, updated_state in responses:
+             if is_paused[0]:  # Check if paused
+                 break
+             accumulated_response = response
+             yield accumulated_response, updated_state

  def clear_all():
      return None, "", []

+ def toggle_pause(is_paused):
+     is_paused[0] = not is_paused[0]
+     return is_paused
+
+ def update_button_label(is_paused):
+     return "Resume" if is_paused[0] else "Pause"
+
  with gr.Blocks() as demo:
      answer_output = gr.Chatbot(label="Answer Result")
      with gr.Row():
@@ -79,19 +95,16 @@ with gr.Blocks() as demo:
              api_key = gr.Textbox(label="API Key", placeholder="Enter your API key", type="password")
              transcription_output = gr.Textbox(label="Transcription")
              clear_button = gr.Button("Clear")
+             pause_button = gr.Button("Pause")
+
      state = gr.State([])
-     if 1:
-         audio_input.stop_recording(
-             fn=update_output,
-             inputs=[api_key, audio_input, state],
-             outputs=[transcription_output, answer_output]
-         )
-     if 0:
-         audio_input.stop_recording(
-             fn=process_audio,
-             inputs=[audio_input],
-             outputs=[transcription_output, answer_output]
-         )
+     is_paused = gr.State([False])  # Using a list to hold the mutable pause state
+
+     audio_input.stop_recording(
+         fn=update_output,
+         inputs=[api_key, audio_input, state, is_paused],
+         outputs=[transcription_output, answer_output]
+     )

      clear_button.click(
          fn=clear_all,
@@ -99,5 +112,14 @@ with gr.Blocks() as demo:
          outputs=[audio_input, transcription_output, answer_output]
      )

+     pause_button.click(
+         fn=toggle_pause,
+         inputs=[is_paused],
+         outputs=[is_paused]
+     ).then(
+         fn=update_button_label,
+         inputs=[is_paused],
+         outputs=[pause_button]
+     )

  demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,6 @@
  aiofiles==23.2.1
+ aiohttp==3.9.5
+ aiosignal==1.3.1
  altair==5.3.0
  annotated-types==0.7.0
  anyio==4.4.0
@@ -9,6 +11,8 @@ click==8.1.7
  colorama==0.4.6
  contourpy==1.2.1
  cycler==0.12.1
+ dataclasses-json==0.6.7
+ distro==1.9.0
  dnspython==2.6.1
  email_validator==2.2.0
  fastapi==0.111.1
@@ -16,9 +20,11 @@ fastapi-cli==0.0.4
  ffmpy==0.3.2
  filelock==3.15.4
  fonttools==4.53.1
+ frozenlist==1.4.1
  fsspec==2024.6.1
  gradio==4.29.0
  gradio_client==0.16.1
+ greenlet==3.0.3
  h11==0.14.0
  httpcore==1.0.5
  httptools==0.6.1
@@ -28,17 +34,28 @@ idna==3.7
  importlib_resources==6.4.0
  intel-openmp==2021.4.0
  Jinja2==3.1.4
+ jsonpatch==1.33
+ jsonpointer==3.0.0
  jsonschema==4.23.0
  jsonschema-specifications==2023.12.1
  kiwisolver==1.4.5
+ langchain==0.2.8
+ langchain-community==0.2.7
+ langchain-core==0.2.20
+ langchain-text-splitters==0.2.2
+ langsmith==0.1.88
  markdown-it-py==3.0.0
  MarkupSafe==2.1.5
+ marshmallow==3.21.3
  matplotlib==3.9.1
  mdurl==0.1.2
  mkl==2021.4.0
  mpmath==1.3.0
+ multidict==6.0.5
+ mypy-extensions==1.0.0
  networkx==3.3
  numpy==1.26.4
+ openai==1.35.14
  orjson==3.10.6
  packaging==24.1
  pandas==2.2.2
@@ -48,6 +65,7 @@ pydantic_core==2.20.1
  pydub==0.25.1
  Pygments==2.18.0
  pyparsing==3.1.2
+ Pypubsub==4.0.3
  python-dateutil==2.9.0.post0
  python-dotenv==1.0.1
  python-multipart==0.0.9
@@ -65,9 +83,11 @@ shellingham==1.5.4
  six==1.16.0
  sniffio==1.3.1
  SpeechRecognition==3.10.4
+ SQLAlchemy==2.0.31
  starlette==0.37.2
  sympy==1.13.0
  tbb==2021.13.0
+ tenacity==8.5.0
  tokenizers==0.19.1
  tomlkit==0.12.0
  toolz==0.12.1
@@ -76,9 +96,11 @@ torchaudio==2.3.1
  tqdm==4.66.4
  transformers==4.42.4
  typer==0.12.3
+ typing-inspect==0.9.0
  typing_extensions==4.12.2
  tzdata==2024.1
  urllib3==2.2.2
  uvicorn==0.30.1
  watchfiles==0.22.0
  websockets==11.0.3
+ yarl==1.9.4