alex buz committed • Commit a7368c8
1 Parent(s): 84ce934

new
Files changed:
- .gitignore +1 -0
- 1t.py +17 -0
- app copy.py +55 -0
- app.py +66 -62
- push.bat +3 -0
- requirements.txt +84 -1
.gitignore
ADDED
@@ -0,0 +1 @@
+cache
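(The ignored "cache" directory matches the cache_dir="./cache" passed to the model loaders in 1t.py and app.py below.)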
1t.py
ADDED
@@ -0,0 +1,17 @@
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import torch
+# Load a GPT-2 model for general question answering
+tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
+model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")
+question = "What is the capital of France?"
+question = "List all US presidents in order of their presidency"
+input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")
+
+# Generate a response
+with torch.no_grad():
+    output = model.generate(input_ids, max_length=150, num_return_sequences=1,
+                            temperature=0.7, top_k=50, top_p=0.95)
+
+response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+print(response)
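A note on 1t.py as committed: transformers ignores temperature, top_k, and top_p unless do_sample=True is also passed, so this generate call falls back to greedy decoding, and the printed response includes the echoed "Q: ... A:" prompt. A minimal post-processing sketch, not part of the commit (extract_answer is a hypothetical helper), that keeps only the first generated answer line:

    # Hypothetical helper, not in the commit: drop the echoed prompt
    # and keep only the first line GPT-2 generates after "A:".
    def extract_answer(response: str) -> str:
        answer = response.split("A:", 1)[-1]   # text after the prompt's "A:"
        return answer.strip().split("\n")[0]   # GPT-2 keeps generating; take line 1

    print(extract_answer(response))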
app copy.py
ADDED
@@ -0,0 +1,55 @@
+import gradio as gr
+from transformers import pipeline
+import numpy as np
+
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+def transcribe(audio):
+    if audio is None:
+        return "No audio recorded."
+    sr, y = audio
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+def answer(transcription):
+    context = "You are chatbot answering general questions"
+    print(transcription)
+    result = qa_model(question=transcription, context=context)
+    print(result)
+    return result['answer']
+
+def process_audio(audio):
+    if audio is None:
+        return "No audio recorded.", ""
+    transcription = transcribe(audio)
+    answer_result = answer(transcription)
+    return transcription, answer_result
+
+def clear_all():
+    return None, "", ""
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcription and Question Answering")
+
+    audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+    transcription_output = gr.Textbox(label="Transcription")
+    answer_output = gr.Textbox(label="Answer Result")
+
+    clear_button = gr.Button("Clear")
+
+    audio_input.stop_recording(
+        fn=process_audio,
+        inputs=[audio_input],
+        outputs=[transcription_output, answer_output]
+    )
+
+    clear_button.click(
+        fn=clear_all,
+        inputs=[],
+        outputs=[audio_input, transcription_output, answer_output]
+    )
+
+demo.launch()
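Two caveats in app copy.py: the normalization y /= np.max(np.abs(y)) divides by zero on an all-silent recording, and because distilbert-base-cased-distilled-squad is an extractive QA model, answer() can only return spans copied out of the one-sentence context string, so it cannot actually answer general questions (presumably why app.py below switches to GPT-2). A defensive variant of the normalization, purely as a sketch:

    # Sketch only: peak-normalize, but skip the division when the clip
    # is silent, so 0/0 does not produce NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak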
app.py
CHANGED
@@ -1,63 +1,67 @@
 import gradio as gr
-from …
-…
-"""
-…
-)
-…
-"""
-…
-)
-…
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import numpy as np
+import torch
+
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+
+# Load a GPT-2 model for general question answering
+tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
+model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")
+
+def transcribe(audio):
+    if audio is None:
+        return "No audio recorded."
+    sr, y = audio
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+def answer(question):
+    input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")
+
+    # Generate a response
+    with torch.no_grad():
+        output = model.generate(input_ids, max_length=150, num_return_sequences=1,
+                                temperature=0.7, top_k=50, top_p=0.95)
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    # Extract only the answer part
+    answer = response.split("A:")[-1].strip()
+    print(answer)
+    return response
+
+def process_audio(audio):
+    if audio is None:
+        return "No audio recorded.", ""
+    transcription = transcribe(audio)
+    answer_result = answer(transcription)
+    return transcription, answer_result
+
+def clear_all():
+    return None, "", ""
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcription and Question Answering")
+
+    audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+    transcription_output = gr.Textbox(label="Transcription")
+    answer_output = gr.Textbox(label="Answer Result", lines=10)
+
+    clear_button = gr.Button("Clear")
+
+    audio_input.stop_recording(
+        fn=process_audio,
+        inputs=[audio_input],
+        outputs=[transcription_output, answer_output]
+    )
+
+    clear_button.click(
+        fn=clear_all,
+        inputs=[],
+        outputs=[audio_input, transcription_output, answer_output]
+    )
+
+demo.launch()
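In the new app.py, answer() computes and prints the extracted answer but then returns the full response, so the ten-line "Answer Result" textbox still shows the echoed "Q: ... A:" prompt. A sketch of the function with that return fixed, sampling actually enabled, and GPT-2's missing pad token handled; the do_sample and pad_token_id additions are my assumptions about intent, not part of the commit:

    def answer(question):
        input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")
        with torch.no_grad():
            output = model.generate(
                input_ids, max_length=150, num_return_sequences=1,
                do_sample=True,  # assumption: needed for temperature/top_k/top_p to apply
                temperature=0.7, top_k=50, top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
            )
        response = tokenizer.decode(output[0], skip_special_tokens=True)
        # Return only the generated answer, not the echoed prompt.
        return response.split("A:", 1)[-1].strip()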
push.bat
ADDED
@@ -0,0 +1,3 @@
+git add .
+git commit -m "%1"
+git push
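Since %1 expands to only the first argument, a multi-word commit message must be quoted when invoking the script (push.bat "fix audio input"); using %* instead would capture all arguments.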
requirements.txt
CHANGED
@@ -1 +1,84 @@
-…
+aiofiles==23.2.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+certifi==2024.7.4
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.1
+cycler==0.12.1
+dnspython==2.6.1
+email_validator==2.2.0
+fastapi==0.111.1
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.15.4
+fonttools==4.53.1
+fsspec==2024.6.1
+gradio==4.29.0
+gradio_client==0.16.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.5
+idna==3.7
+importlib_resources==6.4.0
+intel-openmp==2021.4.0
+Jinja2==3.1.4
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.1
+mdurl==0.1.2
+mkl==2021.4.0
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+orjson==3.10.6
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.19.0
+ruff==0.5.2
+safetensors==0.4.3
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+SpeechRecognition==3.10.4
+starlette==0.37.2
+sympy==1.13.0
+tbb==2021.13.0
+tokenizers==0.19.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.3.1
+torchaudio==2.3.1
+tqdm==4.66.4
+transformers==4.42.4
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+uvicorn==0.30.1
+watchfiles==0.22.0
+websockets==11.0.3