Spaces:
Running
Running
| import os | |
| import openai | |
| import torch | |
| import gradio as gr | |
| import pytube as pt | |
| from transformers import pipeline | |
| from huggingface_hub import model_info | |
# Credentials are read from the environment rather than hard-coded.
openai.api_key = os.getenv('OPEN_AI_KEY')
# Previously this was the literal string 'HF_TOKEN_KEY'; read the variable
# of that name instead, mirroring how the OpenAI key is loaded above.
hf_t_key = os.getenv('HF_TOKEN_KEY')

# Speech-recognition checkpoint and the language it is pinned to.
MODEL_NAME = "openai/whisper-small"
lang = "en"

# Use the first CUDA device when one is available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Shared ASR pipeline used by both transcription tabs; audio is processed
# in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Force the decoder prompt to "transcribe in `lang`" for every request.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
def transcribe(microphone, file_upload):
    """Transcribe a microphone recording or an uploaded audio file.

    When both inputs are supplied, the microphone recording is transcribed
    and the upload is ignored; the returned text is then prefixed with a
    warning saying so.

    Args:
        microphone: Microphone recording to transcribe, or ``None``.
        file_upload: Uploaded audio to transcribe, or ``None``.

    Returns:
        The transcription (optionally prefixed with a warning), or an error
        message string when neither input was provided.
    """
    have_mic = microphone is not None
    have_upload = file_upload is not None

    # Nothing to transcribe: report it as text so the UI can display it.
    if not have_mic and not have_upload:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if have_mic and have_upload:
        # The message now matches the selection logic below: the microphone
        # recording wins and the uploaded file is not used.
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    audio_source = microphone if have_mic else file_upload
    text = pipe(audio_source)["text"]
    return warn_output + text
| def _return_yt_html_embed(yt_url): | |
| video_id = yt_url.split("?v=")[-1] | |
| HTML_str = ( | |
| f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>' | |
| " </center>" | |
| ) | |
| return HTML_str | |
def yt_transcribe(yt_url):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        A ``(html_embed, text)`` tuple: the HTML snippet embedding the video
        and the transcription of its audio track.

    Raises:
        gr.Error: If the video exposes no audio-only stream.
    """
    import tempfile

    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)

    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # Surface a readable message instead of an IndexError on an empty list.
        raise gr.Error("No audio stream is available for this YouTube video.")

    # Download into a per-request temporary directory: a fixed "audio.mp3"
    # in the working directory would be overwritten by concurrent requests
    # and was never removed afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = stream.download(output_path=tmp_dir, filename="audio.mp3")
        text = pipe(audio_path)["text"]

    return html_embed_str, text
def predict(message, history):
    """Stream a chat completion for ``message`` given the prior conversation.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs from
            earlier turns.

    Yields:
        The assistant reply accumulated so far, once for every received
        chunk that carries a non-empty delta.
    """
    # Flatten the (user, assistant) pairs into role-tagged messages and
    # append the new user message last.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.extend(
            (
                {"role": "user", "content": human},
                {"role": "assistant", "content": assistant},
            )
        )
    history_openai_format.append({"role": "user", "content": message})

    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface; confirm the pinned openai version.
    response = openai.ChatCompletion.create(
        model='ft:gpt-3.5-turbo-1106:2292030-peach-tech::8cxzbHH4',
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        choices = chunk['choices']
        if not choices:
            continue
        delta = choices[0]['delta']
        if len(delta) != 0:
            # A delta may carry only a role, or a null content; treat both
            # as "no new text" instead of raising KeyError/TypeError.
            partial_message = partial_message + (delta.get('content') or "")
            yield partial_message
# "Chat" tab: chat UI backed by the streaming `predict` generator.
A1 = gr.ChatInterface(
    fn=predict,
    title="COLLEAGUE",
    description=(
        "The Consummate AI Productivity Companion Suite for Students, Freelancers, Entrepreneurs, and Professionals that Chats, Writes, Transcribes, and Creates, Created By Peach State Innovation and Technology. Select Tab For Accessibility"
    ),
    textbox=gr.Textbox(placeholder="Enter your question/prompt here..."),
    theme=gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"),
    # No retry button; the clear button gets a custom label.
    retry_btn=None,
    clear_btn="Clear Conversation",
)
# "Describe" tab: image captioning loaded from the hosted BLIP model.
A3 = gr.load(
    name="models/Salesforce/blip-image-captioning-large",
    title=" ",
    description="Upload Any Type of Imagery (photos, medical imagery, etc.), I'll Give You Its Description",
    outputs=[gr.Textbox(label="I see...")],
    theme=gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"),
)
# "Create" tab: text-to-image generation loaded from the hosted SDXL model.
A4 = gr.load(
    name="models/stabilityai/stable-diffusion-xl-base-1.0",
    inputs=[gr.Textbox(label="Enter Your Image Description")],
    outputs=[gr.Image(label="Image")],
    title=" ",
    description="Bring Your Imagination Into Existence, Create Unique Images With COLLEAGUE",
    allow_flagging="never",
    # Example prompts offered to the user.
    examples=[
        "A gigantic celtic leprechaun wandering the streets of downtown Atlanta",
        "A child eating pizza in a Brazilian favela",
    ],
)
# "Write" tab: embeds the separately hosted scribe Space in an iframe.
A5 = gr.HTML(
    value="""
<iframe
src="https://peachtechai-colleague-scribe.hf.space"
frameborder="0"
width="1250"
height="1450"
></iframe>""",
)
# "Transcribe" tab: microphone and file-upload inputs, both handed to
# `transcribe` as file paths.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[gr.Microphone(type="filepath"), gr.Audio(type="filepath")],
    outputs="text",
    title=" ",
    description="Transcribe real-time speech and audio files of any length at the click of a button.",
    allow_flagging="never",
)
# "Transcribe YouTube Videos" tab.
# NOTE: this rebinds the module-level name `yt_transcribe` from the function
# to the Interface; `fn=yt_transcribe` is evaluated before the assignment,
# so the Interface still wraps the original function.
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(
            lines=1,
            placeholder="Paste your YouTube video URL/web address here",
            label="YouTube Video URL",
        ),
    ],
    # The function returns (embed HTML, transcription text).
    outputs=["html", "text"],
    title=" ",
    description="Transcribe YouTube videos at the click of a button.",
    allow_flagging="never",
)
# Tab label -> interface, in display order.
_tabs = [
    ("Chat", A1),
    ("Write", A5),
    ("Transcribe", mf_transcribe),
    ("Transcribe YouTube Videos", yt_transcribe),
    ("Describe", A3),
    ("Create", A4),
]

# Assemble every tool into one tabbed app.
clp = gr.TabbedInterface(
    interface_list=[interface for _, interface in _tabs],
    tab_names=[label for label, _ in _tabs],
    theme=gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"),
)

# Enable request queuing, then start the server.
clp.queue().launch()