import os
import openai
import torch
import urllib

import gradio as gr
import pytube as pt
from transformers import pipeline
from huggingface_hub import model_info

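# API keys are read from environment variables (e.g. Hugging Face Space secrets);
# hf_t_key is loaded here but not referenced elsewhere in this script.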
openai.api_key = os.getenv('OPEN_AI_KEY')
hf_t_key = os.getenv('HF_TOKEN_KEY')

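# Whisper checkpoint and transcription language for the speech-recognition pipeline.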
MODEL_NAME = "openai/whisper-small" 
lang = "en"

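# Build the ASR pipeline on GPU 0 when available, otherwise on CPU,
# processing audio in 30-second chunks.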
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

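# Force the decoder to transcribe in English instead of auto-detecting the language.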
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")

def transcribe(microphone, file_upload):
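    """Transcribe a microphone recording or an uploaded audio file.

    If both inputs are provided, the microphone recording is used and the
    uploaded file is discarded; if neither is provided, an error message
    is returned instead of a transcript.
    """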
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded a recorded audio file . "
            "The recorded file from the microphone uploaded, transcribed and immediately discarded.\n"
        )

    elif (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"

    file = microphone if microphone is not None else file_upload

    text = pipe(file)["text"]

    return warn_output + text


def _return_yt_html_embed(yt_url):
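    """Return an HTML snippet that embeds the YouTube video for the given URL."""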
    video_id = yt_url.split("?v=")[-1]
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
        " </center>"
    )
    return HTML_str


def yt_transcribe(yt_url):
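    """Download the audio track of a YouTube video with pytube and transcribe it.

    Returns the HTML embed of the video alongside the Whisper transcript.
    """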
    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)
    stream = yt.streams.filter(only_audio=True)[0]
    stream.download(filename="audio.mp3")

    text = pipe("audio.mp3")["text"]

    return html_embed_str, text


def predict(message, history):
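    """Stream a chat reply from the fine-tuned OpenAI model.

    Converts Gradio's (user, assistant) history into OpenAI chat messages and
    yields the partial reply as tokens arrive (legacy openai<1.0 streaming API).
    """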
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human })
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.ChatCompletion.create(
        model='ft:gpt-4o-mini-2024-07-18:2292030-peach-tech:colleague-ai:9wqEywaW',
        messages=history_openai_format,
        temperature=1.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        # Skip role-only and empty deltas so a missing 'content' key can't raise.
        content = chunk['choices'][0]['delta'].get('content')
        if content:
            partial_message += content
            yield partial_message

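# Chat tab: conversational interface backed by the fine-tuned model above.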
A1 = gr.ChatInterface(predict,
                title="COLLEAGUE",
                description="An All-In-One AI Productivity Suite By Peach State Innovation and Technology. Select The Corresponding Tab To Access Each Tool",
                textbox=gr.Textbox(placeholder="Enter your question/prompt here..."),
                theme= gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"),
                retry_btn=None,
                clear_btn="Clear Conversation")


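# Describe tab: image captioning via the hosted Salesforce BLIP model.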
A3 = gr.load(
             "models/Salesforce/blip-image-captioning-large",
              title=" ",
              description="Upload Any Type of Imagery (photos, medical imagery, etc.) and I'll Give You Its Description",
              outputs=[gr.Textbox(label="I see...")],
              theme= gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"))

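# Create tab: text-to-image generation via Stable Diffusion XL.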
A4 = gr.load(
             "models/stabilityai/stable-diffusion-xl-base-1.0",
              inputs=[gr.Textbox(label="Enter Your Image Description")],
              outputs=[gr.Image(label="Image")],
              title=" ",
              description="Bring Your Imagination Into Existence, Create Unique Images With COLLEAGUE",
              allow_flagging="never", 
              examples=["A gigantic celtic leprechaun wandering the streets of downtown Atlanta","A child eating pizza in a Brazilian favela"])

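# Write and Summarize tabs: embed companion Hugging Face Spaces as iframes.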
A5 = gr.HTML(
            value=("""
                    <iframe
	                src="https://peachtechai-colleague-scribe.hf.space"
	                frameborder="0"
	                width="1245"
	                height="1450"
                    ></iframe>"""),
                 )

A6 = gr.HTML(
            value=("""
                   <iframe
	               src="https://peachtechai-text-summarization.hf.space"
	               frameborder="0"
	               width="1285"
	               height="1000"
                   ></iframe> """),
                 )
    
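# Audio Transcription tab: transcribe microphone recordings or uploaded audio files.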
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Microphone(type="filepath"),
        gr.Audio(type="filepath"),
    ],
    outputs="text",
    title=" ",
    description=(
        "Transcribe real-time speech and audio files of any length at the click of a button."
    ),
    allow_flagging="never",
)

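# YouTube transcription interface (defined here but not included in the tabbed UI below).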
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[gr.Textbox(lines=1, placeholder="Paste your YouTube video URL/web address here", label="YouTube Video URL")],
    outputs=["html", "text"],
    title=" ",
    description=(
        "Short on Time? Get The Core Details and Transcribe YouTube videos at the click of a button."
      
    ),
    allow_flagging="never",
)

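# Assemble the tabs and launch the app with request queuing enabled.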
clp = gr.TabbedInterface([A1, A5, A6, mf_transcribe, A3, A4], ["Chat", "Write", "Summarize", "Audio Transcription", "Describe", "Create"], theme= gr.themes.Glass(primary_hue="neutral", neutral_hue="slate"))
clp.queue().launch()