LLaMA_3.1_Vision

Sleeping

App Files Files Community

SFP committed on Jul 31, 2024

Commit

7a1e0cc

verified ·

1 Parent(s): 1856462

Upload 7 files

Browse files

Files changed (7) hide show

GPTSimple.py +101 -0
app.py +75 -0
delete.svg +1 -0
requirements.txt +3 -0
retry.svg +1 -0
style.css +62 -0
vision.py +25 -0

GPTSimple.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import requests
+import json
# Short provider aliases accepted as ``base_url`` and the chat-completions
# endpoint each alias resolves to.
base_urls = {
    "deepinfra": "https://api.deepinfra.com/v1/openai/chat/completions",
    "openai": "https://api.openai.com/v1/chat/completions",
}
def print_token(token):
    """Write one streamed token to stdout as soon as it arrives.

    Args:
        token: any object exposing a ``token`` attribute. ``None`` marks the
            end of the stream and terminates the current output line; any
            other value is printed without a trailing newline and flushed.
    """
    # Identity check: ``== None`` can be fooled by a custom ``__eq__``.
    if token.token is None:
        print()
    else:
        print(token.token, end="", flush=True)
def get_direct_output(history, model, api_key, stream=False, base_url="openai", timeout=None):
    """POST one chat-completions request and return the result.

    Args:
        history: list of message dicts, sent as the ``messages`` field.
        model: model identifier, sent as the ``model`` field.
        api_key: token placed in the ``Authorization: Bearer`` header.
        stream: when true, ask the server to stream and hand back the raw
            response object so the caller can iterate over it.
        base_url: either a key of ``base_urls`` or a full endpoint URL.
        timeout: forwarded to ``requests.post``. The default ``None`` keeps
            the previous behaviour of waiting indefinitely; pass a number of
            seconds (or a ``(connect, read)`` tuple) to bound the wait.

    Returns:
        The ``requests`` response object when ``stream`` is true, otherwise
        the decoded JSON body.
    """
    # Unknown names are treated as a literal URL.
    url = base_urls.get(base_url, base_url)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": model,
        "stream": stream,
        "messages": history,
    }
    response = requests.post(url, json=data, headers=headers, stream=stream, timeout=timeout)
    if stream:
        return response
    return response.json()
class conversation:
    """Chat session that keeps a message history and queries a chat API.

    The history is a list of ``{'role': ..., 'content': ...}`` dicts. Requests
    are sent through the module-level ``get_direct_output``. The lowercase
    class names (``conversation``, ``token``, ``response``) are kept as-is so
    existing callers keep working.
    """

    class token:
        """One parsed chunk of a streamed completion."""

        def __init__(self, line):
            """Build a token from one decoded chunk dict.

            ``token`` is ``None`` for the terminal chunk (any non-empty
            ``finish_reason``, not only ``"stop"``) and a string otherwise.
            A missing or null ``content`` becomes ``""`` so that ``None``
            unambiguously means "end of stream".
            """
            choice = line['choices'][0]
            self.model = line.get("model")
            self.response = line
            if choice.get('finish_reason'):
                self.token = None
                self.message = {'role': 'assistant', 'content': None}
            else:
                delta = choice.get('delta') or {}
                self.token = delta.get('content') or ""
                self.message = delta

    def streamingResponse(self, lines, invis):
        """Yield ``token`` objects parsed from a streamed HTTP response.

        Args:
            lines: response-like object exposing ``iter_lines()`` (and,
                optionally, ``close()``).
            invis: when true, the assembled reply is NOT added to
                ``self.history``.

        Yields:
            One ``token`` per content chunk, then a final token whose
            ``token`` attribute is ``None`` once a finish reason is seen.
        """
        pieces = []
        recorded = False
        try:
            for raw in lines.iter_lines():
                # Decode explicitly as UTF-8 rather than relying on the
                # encoding guessed from the response headers.
                if isinstance(raw, bytes):
                    raw = raw.decode("utf-8", errors="replace")
                # Only "data:" fields carry payloads; skip blanks/comments.
                if not raw.startswith("data:"):
                    continue
                # Slice off the prefix instead of splitting on it, so a
                # payload that itself contains "data: " stays intact.
                payload = raw[len("data:"):].strip()
                if payload == "[DONE]":
                    break
                if not payload:
                    continue
                chunk = json.loads(payload)
                # Some chunks carry no choices; nothing to emit.
                if not chunk.get('choices'):
                    continue
                tok = self.token(chunk)
                if tok.token is None:
                    if not invis:
                        self.history.append({'role': 'assistant', 'content': "".join(pieces)})
                    recorded = True
                    yield tok
                    break
                pieces.append(tok.token)
                yield tok
            # Stream ended without a finish chunk: still keep what arrived.
            text = "".join(pieces)
            if text and not recorded and not invis:
                self.history.append({'role': 'assistant', 'content': text})
        finally:
            # Release the connection even if the consumer stops early.
            close = getattr(lines, "close", None)
            if callable(close):
                close()

    class response:
        """Convenience view over a non-streamed completion body."""

        def __init__(self, json):
            # NOTE: the parameter name shadows the ``json`` module inside
            # this method only; it is kept for backward compatibility.
            self.response = json
            self.model = json['model']
            self.id = json['id']
            self.choices = json['choices']
            first_message = json['choices'][0]['message']
            self.text = first_message['content']
            self.message = first_message
            usage = json['usage']
            self.usage = usage
            self.prompt_tokens = usage['prompt_tokens']
            self.output_tokens = usage['completion_tokens']
            self.total_tokens = usage['total_tokens']

    def __init__(self, api_key='', model='gpt-3.5-turbo', history=None, system_prompt="You are a helpful assistant", base_url="openai"):
        """Create a session.

        Args:
            api_key: bearer token sent with every request.
            model: model identifier; the default is swapped for a Llama 2
                model when the ``deepinfra`` alias is selected.
            history: existing message list to continue from. When given it
                is used as-is (same list object) and ``system_prompt`` is
                ignored.
            system_prompt: content of the initial system message.
            base_url: provider alias (case-insensitive) or a full URL.
        """
        # Aliases are case-insensitive, but a full URL must keep its case:
        # paths and query strings can be case-sensitive.
        if "://" not in base_url:
            base_url = base_url.lower()
        if base_url == "deepinfra" and model == "gpt-3.5-turbo":
            model = "meta-llama/Llama-2-70b-chat-hf"
        self.base_url = base_url
        self.api_key = api_key
        self.model = model
        if history is not None:
            self.history = history
        else:
            self.history = [{'role': 'system', "content": system_prompt}]

    def _complete(self, messages, invisible, stream):
        """Send ``messages`` and return a token generator or a ``response``."""
        if stream:
            return self.streamingResponse(
                get_direct_output(messages, self.model, self.api_key, stream=True, base_url=self.base_url),
                invisible,
            )
        res = self.response(get_direct_output(messages, self.model, self.api_key, base_url=self.base_url))
        if not invisible:
            self.history.append(res.message)
        return res

    def generate(self, invisible=False, stream=False):
        """Request a completion for the current history.

        Returns a token generator when ``stream`` is true, otherwise a
        ``response``. Unless ``invisible`` is true the reply is appended to
        the history.
        """
        return self._complete(self.history, invisible, stream)

    def ask(self, message, invisible=False, stream=False):
        """Send ``message`` as a user turn and request a completion.

        With ``invisible`` true, neither the question nor the reply is stored
        in the history; the request is made from a copy of it.
        """
        user_turn = {"role": "user", "content": message}
        if invisible:
            out = [*self.history, user_turn]
        else:
            self.history.append(user_turn)
            out = self.history
        return self._complete(out, invisible, stream)

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+import os
+import time
+import GPTSimple as ai
+import random
+import vision
+# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.
# Markdown/HTML header shown at the top of the page.
DESC = (
    "# LLaMA 3.1 Vision\n"
    "<p>LLaMA 3.1 Vision uses LLaMA 3.1 405B and Florence 2 to give vision to LLaMA</p>"
)
def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag of a like/dislike event."""
    index, value, liked = x.index, x.value, x.liked
    print(index, value, liked)
def add_message(history, message):
    """Append a submitted multimodal message to the chat history.

    Each uploaded file becomes its own ``((path,), None)`` entry, followed by
    one ``(text, None)`` entry when the text is not ``None``. Returns the
    updated history and a cleared, non-interactive textbox.
    """
    history.extend(((path,), None) for path in message["files"])
    text = message["text"]
    if text is not None:
        history.append((text, None))
    locked_box = gr.MultimodalTextbox(value=None, interactive=False)
    return history, locked_box
def _build_messages(history):
    """Turn chatbot history rows into a list of chat-API message dicts."""
    messages = [{"role": "system", "content": "you are a helpful assistant. you can\"see\" image that the user sends by the description being in [IMG][/IMG]. don't reference how you can only see a description"}]
    pending_images = ""
    for row in history:
        user_part, reply = row[0], row[1]
        if isinstance(user_part, tuple):
            # File entry: describe it and hold the description until the
            # next text entry, so both go out as one user message.
            pending_images += "[IMG]" + vision.see_file(user_part[0]) + "[/IMG]\n"
        else:
            messages.append({"role": "user", "content": pending_images + user_part})
            pending_images = ""
        if reply is not None:
            # Images with no accompanying text must still precede the reply
            # they produced, otherwise the turn order is scrambled.
            if pending_images:
                messages.append({"role": "user", "content": pending_images})
                pending_images = ""
            messages.append({"role": "assistant", "content": reply})
    # History ended on image-only entries: send them rather than drop them.
    if pending_images:
        messages.append({"role": "user", "content": pending_images})
    return messages


def bot(history):
    """Stream the assistant's reply into the last history row.

    Args:
        history: chatbot rows of ``[user_part, reply]``. A tuple
            ``user_part`` is treated as an uploaded file whose first item is
            the path; anything else is used as message text.

    Yields:
        ``history`` after each received token, with the growing reply stored
        in ``history[-1][1]``.
    """
    his = _build_messages(history)
    chat = ai.conversation(base_url="deepinfra", model="meta-llama/Meta-Llama-3.1-405B-Instruct", history=his)
    print(his)
    stre = chat.generate(stream=True)
    history[-1][1] = ""
    for character in stre:
        # A ``None`` token marks the end of the stream.
        if character.token is not None:
            history[-1][1] += character.token
            yield history
def clear_history():
    """Return an empty chat history and an empty multimodal input value."""
    empty_chat = []
    empty_input = {"text": "", "files": []}
    return empty_chat, empty_input
def retry_last(history):
    """Discard the last reply and stream a fresh one.

    Yields the updated history after each token, exactly as ``bot`` does.
    With an empty history there is nothing to retry, so the history is
    yielded back unchanged instead of raising ``IndexError``.
    """
    if not history:
        yield history
        return
    history[-1][1] = None
    yield from bot(history)
# Page layout and event wiring.
with gr.Blocks(css="style.css", theme=gr.themes.Soft(), fill_height=True) as demo:
    gr.Markdown(DESC)
    chatbot = gr.Chatbot(
        show_label=False,
        scale=1,
        bubble_full_width=False,
        elem_id="chatbot",
    )
    with gr.Row():
        clear_button = gr.Button("", icon="delete.svg")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_count="multiple",
            placeholder="Enter message or upload file...",
            show_label=False,
        )
        retry_button = gr.Button("", icon="retry.svg")

    # Submit: record the message (the textbox is returned non-interactive),
    # stream the reply, then make the textbox interactive again.
    chat_msg = chat_input.submit(
        add_message,
        inputs=[chatbot, chat_input],
        outputs=[chatbot, chat_input],
    )
    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

    clear_button.click(clear_history, None, [chatbot, chat_input])
    retry_button.click(retry_last, [chatbot], chatbot)

demo.queue()
demo.launch()

delete.svg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+gradio_client
+requests

retry.svg ADDED Viewed

style.css ADDED Viewed

	@@ -0,0 +1,62 @@

/*
 * Custom styling for the chat page.
 * NOTE(review): the `svelte-*` suffixes look like build-generated class
 * names, and the last rule pins `gradio-container-4-39-0`; these selectors
 * presumably need re-checking whenever the Gradio version changes.
 */

/* Page title. */
h1 {
    text-align: center;
}

/* Rounded message input. */
textarea {
    border-radius: 32px;
    margin-left: 10px;
    margin-right: 10px;
}
.scroll-hide.svelte-it7283 {
    padding-top: 12px;
}

/* Round upload / submit buttons inside the input. */
.upload-button.svelte-it7283, .submit-button.svelte-it7283 {
    border-radius: 32px;
    min-width: 42px;
    height: 42px;
    margin-bottom: 0px;
}

/* User bubble: squared bottom-right corner. */
.flex-wrap.user.svelte-1ggj411.svelte-1ggj411 {
    border-color: var(--color-accent-soft);
    border-width: 2px;
    border-radius: 21px;
    border-bottom-right-radius: 0;
    padding: 6px 15px;
}

/* Bot bubble: squared bottom-left corner. */
:not(.component-wrap).flex-wrap.bot.svelte-1ggj411.svelte-1ggj411 {
    border-color: var(--color-accent-soft);
    border-width: 2px;
    border-radius: 21px;
    border-bottom-left-radius: 0;
    padding: 6px 15px;
}

/* Fixed-size 50px round buttons. */
.lg.svelte-cmf5ev {
    border-radius: 32px;
    padding: 0px;
    max-width: 50px;
    min-width: 50px;
    min-height: 50px;
    max-height: 50px;
    align-self: center;
    align-content: center;
    padding-left: 8px;
    background-color: var(--block-background-fill);
}

/* Pending placeholder, shaped like a bot bubble. */
gradio-app .gradio-container.gradio-container-4-39-0 .contain .pending.svelte-1gpwetz {
    align-self: baseline;
    background-color: transparent;
    gap: 2px;
    width: 25%;
    height: 42px;
    border-color: var(--color-accent-soft);
    border-width: 2px;
    border-radius: 21px !important;
    border-bottom-left-radius: 0px !important;
    margin-left: 29px;
    padding: 0px;
}

vision.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from gradio_client import Client, handle_file
# Results already computed by ``see_file`` in this process, keyed by the
# file path that was passed in.
cache = dict()
# Shared gradio_client connection used for every caption request.
client = Client(
    "gokaygokay/Florence-2",
    verbose=False,
)
def _extract_caption(raw):
    """Return the caption text held in the stringified result ``raw``."""
    import ast  # local import keeps this block self-contained

    # ``raw`` is handled as the text form of a one-entry dict. Parsing it
    # copes with either quote style (a caption containing an apostrophe is
    # rendered with double quotes) and decodes escape sequences.
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, dict) and '<MORE_DETAILED_CAPTION>' in parsed:
        return str(parsed['<MORE_DETAILED_CAPTION>'])
    # Unparseable output: fall back to the original prefix/suffix stripping.
    return raw.replace("{'<MORE_DETAILED_CAPTION>': '", "").replace("'}", "")


def _request_caption(source):
    """Ask the remote model for a detailed caption of ``source``."""
    result = client.predict(
        image=handle_file(source),
        task_prompt="More Detailed Caption",
        text_input=None,
        model_id="microsoft/Florence-2-large",
        api_name="/process_image",
    )
    return _extract_caption(result[0])


def see_url(url):
    """Return a detailed caption for the image at ``url`` (never cached)."""
    return _request_caption(url)


def see_file(fp):
    """Return a detailed caption for the image file ``fp``.

    Results are memoised in the module-level ``cache`` keyed by ``fp``, so a
    given path is only sent for captioning once per process.
    """
    if fp in cache:
        return cache[fp]
    r = _request_caption(fp)
    cache[fp] = r
    return r