Spaces:
Runtime error
Runtime error
File size: 5,364 Bytes
b5da0b6 dbb472d b5da0b6 dbb472d b5da0b6 de886af b5da0b6 dbb472d b5da0b6 de886af b5da0b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import gradio as gr
import os
import json
import requests
#Streaming endpoint
API_URL = "https://api.openai.com/v1/chat/completions" #os.getenv("API_URL") + "/generate_stream"
#Testing with my Open AI Key
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def predict(inputs, top_p, temperature, openai_api_key, chat_counter, chatbot=[], history=[]): #repetition_penalty, top_k
payload = {
"model": "gpt-4",
"messages": [{"role": "user", "content": f"{inputs}"}],
"temperature" : 1.0,
"top_p":1.0,
"n" : 1,
"stream": True,
"presence_penalty":0,
"frequency_penalty":0,
}
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {openai_api_key}"
}
print(f"chat_counter - {chat_counter}")
if chat_counter != 0 :
messages=[]
for data in chatbot:
temp1 = {}
temp1["role"] = "user"
temp1["content"] = data[0]
temp2 = {}
temp2["role"] = "assistant"
temp2["content"] = data[1]
messages.append(temp1)
messages.append(temp2)
temp3 = {}
temp3["role"] = "user"
temp3["content"] = inputs
messages.append(temp3)
#messages
payload = {
"model": "gpt-4",
"messages": messages, #[{"role": "user", "content": f"{inputs}"}],
"temperature" : temperature, #1.0,
"top_p": top_p, #1.0,
"n" : 1,
"stream": True,
"presence_penalty":0,
"frequency_penalty":0,
}
chat_counter+=1
history.append(inputs)
print(f"payload is - {payload}")
# make a POST request to the API endpoint using the requests.post method, passing in stream=True
response = requests.post(API_URL, headers=headers, json=payload, stream=True)
#response = requests.post(API_URL, headers=headers, json=payload, stream=True)
token_counter = 0
partial_words = ""
counter=0
for chunk in response.iter_lines():
#Skipping first chunk
if counter == 0:
counter+=1
continue
#counter+=1
# check whether each line is non-empty
if chunk.decode() :
chunk = chunk.decode()
# decode each line as response data is in bytes
if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
#if len(json.loads(chunk.decode()[6:])['choices'][0]["delta"]) == 0:
# break
partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
if token_counter == 0:
history.append(" " + partial_words)
else:
history[-1] = partial_words
chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2) ] # convert to tuples of list
token_counter+=1
yield chat, history, chat_counter # resembles {chatbot: chat, state: history}
def reset_textbox():
return gr.update(value='')
title = """<h1 align="center">🔥GPT4 with ChatCompletions API +🚀Gradio-Streaming</h1>"""
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of a gpt-4 LLM.
"""
with gr.Blocks(css = """#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}""") as demo:
gr.HTML(title)
gr.HTML('''<center><a href="https://huggingface.co/spaces/ysharma/ChatGPT4?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API Key</center>''')
with gr.Column(elem_id = "col_container"):
openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
chatbot = gr.Chatbot(elem_id='chatbot') #c
inputs = gr.Textbox(placeholder= "Hi there!", label= "Type an input and press Enter") #t
state = gr.State([]) #s
b1 = gr.Button()
#inputs, top_p, temperature, top_k, repetition_penalty
with gr.Accordion("Parameters", open=False):
top_p = gr.Slider( minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider( minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
#top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
#repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
chat_counter = gr.Number(value=0, visible=False, precision=0)
inputs.submit( predict, [inputs, top_p, temperature, openai_api_key, chat_counter, chatbot, state], [chatbot, state, chat_counter],)
b1.click( predict, [inputs, top_p, temperature, openai_api_key, chat_counter, chatbot, state], [chatbot, state, chat_counter],)
b1.click(reset_textbox, [], [inputs])
inputs.submit(reset_textbox, [], [inputs])
#gr.Markdown(description)
demo.queue().launch(debug=True)
|