Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from openai import AsyncOpenAI | |
# Address of the OpenAI-compatible HTTP endpoint the chat client talks to.
base_url = "http://127.0.0.1:8080/v1"

# Shared async client used by the chat handler. "-" is a placeholder API key
# (presumably the local server does not validate it — confirm).
client = AsyncOpenAI(api_key="-", base_url=base_url)
"""
Reference only (this string is never used at runtime): keyword arguments,
with their type hints, that the chat-completion request may accept.
`_default_parameters` sets a subset of them.

frequency_penalty: Optional[float] = None,
logit_bias: Optional[List[float]] = None,
logprobs: Optional[bool] = None,
top_logprobs: Optional[int] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[float] = None,
stream: bool = False,
seed: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
tools: Optional[List[Tool]] = None,
tool_choice: Optional[str] = None,
"""
| def _default_parameters(): | |
| return { | |
| "max_tokens": 256, | |
| "stream": True, | |
| "temperature": 0.9, | |
| } | |
| def _translate_messages(history): | |
| messages = [] | |
| for conv in history: | |
| messages.append({"role":"user", "content":conv[0]}) | |
| messages.append({"role":"assistant", "content":conv[1]}) | |
| return messages | |
async def echo(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Builds the request from ``_default_parameters()`` and the translated
    ``history``, appends ``message`` as the latest user turn, and sends it
    through the module-level ``client`` with ``model="tgi"``.

    Args:
        message: Text of the newest user message.
        history: Earlier conversation turns, handed to
            ``_translate_messages``.

    Yields:
        The reply text accumulated so far, once for every chunk received.
    """
    parameters = _default_parameters()
    messages = _translate_messages(history)
    messages.append({"role": "user", "content": message})
    responses = await client.chat.completions.create(
        model="tgi", messages=messages, **parameters
    )
    full_resp = ""
    async for resp in responses:
        # A streamed chunk may carry no text: its delta content can be None
        # (for example a role-only or finishing chunk) and its choices list
        # can be empty. Concatenating None would raise TypeError, so only
        # real text is appended.
        piece = resp.choices[0].delta.content if resp.choices else None
        if piece:
            full_resp += piece
        # Still yield once per chunk so the caller sees every update.
        yield full_resp
# Settings for the chat UI: `echo` produces the replies, and the examples are
# offered as sample prompts.
_chat_options = {
    "fn": echo,
    "examples": ["hello", "how are you?", "What is Large Language Model?"],
    "title": "Space of Gradio β Text Generation Inference",
    "multimodal": False,
}
demo = gr.ChatInterface(**_chat_options)

# Enable the queue, then start the server on port 3000 with host "0.0.0.0".
demo.queue().launch(server_port=3000, server_name="0.0.0.0")