import os
from typing import List, Tuple

import gradio as gr
from openai import OpenAI

# OpenAI-compatible client pointed at the self-hosted inference server;
# BASE_URL / API_KEY must be present in the environment at import time.
client = OpenAI(
    base_url=f"{os.environ['BASE_URL']}/v1",
    api_key=os.environ["API_KEY"],
)


def respond(
    message,
    history: List[Tuple[str, str]],
    conversational,
    max_tokens,
):
    """Generate one chat completion for the Gradio ChatInterface.

    Args:
        message: Latest user utterance.
        history: Prior (user, assistant) exchange pairs supplied by Gradio.
        conversational: When truthy, replay recent history as context.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The assistant's reply text.
    """
    chat_log = []
    if conversational:
        # Only the two most recent exchanges are replayed, keeping the
        # prompt short; empty halves of a pair are skipped.
        for user_turn, bot_turn in history[-2:]:
            if user_turn:
                chat_log.append({"role": "user", "content": user_turn})
            if bot_turn:
                chat_log.append({"role": "assistant", "content": bot_turn})
    chat_log.append({"role": "user", "content": message})

    result = client.chat.completions.create(
        model="neongeckocom/NeonLLM",
        messages=chat_log,
        max_tokens=max_tokens,
        temperature=0,
        # vLLM-specific sampling knobs passed through the OpenAI client.
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    return result.choices[0].message.content


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    title="NeonLLM (v2024-06-06)",
    concurrency_limit=5,
)


if __name__ == "__main__":
    demo.launch()