chansung committed on
Commit
f9aa63c
•
1 Parent(s): 71388ca
Files changed (3)
  1. app/gen/openllm.py +46 -0
  2. app/main.py +2 -51
  3. app/ui.py +13 -0
app/gen/openllm.py ADDED
@@ -0,0 +1,46 @@
+ from openai import AsyncOpenAI
+
+ base_url = "http://127.0.0.1:8080/v1"
+ client = AsyncOpenAI(base_url=base_url, api_key="-")
+
+ def _default_parameters():
+     """
+     frequency_penalty: Optional[float] = None,
+     logit_bias: Optional[List[float]] = None,
+     logprobs: Optional[bool] = None,
+     top_logprobs: Optional[int] = None,
+     max_tokens: Optional[int] = None,
+     n: Optional[int] = None,
+     presence_penalty: Optional[float] = None,
+     stream: bool = False,
+     seed: Optional[int] = None,
+     temperature: Optional[float] = None,
+     top_p: Optional[float] = None,
+     tools: Optional[List[Tool]] = None,
+     tool_choice: Optional[str] = None,
+     """
+     return {
+         "max_tokens": 256,
+         "stream": True,
+         "temperature": 0.9,
+     }
+
+ def translate_messages(history):  # Gradio (user, assistant) pairs -> OpenAI messages
+     messages = []
+
+     for conv in history:
+         messages.append({"role": "user", "content": conv[0]})
+         messages.append({"role": "assistant", "content": conv[1]})
+
+     return messages
+
+ async def chat(messages, parameters=None):
+     if parameters is None:
+         parameters = _default_parameters()
+
+     responses = await client.chat.completions.create(
+         model="tgi", messages=messages, **parameters
+     )
+
+     async for resp in responses:
+         yield resp.choices[0].delta.content or ""  # delta may be None on some chunks
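
For context, here is a minimal sketch of driving the new module directly, assuming a TGI server is serving the OpenAI-compatible API at the base_url above; the history and prompt are made up for illustration:

import asyncio

from app.gen.openllm import chat, translate_messages

async def demo():
    history = [("hi", "Hello! How can I help?")]  # Gradio-style (user, assistant) pairs
    messages = translate_messages(history)
    messages.append({"role": "user", "content": "What is TGI?"})

    # Overriding parameters is optional; if you do, keep stream=True,
    # since chat() iterates the response as a stream.
    params = {"max_tokens": 64, "stream": True, "temperature": 0.2}
    async for token in chat(messages, parameters=params):
        print(token, end="", flush=True)

asyncio.run(demo())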
app/main.py CHANGED
@@ -1,59 +1,10 @@
  import argparse
  import gradio as gr
- from openai import AsyncOpenAI
-
- base_url = "http://127.0.0.1:8080/v1"
- client = AsyncOpenAI(base_url=base_url, api_key="-")
-
- """
- frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
- logprobs: Optional[bool] = None,
- top_logprobs: Optional[int] = None,
- max_tokens: Optional[int] = None,
- n: Optional[int] = None,
- presence_penalty: Optional[float] = None,
- stream: bool = False,
- seed: Optional[int] = None,
- temperature: Optional[float] = None,
- top_p: Optional[float] = None,
- tools: Optional[List[Tool]] = None,
- tool_choice: Optional[str] = None,
- """
-
- def _default_parameters():
-     return {
-         "max_tokens": 256,
-         "stream": True,
-         "temperature": 0.9,
-     }
-
- def _translate_messages(history):
-     messages = []
-
-     for conv in history:
-         messages.append({"role":"user", "content":conv[0]})
-         messages.append({"role":"assistant", "content":conv[1]})
-
-     return messages
-
- async def echo(message, history):
-     parameters = _default_parameters()
-     messages = _translate_messages(history)
-     messages.append({"role":"user", "content":message})
-
-     responses = await client.chat.completions.create(
-         model="tgi", messages=messages, **parameters
-     )
-
-     full_resp = ""
-     async for resp in responses:
-         full_resp = full_resp + resp.choices[0].delta.content
-         yield full_resp
+ from app.ui import chat

  def main(args):
      demo = gr.ChatInterface(
-         fn=echo,
+         fn=chat,
          examples=["hello", "how are you?", "What is Large Language Model?"],
          title="Space of Gradio ➕ Text Generation Inference",
          multimodal=False
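
The swap from fn=echo to fn=chat relies on gr.ChatInterface accepting an async generator and rendering each yielded value as the current reply, which produces the streaming effect. A self-contained toy, independent of this repo, shows the pattern:

import asyncio
import gradio as gr

async def fake_chat(message, history):
    acc = ""
    for word in f"you said: {message}".split():
        acc += word + " "
        await asyncio.sleep(0.05)  # simulate per-token latency
        yield acc  # each yield replaces the reply shown so far

demo = gr.ChatInterface(fn=fake_chat)

if __name__ == "__main__":
    demo.launch()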
app/ui.py ADDED
@@ -0,0 +1,13 @@
+ from app.gen.openllm import (
+     chat as generate,  # aliased so the Gradio-facing wrapper below can keep the name chat
+     translate_messages,
+ )
+
+ async def chat(message, history):
+     messages = translate_messages(history)
+     messages.append({"role": "user", "content": message})
+
+     full_resp = ""
+     async for resp in generate(messages):
+         full_resp = full_resp + resp
+         yield full_resp
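
As a quick sanity check, the UI-level generator can also be exercised outside Gradio, again assuming the TGI server behind app/gen/openllm.py is reachable:

import asyncio

from app.ui import chat

async def smoke_test():
    # chat() yields the accumulated reply after each streamed chunk.
    async for partial in chat("hello", history=[]):
        print(partial)

asyncio.run(smoke_test())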