mvkvc committed
Commit 6fe4e50
Parent(s): f009e61

Add streaming toggle

Files changed (6)
  1. README.md +1 -1
  2. app.py +34 -24
  3. poetry.lock +0 -0
  4. requirements.txt +0 -0
  5. sh/reqs.sh +1 -1
  6. sh/start.sh +1 -1
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 4.24.0
 app_file: app.py
 pinned: false
-short_description: 'Join the network at: https://replicantzk.com.'
+short_description: 'Learn more at https://replicantzk.com.'
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference.
app.py CHANGED
@@ -1,13 +1,22 @@
+import os
 from openai import AsyncOpenAI
 import gradio as gr
 
-client = AsyncOpenAI()
+default_model = "llama3:8b-instruct-q4_K_M"
+models = ["llama3:8b-instruct-q4_K_M", "codestral:22b-v0.1-q4_K_M"]
+description = "Learn more at https://replicantzk.com."
+base_url = os.getenv("OPENAI_BASE_URL") or "https://platform.replicantzk.com"
+api_key = os.getenv("OPENAI_API_KEY")
 
-async def predict(message, history, model, temperature):
+
+async def predict(message, history, model, temperature, stream, base_url, api_key):
+    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
+
     history_openai_format = []
     for human, assistant in history:
         history_openai_format.append({"role": "user", "content": human})
         history_openai_format.append({"role": "assistant", "content": assistant})
+
     history_openai_format.append({"role": "user", "content": message})
 
     try:
@@ -15,31 +24,32 @@ async def predict(message, history, model, temperature):
             model=model,
             messages=history_openai_format,
             temperature=temperature,
-            stream=True,
+            stream=stream,
         )
 
-        partial_message = ""
-        async for chunk in response:
-            if chunk.choices[0].delta.content is not None:
-                partial_message += chunk.choices[0].delta.content
-                yield partial_message
+        if stream:
+            partial_message = ""
+            async for chunk in response:
+                if chunk.choices[0].delta.content is not None:
+                    partial_message += chunk.choices[0].delta.content
+                    yield partial_message
+        else:
+            yield response.choices[0].message.content
 
     except Exception as e:
-        raise gr.Error(str(e))
-
-model_dropdown = gr.Dropdown(
-    label="Model",
-    choices=[
-        "llama3:8b-instruct-q4_K_M",
-        "codestral:22b-v0.1-q4_K_M",
-    ],
-    value="llama3:8b-instruct-q4_K_M",
+        raise gr.Error(str(e))
+
+
+model = gr.Dropdown(label="Model", choices=models, value=default_model)
+temperature = gr.Slider(0, 1, value=0, label="Temperature")
+stream = gr.Checkbox(value=True, label="Stream")
+base_url = gr.Textbox(label="OpenAI-compatible base URL", value=base_url)
+api_key = gr.Textbox(label="OpenAI-compatible API key", type="password", value=api_key)
+demo = gr.ChatInterface(
+    fn=predict,
+    additional_inputs=[model, temperature, stream, base_url, api_key],
+    description=description,
 )
-temperature_slider = gr.Slider(0, 1, value=0, label="Temperature")
-api_key = gr.Textbox(label="OpenAI API Key")
-
-desc = "Learn more and join here: https://replicantzk.com."
 
-gr.ChatInterface(
-    predict, additional_inputs=[model_dropdown, temperature_slider], description=desc
-).launch()
+if __name__ == "__main__":
+    demo.launch()
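
For reference, a minimal standalone sketch of what the new Stream checkbox toggles at the client level: with stream=True an OpenAI-compatible endpoint yields incremental delta chunks, and with stream=False it returns a single complete message. The base URL and model name below are the defaults taken from app.py; the API key is a hypothetical placeholder.

import asyncio
from openai import AsyncOpenAI

async def main():
    # Defaults from app.py; the API key is a placeholder, not a real credential.
    client = AsyncOpenAI(
        base_url="https://platform.replicantzk.com",
        api_key="sk-placeholder",
    )
    messages = [{"role": "user", "content": "Hello"}]

    # stream=True: the response is an async iterator of delta chunks.
    stream = await client.chat.completions.create(
        model="llama3:8b-instruct-q4_K_M", messages=messages, stream=True
    )
    async for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

    # stream=False: the response carries one complete message.
    response = await client.chat.completions.create(
        model="llama3:8b-instruct-q4_K_M", messages=messages, stream=False
    )
    print(response.choices[0].message.content)

asyncio.run(main())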
 
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff
 
sh/reqs.sh CHANGED
@@ -1,3 +1,3 @@
 #! /bin/sh
 
-poetry export -f requirements.txt --output requirements.txt
+poetry export -f requirements.txt --output ./requirements.txt
sh/start.sh CHANGED
@@ -1,3 +1,3 @@
 #! /bin/sh
 
-poetry run python ./app.py
+poetry run gradio ./app.py