zR committed on
Commit
83a2412
·
1 Parent(s): b485d38
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import subprocess
 
2
  # Installing flash_attn
3
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
4
 
5
  from threading import Thread
6
  import spaces
@@ -27,7 +29,7 @@ class StopOnTokens(StoppingCriteria):
27
  return False
28
 
29
 
30
- @spaces.GPU(duration=120)
31
  def predict(history, prompt, max_length, top_p, temperature):
32
  stop = StopOnTokens()
33
  messages = []
@@ -80,10 +82,10 @@ with gr.Blocks() as demo:
80
  <a href="https://arxiv.org/pdf/2408.07055">📜 arxiv </a>
81
  </div>
82
  <div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
83
- ⚠️ This is just a basic demo. Due to the scheduling limitations of Huggingface ZERO GPUs, the output
84
- length is restricted to under 4K per request (Otherwise may cause timeout ERROR).
85
- If you wish to experience the full capabilities of the model (output exceeding 10K),
86
- please deploy the model yourself.
87
  </div>
88
  """
89
  )
@@ -100,7 +102,8 @@ with gr.Blocks() as demo:
100
  pBtn = gr.Button("Set Prompt")
101
  with gr.Column(scale=1):
102
  emptyBtn = gr.Button("Clear History")
103
- max_length = gr.Slider(0, 128000, value=4096, step=1.0, label="Maximum length(Input + Output)", interactive=True)
 
104
  top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
105
  temperature = gr.Slider(0.01, 1, value=0.6, step=0.01, label="Temperature", interactive=True)
106
 
 
1
  import subprocess
2
+
3
  # Installing flash_attn
4
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
5
+ shell=True)
6
 
7
  from threading import Thread
8
  import spaces
 
29
  return False
30
 
31
 
32
+ @spaces.GPU(duration=300)
33
  def predict(history, prompt, max_length, top_p, temperature):
34
  stop = StopOnTokens()
35
  messages = []
 
82
  <a href="https://arxiv.org/pdf/2408.07055">📜 arxiv </a>
83
  </div>
84
  <div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
85
+ ⚠️ Due to the limitations of Huggingface ZERO GPUs, in order to output 10K characters in one go,
86
+ we need to request a 5-minute quota each time.
87
+ This will result in you only being able to use it once every 4 hours.
88
+ If you plan to use it long-term, please consider deploying the model yourself.
89
  </div>
90
  """
91
  )
 
102
  pBtn = gr.Button("Set Prompt")
103
  with gr.Column(scale=1):
104
  emptyBtn = gr.Button("Clear History")
105
+ max_length = gr.Slider(0, 128000, value=4096, step=1.0, label="Maximum length(Input + Output)",
106
+ interactive=True)
107
  top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
108
  temperature = gr.Slider(0.01, 1, value=0.6, step=0.01, label="Temperature", interactive=True)
109