Spaces:
Sleeping
Sleeping
zR
committed on
Commit
·
83a2412
1
Parent(s):
b485d38
5min
Browse files
app.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import subprocess
|
|
|
2 |
# Installing flash_attn
|
3 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
|
|
|
4 |
|
5 |
from threading import Thread
|
6 |
import spaces
|
@@ -27,7 +29,7 @@ class StopOnTokens(StoppingCriteria):
|
|
27 |
return False
|
28 |
|
29 |
|
30 |
-
@spaces.GPU(duration=
|
31 |
def predict(history, prompt, max_length, top_p, temperature):
|
32 |
stop = StopOnTokens()
|
33 |
messages = []
|
@@ -80,10 +82,10 @@ with gr.Blocks() as demo:
|
|
80 |
<a href="https://arxiv.org/pdf/2408.07055">📜 arxiv </a>
|
81 |
</div>
|
82 |
<div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
|
83 |
-
⚠️
|
84 |
-
|
85 |
-
|
86 |
-
please
|
87 |
</div>
|
88 |
"""
|
89 |
)
|
@@ -100,7 +102,8 @@ with gr.Blocks() as demo:
|
|
100 |
pBtn = gr.Button("Set Prompt")
|
101 |
with gr.Column(scale=1):
|
102 |
emptyBtn = gr.Button("Clear History")
|
103 |
-
max_length = gr.Slider(0, 128000, value=4096, step=1.0, label="Maximum length(Input + Output)",
|
|
|
104 |
top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
|
105 |
temperature = gr.Slider(0.01, 1, value=0.6, step=0.01, label="Temperature", interactive=True)
|
106 |
|
|
|
1 |
import subprocess
|
2 |
+
|
3 |
# Installing flash_attn
|
4 |
+
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
|
5 |
+
shell=True)
|
6 |
|
7 |
from threading import Thread
|
8 |
import spaces
|
|
|
29 |
return False
|
30 |
|
31 |
|
32 |
+
@spaces.GPU(duration=300)
|
33 |
def predict(history, prompt, max_length, top_p, temperature):
|
34 |
stop = StopOnTokens()
|
35 |
messages = []
|
|
|
82 |
<a href="https://arxiv.org/pdf/2408.07055">📜 arxiv </a>
|
83 |
</div>
|
84 |
<div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
|
85 |
+
⚠️ Due to the limitations of Huggingface ZERO GPUs, in order to output 10K characters in one go,
|
86 |
+
we need to request a 5-minute quota each time.
|
87 |
+
This will result in you only being able to use it once every 4 hours.
|
88 |
+
If you plan to use it long-term, please consider deploying the model yourself.
|
89 |
</div>
|
90 |
"""
|
91 |
)
|
|
|
102 |
pBtn = gr.Button("Set Prompt")
|
103 |
with gr.Column(scale=1):
|
104 |
emptyBtn = gr.Button("Clear History")
|
105 |
+
max_length = gr.Slider(0, 128000, value=4096, step=1.0, label="Maximum length(Input + Output)",
|
106 |
+
interactive=True)
|
107 |
top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
|
108 |
temperature = gr.Slider(0.01, 1, value=0.6, step=0.01, label="Temperature", interactive=True)
|
109 |
|