Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -193,7 +193,7 @@ parser.add_argument(
|
|
193 |
parser.add_argument("--base-model-path", type=str, default="lmsys/vicuna-7b-v1.3",
|
194 |
help="path of basemodel, huggingface project or local path")
|
195 |
parser.add_argument(
|
196 |
-
"--load-in-8bit", action="
|
197 |
)
|
198 |
parser.add_argument(
|
199 |
"--load-in-4bit", action="store_true", help="Use 4-bit quantization"
|
@@ -233,7 +233,7 @@ with gr.Blocks(css=custom_css) as demo:
|
|
233 |
speed_box = gr.Textbox(label="Speed", elem_id="speed", interactive=False, value="0.00 tokens/s")
|
234 |
compression_box = gr.Textbox(label="Compression Ratio", elem_id="speed", interactive=False, value="0.00")
|
235 |
note1 = gr.Markdown(show_label=False, interactive=False,
|
236 |
-
value='''The Compression Ratio is defined as the number of generated tokens divided by the number of forward passes in the original LLM. The original LLM is Vicuna
|
237 |
note=gr.Markdown(show_label=False,interactive=False,value='''The tokens that EAGLE correctly guesses will be highlighted in orange. Note: This highlighting may lead to special formatting rendering issues in some instances, particularly when generating code.''')
|
238 |
|
239 |
|
|
|
193 |
parser.add_argument("--base-model-path", type=str, default="lmsys/vicuna-7b-v1.3",
|
194 |
help="path of basemodel, huggingface project or local path")
|
195 |
parser.add_argument(
|
196 |
+
"--load-in-8bit", action="store_false", help="Use 8-bit quantization"
|
197 |
)
|
198 |
parser.add_argument(
|
199 |
"--load-in-4bit", action="store_true", help="Use 4-bit quantization"
|
|
|
233 |
speed_box = gr.Textbox(label="Speed", elem_id="speed", interactive=False, value="0.00 tokens/s")
|
234 |
compression_box = gr.Textbox(label="Compression Ratio", elem_id="speed", interactive=False, value="0.00")
|
235 |
note1 = gr.Markdown(show_label=False, interactive=False,
|
236 |
+
value='''The Compression Ratio is defined as the number of generated tokens divided by the number of forward passes in the original LLM. The original LLM is Vicuna 7B, with inference conducted on a T4 GPU and at a precision of int8.''')
|
237 |
note=gr.Markdown(show_label=False,interactive=False,value='''The tokens that EAGLE correctly guesses will be highlighted in orange. Note: This highlighting may lead to special formatting rendering issues in some instances, particularly when generating code.''')
|
238 |
|
239 |
|