yuhuili committed on
Commit
9647dd6
1 Parent(s): b66c71a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -193,7 +193,7 @@ parser.add_argument(
193
  parser.add_argument("--base-model-path", type=str, default="lmsys/vicuna-7b-v1.3",
194
  help="path of basemodel, huggingface project or local path")
195
  parser.add_argument(
196
- "--load-in-8bit", action="store_true", help="Use 8-bit quantization"
197
  )
198
  parser.add_argument(
199
  "--load-in-4bit", action="store_true", help="Use 4-bit quantization"
@@ -233,7 +233,7 @@ with gr.Blocks(css=custom_css) as demo:
233
  speed_box = gr.Textbox(label="Speed", elem_id="speed", interactive=False, value="0.00 tokens/s")
234
  compression_box = gr.Textbox(label="Compression Ratio", elem_id="speed", interactive=False, value="0.00")
235
  note1 = gr.Markdown(show_label=False, interactive=False,
236
- value='''The Compression Ratio is defined as the number of generated tokens divided by the number of forward passes in the original LLM. The original LLM is Vicuna 13B, with inference conducted on RTX 3090 GPUs and at a precision of fp16.''')
237
  note=gr.Markdown(show_label=False,interactive=False,value='''The tokens that EAGLE correctly guesses will be highlighted in orange. Note: This highlighting may lead to special formatting rendering issues in some instances, particularly when generating code.''')
238
 
239
 
 
193
  parser.add_argument("--base-model-path", type=str, default="lmsys/vicuna-7b-v1.3",
194
  help="path of basemodel, huggingface project or local path")
195
  parser.add_argument(
196
+ "--load-in-8bit", action="store_false", help="Use 8-bit quantization"
197
  )
198
  parser.add_argument(
199
  "--load-in-4bit", action="store_true", help="Use 4-bit quantization"
 
233
  speed_box = gr.Textbox(label="Speed", elem_id="speed", interactive=False, value="0.00 tokens/s")
234
  compression_box = gr.Textbox(label="Compression Ratio", elem_id="speed", interactive=False, value="0.00")
235
  note1 = gr.Markdown(show_label=False, interactive=False,
236
+ value='''The Compression Ratio is defined as the number of generated tokens divided by the number of forward passes in the original LLM. The original LLM is Vicuna 7B, with inference conducted on a T4 GPU and at a precision of int8.''')
237
  note=gr.Markdown(show_label=False,interactive=False,value='''The tokens that EAGLE correctly guesses will be highlighted in orange. Note: This highlighting may lead to special formatting rendering issues in some instances, particularly when generating code.''')
238
 
239