jadehardouin
committed
Commit bbc8453 • 1 Parent(s): 4424c49
Update models.py
models.py CHANGED
@@ -112,11 +112,21 @@ class OpenSourceLlama2Model(BaseTCOModel):
 
         def on_model_change(model):
             if model == "Llama 2 7B":
-                return [gr.Dropdown.update(choices=vm_choices),
+                return [gr.Dropdown.update(choices=vm_choices),
+                        gr.Markdown.update(value="To see the script used to benchmark the Llama2-7B model, [click here](https://example.com/script)"),
+                        gr.Number.update(value=3.6730),
+                        gr.Number.update(value=694.38),
+                        gr.Number.update(visible=True)
+                        ]
             else:
                 not_supported_vm = ["1x Nvidia A100 (Azure NC24ads A100 v4)", "2x Nvidia A100 (Azure NC48ads A100 v4)"]
                 choices = [x for x in vm_choices if x not in not_supported_vm]
-                return [gr.Dropdown.update(choices=choices
+                return [gr.Dropdown.update(choices=choices, value="4x Nvidia A100 (Azure NC48ads A100 v4)"),
+                        gr.Markdown.update(value="To see the benchmark results used for the Llama2-70B model, [click here](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)"),
+                        gr.Number.update(value=14.692),
+                        gr.Number.update(value=18.6),
+                        gr.Number.update(visible=False)
+                        ]
 
         def on_vm_change(model, vm):
             # TO DO: load info from CSV
@@ -144,10 +154,9 @@ class OpenSourceLlama2Model(BaseTCOModel):
         )
         self.input_length = gr.Number(233, label="Average number of input tokens", info="This is the number of input tokens used when the model was benchmarked to get the number of tokens/second it processes",
                                       interactive=False, visible=False)
-        self.
-        self.info_70B = gr.Markdown("To see the benchmark results used for the Llama2-70B model, [click here](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)", interactive=False, visible=False)
+        self.info = gr.Markdown("To see the script used to benchmark the Llama2-7B model, [click here](https://example.com/script)", interactive=False, visible=False)
 
-        self.model.change(on_model_change, inputs=self.model, outputs=[self.vm, self.
+        self.model.change(on_model_change, inputs=self.model, outputs=[self.vm, self.info, self.vm_cost_per_hour, self.tokens_per_second, self.input_length])
         self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=[self.vm_cost_per_hour, self.tokens_per_second])
         self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
                                    info="How much the GPU is fully used",
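For context on the pattern this commit leans on: in Gradio 3.x (assumed here, since the code calls gr.Dropdown.update), an event handler may return a list of gr.<Component>.update(...) objects that are matched positionally against the outputs list, so on_model_change's five updates line up with [self.vm, self.info, self.vm_cost_per_hour, self.tokens_per_second, self.input_length]. A minimal self-contained sketch of that wiring; the component names, choices, and values below are illustrative placeholders, not taken from this repo:

import gradio as gr  # assumes Gradio 3.x, where components expose a static update() helper

# Illustrative placeholder list, not the repo's actual vm_choices
vm_choices = ["1x Nvidia A100", "2x Nvidia A100", "4x Nvidia A100"]

with gr.Blocks() as demo:
    model = gr.Dropdown(["Llama 2 7B", "Llama 2 70B"], value="Llama 2 7B", label="Model")
    vm = gr.Dropdown(vm_choices, label="VM instance")
    info = gr.Markdown(visible=False)
    cost = gr.Number(label="VM cost per hour")

    def on_model_change(m):
        # One update per component in `outputs` below, matched by position: vm, info, cost
        if m == "Llama 2 7B":
            return [gr.Dropdown.update(choices=vm_choices),
                    gr.Markdown.update(value="7B benchmark notes", visible=True),
                    gr.Number.update(value=3.6730)]
        return [gr.Dropdown.update(choices=vm_choices[2:], value=vm_choices[2]),
                gr.Markdown.update(value="70B benchmark notes", visible=True),
                gr.Number.update(value=14.692)]

    model.change(on_model_change, inputs=model, outputs=[vm, info, cost])

demo.launch()

Because the matching is purely positional, returning updates in the wrong order silently writes values into the wrong components, which is presumably why the commit extends outputs with self.info and self.input_length at the same time the handler starts returning updates for them.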
|