eduardo-alvarez committed
Commit: cd2355c
1 Parent(s): 15635cd

create leaderboard
Files changed:
- __pycache__/app.cpython-38.pyc +0 -0
- app.py +181 -0
- info/__pycache__/citation.cpython-38.pyc +0 -0
- info/__pycache__/deployment.cpython-38.pyc +0 -0
- info/__pycache__/programs.cpython-38.pyc +0 -0
- info/__pycache__/submit.cpython-38.pyc +0 -0
- info/__pycache__/train_a_model.cpython-38.pyc +0 -0
- info/citation.py +8 -0
- info/deployment.py +126 -0
- info/programs.py +37 -0
- info/submit.py +37 -0
- info/train_a_model.py +73 -0
- leaderboard_status_030424.csv +5 -0
- src/__pycache__/processing.cpython-38.pyc +0 -0
- src/leaderboard_filtered.csv +7 -0
- src/processing.py +20 -0
- src/submit.py +0 -0
__pycache__/app.cpython-38.pyc
ADDED
Binary file (4.74 kB)
app.py
ADDED
@@ -0,0 +1,181 @@
import gradio as gr
import pandas as pd
import random  # used by the commented-out comparison tool below
import time    # used by the commented-out comparison tool below

from info.train_a_model import LLM_BENCHMARKS_TEXT
from info.submit import SUBMIT_TEXT
from info.deployment import DEPLOY_TEXT
from info.programs import PROGRAMS_TEXT
from info.citation import CITATION_TEXT
from src.processing import filter_benchmarks_table, make_clickable

demo = gr.Blocks()

with demo:
    gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
    gr.Markdown("This leaderboard is designed to evaluate, score, and rank open-source large language \
                 models that have been pre-trained or fine-tuned on Intel Hardware 🦾")
    gr.Markdown("Models submitted to the leaderboard are evaluated \
                 on the Intel Developer Cloud ☁️")

    # TODO: comparison tool, coming soon
    # with gr.Accordion("🥊Large Language Model Boxing Ring 🥊", open=False):
    #     with gr.Row():
    #         chat_a = gr.Chatbot()
    #         chat_b = gr.Chatbot()
    #     msg = gr.Textbox()
    #     gr.ClearButton([msg, chat_a])
    #
    #     def respond(message, chat_history):
    #         bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
    #         chat_history.append((message, bot_message))
    #         time.sleep(2)
    #         return "", chat_history
    #
    #     msg.submit(respond, inputs=[msg, chat_a], outputs=[msg, chat_a])

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 LLM Benchmark", elem_id="llm-benchmark-table", id=0):
            with gr.Row():
                with gr.Column():
                    filter_hw = gr.CheckboxGroup(choices=["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"],
                                                 label="Select Training Platform*",
                                                 elem_id="compute_platforms",
                                                 value=["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"])
                    filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud", "AWS", "Azure", "GCP", "Local"],
                                                       label="Training Infrastructure*",
                                                       elem_id="training_infra",
                                                       value=["Intel Developer Cloud", "AWS", "Azure", "GCP", "Local"])
                    filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation", "Intel Innovator", "Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
                                                          label="Intel Program Affiliation",
                                                          elem_id="program_affiliation",
                                                          value=["No Affiliation", "Intel Innovator", "Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"])

                with gr.Column():
                    filter_size = gr.CheckboxGroup(choices=[1, 3, 5, 7, 13, 35, 60, 70, 100],
                                                   label="Model Sizes (Billions of Parameters)",
                                                   elem_id="parameter_size",
                                                   value=[1, 3, 5, 7, 13, 35, 60, 70, 100])
                    filter_precision = gr.CheckboxGroup(choices=["fp8", "fp16", "bf16", "int8", "4bit"],
                                                        label="Model Precision",
                                                        elem_id="precision",
                                                        value=["fp8", "fp16", "bf16", "int8", "4bit"])
                    filter_type = gr.CheckboxGroup(choices=["pretrained", "fine-tuned", "chat-models", "merges/moerges"],
                                                   label="Model Types",
                                                   elem_id="model_types",
                                                   value=["pretrained", "fine-tuned", "chat-models", "merges/moerges"])

            initial_df = pd.read_csv("leaderboard_status_030424.csv")

            gradio_df_display = gr.Dataframe(value=initial_df)  # fixed: show the full table on load instead of an empty grid

            def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
                filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
                                                      affiliation_selected=affiliation_selected, size_selected=size_selected,
                                                      precision_selected=precision_selected, type_selected=type_selected)
                return filtered_df

            filter_hw.change(fn=update_df,
                             inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                             outputs=[gradio_df_display])
            filter_platform.change(fn=update_df,
                                   inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                                   outputs=[gradio_df_display])
            filter_affiliation.change(fn=update_df,
                                      inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                                      outputs=[gradio_df_display])
            filter_size.change(fn=update_df,
                               inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                               outputs=[gradio_df_display])
            filter_precision.change(fn=update_df,
                                    inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                                    outputs=[gradio_df_display])
            filter_type.change(fn=update_df,
                               inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
                               outputs=[gradio_df_display])
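            # Note: the six identical .change() hookups above could be collapsed
            # into a loop -- an equivalent sketch, left as a comment:
            #
            #     filters = [filter_hw, filter_platform, filter_affiliation,
            #                filter_size, filter_precision, filter_type]
            #     for f in filters:
            #         f.change(fn=update_df, inputs=filters, outputs=[gradio_df_display])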

        with gr.TabItem("🧰 Train a Model", elem_id="getting-started", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🚀 Deployment Tips", elem_id="deployment-tips", id=2):
            gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
        with gr.TabItem("👩‍💻 Developer Programs", elem_id="hardware-program", id=3):
            gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
            gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                    model_type = gr.Dropdown(
                        choices=["pretrained", "fine-tuned", "chat models", "merges/moerges"],
                        label="Model type",
                        multiselect=False,
                        value="pretrained",
                        interactive=True,
                    )

                    hw_type = gr.Dropdown(
                        choices=["Gaudi", "Xeon", "GPU Max", "Arc GPU"],
                        label="Training Hardware",
                        multiselect=False,
                        value="Gaudi",  # fixed: "Gaudi2" was not one of the choices
                        interactive=True,
                    )
                    terms = gr.Checkbox(
                        label="Check if you have read and agreed to the terms and conditions associated with submitting \
                               a model to the leaderboard.",
                        value=False,
                        interactive=True,
                    )
                with gr.Column():
                    precision = gr.Dropdown(
                        choices=["fp8", "fp16", "bf16", "int8", "4bit"],
                        label="Precision",
                        multiselect=False,
                        value="fp16",
                        interactive=True,
                    )
                    weight_type = gr.Dropdown(
                        choices=["Original", "Adapter", "Delta"],
                        label="Weights type",
                        multiselect=False,
                        value="Original",
                        interactive=True,
                    )
                    training_infra = gr.Dropdown(
                        choices=["IDC", "AWS", "Azure", "GCP", "Local"],
                        label="Training Infrastructure",
                        multiselect=False,
                        value="IDC",
                        interactive=True,
                    )
                    affiliation = gr.Dropdown(
                        choices=["No Affiliation", "Innovator", "Student Ambassador", "Intel Liftoff", "Intel Labs", "Other"],
                        label="Affiliation with Intel",
                        multiselect=False,
                        value="No Affiliation",  # fixed: "Independent" was not one of the choices
                        interactive=True,
                    )
                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

            # submit_button = gr.Button("Submit Eval")
            # submission_result = gr.Markdown()
            gr.Markdown("Community Submissions Coming Soon!")

    with gr.Accordion("📙 Citation", open=False):
        citation = gr.Textbox(value=CITATION_TEXT,
                              lines=6,
                              label="Use the following to cite this content")

demo.launch()
info/__pycache__/citation.cpython-38.pyc
ADDED
Binary file (466 Bytes)
info/__pycache__/deployment.cpython-38.pyc
ADDED
Binary file (4.89 kB)
info/__pycache__/programs.cpython-38.pyc
ADDED
Binary file (2.48 kB)
info/__pycache__/submit.cpython-38.pyc
ADDED
Binary file (2.56 kB)
info/__pycache__/train_a_model.cpython-38.pyc
ADDED
Binary file (3.54 kB)
info/citation.py
ADDED
@@ -0,0 +1,8 @@
CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
  author = {Eduardo Alvarez},
  title = {Powered By Intel LLM Leaderboard},
  year = {2024},
  publisher = {Intel},
  howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_leaderboard}"
}
"""
info/deployment.py
ADDED
@@ -0,0 +1,126 @@
DEPLOY_TEXT = f"""
Having a table full of powerful models is nice and all, but at the end of the day you have to be able to use
them for something. Below you will find sample code to help you load models and perform inference.

## Inference with Gaudi 2
Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
(e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
and graph-level optimizations).

Optimum Habana provides convenient functionality for various tasks; below you'll find the command-line
snippet you would run to perform inference on Gaudi with meta-llama/Llama-2-7b-hf.

The "run_generation.py" script below can be found [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation).

```bash
python run_generation.py \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--use_hpu_graphs \
--use_kv_cache \
--max_new_tokens 100 \
--do_sample \
--batch_size 2 \
--prompt "Hello world" "How are you?"
```

## Inference with Intel Extension for Transformers
Intel® Extension for Transformers is an innovative toolkit designed to accelerate GenAI/LLM workloads
everywhere with the optimal performance of Transformer-based models on various Intel platforms,
including Intel Gaudi2, Intel CPU, and Intel GPU.

### INT4 Inference (CPU)
```python
from transformers import AutoTokenizer
from intel_extension_for_transformers.transformers import AutoModelForCausalLM

model_name = "Intel/neural-chat-7b-v3-1"
prompt = "When winter becomes spring, the flowers..."

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inputs = tokenizer(prompt, return_tensors="pt").input_ids

model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
outputs = model.generate(inputs)
```

### INT4 Inference (GPU)
```python
import torch  # fixed: torch was used below but never imported
import intel_extension_for_pytorch as ipex
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
from transformers import AutoTokenizer

device_map = "xpu"
model_name = "Qwen/Qwen-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
prompt = "When winter becomes spring, the flowers..."
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(device_map)

model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                             device_map=device_map, load_in_4bit=True)

model = ipex.optimize_transformers(model, inplace=True, dtype=torch.float16, woq=True, device=device_map)

output = model.generate(inputs)
```

## Intel Extension for PyTorch
Intel® Extension for PyTorch extends PyTorch with up-to-date features and optimizations for an
extra performance boost on Intel hardware. Optimizations take advantage of Intel® Advanced
Vector Extensions 512 (Intel® AVX-512), Vector Neural Network Instructions (VNNI), and Intel®
Advanced Matrix Extensions (Intel® AMX) on Intel CPUs, as well as Intel Xe Matrix Extensions
(XMX) AI engines on Intel discrete GPUs. Moreover, Intel® Extension for PyTorch provides easy
GPU acceleration for Intel discrete GPUs through the PyTorch xpu device.

There are a few flavors of PyTorch that can be leveraged for inference. For detailed documentation,
visit https://intel.github.io/intel-extension-for-pytorch/#introduction

### IPEX with Optimum Intel (no quantization)
Requires installing/updating optimum: `pip install --upgrade-strategy eager optimum[ipex]`
```python
from optimum.intel import IPEXModelForCausalLM
from transformers import AutoTokenizer, pipeline

model_id = "Intel/neural-chat-7b-v3-1"  # illustrative; the original snippet left model_id unset
model = IPEXModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
results = pipe("A fisherman at sea...")
```

### IPEX with Stock PyTorch with Mixed Precision
```python
import torch
import intel_extension_for_pytorch as ipex
import transformers

model_name_or_path = ""  # set to your model name or checkpoint folder
model = transformers.AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()  # fixed: from_pretrained was missing

dtype = torch.float  # or torch.bfloat16
model = ipex.llm.optimize(model, dtype=dtype)

# generation inference loop (pass your tokenized input_ids to generate)
with torch.inference_mode():
    model.generate()
```

## OpenVINO Toolkit
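Running the example below requires Optimum Intel with the OpenVINO backend; to the best of our knowledge this is installed with `pip install optimum[openvino]` (check the current Optimum Intel docs if the extra name has changed).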
```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

model_id = "helenai/gpt2-ov"
model = OVModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

pipe("In the spring, beautiful flowers bloom...")
```

"""
info/programs.py
ADDED
@@ -0,0 +1,37 @@
PROGRAMS_TEXT = """
Intel offers a range of programs to grant developers early, short-term, and long-term access to hardware. A great way to build
and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
these opportunities below:

## Intel Liftoff Program
Intel® Liftoff for Startups is open to early-stage AI and machine learning startups. This free virtual program
helps you innovate and scale, no matter where you are in your entrepreneurial journey.

Through Intel Liftoff, startups can access the computational power they need to build powerful LLMs on platforms
like Gaudi, Max Series GPUs, and Xeon Processors.

Learn more and apply at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html

## Intel Student Ambassador Program
This program is focused on undergraduate and graduate students who are passionate about technology and
working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
for students to enhance their AI and oneAPI skills, expand their network, and learn about cutting-edge Intel®
hardware and software products.

Through the Student Ambassador Program you can get early access to the latest technology developments that are under
a nondisclosure agreement (NDA) and extended access to the Intel® Developer Cloud.

Learn more and apply at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3

## Intel Innovator Program
This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
to the global developer community through their projects, expertise, and advocacy. It provides recognition as a
oneAPI expert, opportunities for event support, a spotlight showcase for projects, and more.

Through this program you will have free access to paid features on the Intel® Developer Cloud, a development
environment for projects on the latest Intel technology, and interactions with other oneAPI experts in the
community and within Intel.

Learn more and apply at https://www.intel.com/content/www/us/en/developer/community/innovators/oneapi-innovator.html
"""
info/submit.py
ADDED
@@ -0,0 +1,37 @@
SUBMIT_TEXT = f"""
# Evaluation Queue for the 🤗"Powered by Intel" LLM Leaderboard 💻
Models added here will be queued for evaluation on the Intel Developer Cloud ☁️

## First steps before submitting a model

### 1) Make sure you can load your model and tokenizer using AutoClasses:
```python
from transformers import AutoConfig, AutoModel, AutoTokenizer
config = AutoConfig.from_pretrained("your model name", revision=revision)
model = AutoModel.from_pretrained("your model name", revision=revision)
tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
```
If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
Note: make sure your model is public!
Note: if your model needs `use_remote_code=True`, we do not support this option yet, but we are working on adding it, so stay posted!

### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
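Re-saving an existing checkpoint in safetensors format is a one-liner; a minimal sketch (recent `transformers` releases already default to safe serialization):
```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("your model name")
model.save_pretrained("your-model-safetensors", safe_serialization=True)
```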
### 3) Make sure your model has an open license!
This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗

### 4) Fill out your model card
We use your model card to better understand the properties of your model and make it more easily discoverable for other users.
Model cards are required to mention the hardware, software, and infrastructure used for training; without this information
we cannot accept your model as a valid submission.

### 5) Select the correct precision
Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation errors (loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
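To sanity-check the precision you plan to select, you can inspect the loaded weights locally; a minimal sketch (replace the placeholder name with your model):
```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("your model name", torch_dtype=torch.bfloat16)
print(next(model.parameters()).dtype)  # expect torch.bfloat16 for a bf16 submission
```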
## In case of model failure
If your model is displayed in the `FAILED` category, its execution stopped.
Make sure you have followed the above steps first.
If everything is done, check that you can launch the EleutherAI Harness on your model locally, using the command in the About tab under "Reproducibility" with all arguments specified (you can add `--limit` to cap the number of examples per task); a sketch of such a command is shown below.
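With `lm-eval` (the EleutherAI harness) installed, a local smoke test might look like the following; the task list and model name are illustrative:
```bash
lm_eval --model hf \
    --model_args pretrained=your-model-name \
    --tasks hellaswag \
    --limit 10
```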
"""
info/train_a_model.py
ADDED
@@ -0,0 +1,73 @@
LLM_BENCHMARKS_TEXT = f"""
# Use the Resources Below to Start Training a Model Today

Intel offers a variety of platforms that can be used to train LLMs, including datacenter- and consumer-grade CPUs, GPUs, and ASICs.
Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the "Powered by Intel" LLM Leaderboard.

## Intel Developer Cloud - Quick Start
The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
224-core 4th Generation Xeon bare-metal nodes with 4x Max Series GPU 1100 GPUs. To access these resources, follow the instructions below:
1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
2. Navigate to the "Training" module under the "Software" section in the left panel.
3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch".
4. Follow the instructions in the notebook to train your model using Intel Max Series 1100 GPUs.
5. Upload your model to the Hugging Face Model Hub.
6. Go to the "Submit" tab and follow the instructions to create a leaderboard evaluation request.

## Additional Training Code Samples

Below is a list of additional resources for training models on different Intel hardware platforms; a minimal parameter-efficient fine-tuning sketch follows the list:
- Gaudi Processors
  - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
- Xeon Processors
  - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
  - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
- Max Series GPUs
  - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
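The sketch below shows the skeleton of a LoRA fine-tune with Hugging Face `peft`; the base model, target modules, and hyperparameters are illustrative placeholders, not recommendations:
```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1")  # illustrative base model
lora_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05,
                         target_modules=["q_proj", "v_proj"],  # module names vary by architecture
                         task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the small adapter matrices are trainable
# ...run your usual training loop, then save the adapter with model.save_pretrained("my-adapter")
```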
## Submitting your Model to the Hub
Once you have trained your model, it is a straightforward process to upload and open-source it on the Hugging Face Hub.

```python
# Logging in to Hugging Face
from huggingface_hub import notebook_login

# Login to Hugging Face
notebook_login()

# Model and Tokenizer Loading
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Define the path to the checkpoint
checkpoint_path = ""  # Replace with your checkpoint folder

# Load the model
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("")  # add the name of your model's tokenizer on Hugging Face OR a custom tokenizer

# Saving and Uploading the Model and Tokenizer

# Save the model and tokenizer
model_name_on_hub = "desired-model-name"
model.save_pretrained(model_name_on_hub)
tokenizer.save_pretrained(model_name_on_hub)

# Push to the hub
model.push_to_hub(model_name_on_hub)
tokenizer.push_to_hub(model_name_on_hub)

# Congratulations! Your fine-tuned model is now uploaded to the Hugging Face Model Hub.
# You can view and share your model using its URL: https://huggingface.co/your-username/your-model-name
```

"""

SUBMIT_TEXT = f"""
# Use the Resources Below to Start Training a Model Today

"""
leaderboard_status_030424.csv
ADDED
@@ -0,0 +1,5 @@
Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
Intel/neural-chat-7b-v3-3,69.83,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.89,85.26,63.07,63.01,79.64,61.11,Intel Labs
Intel/neural-chat-7b-v3-2,68.29,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.49,83.92,63.55,59.68,79.65,55.12,Intel Labs
Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.21,83.64,62.37,59.65,78.14,19.56,Intel Labs
Intel/neural-chat-7b-v3,58.46,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.15,83.29,62.26,58.77,78.06,1.21,Intel Labs
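The Average column tracks the plain mean of the six benchmark columns; a quick pandas sanity check against the file above (a sketch):
```python
import pandas as pd

df = pd.read_csv("leaderboard_status_030424.csv")
benchmarks = ["ARC", "HellaSwag", "MMLU", "TruthfulQA", "Winogrande", "GSM8K"]
# flag rows where the stored Average deviates from the recomputed mean by more than 0.01
gap = (df[benchmarks].mean(axis=1) - df["Average"]).abs()
print(df.loc[gap > 0.01, "Model"])
```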
src/__pycache__/processing.cpython-38.pyc
ADDED
Binary file (779 Bytes). View file
|
|
src/leaderboard_filtered.csv
ADDED
@@ -0,0 +1,7 @@
,Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
1,BetaWave,83.21,Arc GPU,fine-tuned,fp16,7,Local,70.44,92.32,78.67,85.55,90.0,Innovator
4,EpsilonWave,58.44,Xeon,fine-tuned,int8,3,AWS,91.22,82.1,60.55,80.11,77.89,Partner
6,EtaMatrix,69.78,Xeon,fine-tuned,int8,3,GCP,85.55,79.33,70.89,72.18,79.44,Liftoff
7,ThetaCore,88.12,Arc GPU,fine-tuned,int8,3,Local,67.33,85.78,88.55,86.9,83.11,Liftoff
14,BetaNeural,79.67,Gaudi 1,fine-tuned,4bit,7,AWS,85.44,77.22,83.1,75.45,71.33,Partner
15,TrackSpeed,88.12,Arc GPU,fine-tuned,4bit,7,Local,67.33,85.78,88.55,86.9,83.11,Student Ambassador
src/processing.py
ADDED
@@ -0,0 +1,20 @@
def filter_benchmarks_table(df, hw_selected, platform_selected,
                            size_selected, precision_selected,
                            type_selected, affiliation_selected):

    filtered_df = df[
        df['Hardware'].isin(hw_selected) &
        df['Infrastructure'].isin(platform_selected) &
        df['Size'].isin(size_selected) &
        df['Precision'].isin(precision_selected) &
        df['Model Type'].isin(type_selected) &
        df['Affiliation'].isin(affiliation_selected)]

    return filtered_df

def make_clickable(val):
    return f'<a target="_blank" href="{val}">{val}</a>'
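
# Example usage (a sketch): render a column of URLs as links when exporting a
# DataFrame to HTML, e.g. html = df.style.format({"Model": make_clickable}).to_html()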
ADDED
File without changes
|