Benjamin Consolvo committed
Commit 7645d86
Parent(s): 491fabd

doc updates 2

Browse files:
- app.py +1 -1
- info/deployment.py +56 -38
- info/programs.py +9 -3
- info/submit.py +10 -8
- info/train_a_model.py +17 -21
app.py CHANGED
@@ -30,7 +30,7 @@ with demo:
 follow the instructions and complete the form in the 🏎️ Submit tab. Models submitted to the leaderboard are evaluated
 on the Intel Developer Cloud ☁️. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
 the [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
-gr.Markdown("""Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and
+gr.Markdown("""![DevHub-image](assets/DevHub_Logo.png) Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and
 talk about everything from GenAI, HPC, to Quantum Computing.""")
 gr.Markdown("""A special shout-out to the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 team for generously sharing their code and best
info/deployment.py CHANGED
@@ -5,7 +5,7 @@ DEPLOY_TEXT = f"""
 A collection of powerful models is valuable, but ultimately, you need to be able to use them effectively.
 This tab is dedicated to providing guidance and code snippets for performing inference with leaderboard models on Intel platforms.
 
-Below
+Below is a table of open-source software options for inference, along with the supported Intel hardware platforms.
 A 🚀 indicates that inference with the associated software package is supported on the hardware. We hope this information
 helps you choose the best option for your specific use case. Happy building!
 
@@ -72,8 +72,8 @@ helps you choose the best option for your specific use case. Happy building!
 <td>PyTorch</td>
 <td>🚀</td>
 <td>🚀</td>
-<td
-<td
+<td></td>
+<td></td>
 <td>🚀</td>
 </tr>
 </tr>
@@ -81,43 +81,25 @@ helps you choose the best option for your specific use case. Happy building!
 <td>Tensorflow</td>
 <td>🚀</td>
 <td>🚀</td>
-<td
-<td
+<td></td>
+<td></td>
 <td>🚀</td>
 </tr>
 </table>
 </div>
 
-
 <hr>
 
-# Intel®
-
-The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
-(e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
-and graph-level optimizations).
-
-Optimum Habana provides covenient functionality for various tasks, below you'll find the command line
-snippet that you would run to perform inference on Gaudi with meta-llama/Llama-2-7b-hf.
-
-The "run_generation.py" script below can be found [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
-
-```bash
-python run_generation.py \
---model_name_or_path meta-llama/Llama-2-7b-hf \
---use_hpu_graphs \
---use_kv_cache \
---max_new_tokens 100 \
---do_sample \
---batch_size 2 \
---prompt "Hello world" "How are you?"
-
-
-### INT4 Inference (GPU)
+# Intel® Max Series GPU
+The Intel® Data Center GPU Max Series is Intel's highest performing, highest density, general-purpose discrete GPU, which packs over 100 billion transistors into one package and contains up to 128 Xe Cores--Intel's foundational GPU compute building block. You can learn more about this GPU [here](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html).
 
+### INT4 Inference (GPU) with Intel Extension for Transformers and Intel Extension for PyTorch
+Intel® Extension for Transformers is an innovative toolkit designed to accelerate GenAI/LLM everywhere with the optimal performance of Transformer-based models on various Intel platforms, including Intel Gaudi2, Intel CPU, and Intel GPU.
+👍 [Intel Extension for Transformers GitHub](https://github.com/intel/intel-extension-for-transformers)
+
+Intel® Extension for PyTorch* extends PyTorch* with up-to-date features and optimizations for an extra performance boost on Intel hardware. Optimizations take advantage of Intel® Advanced Vector Extensions 512 (Intel® AVX-512) Vector Neural Network Instructions (VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel Xe Matrix Extensions (XMX) AI engines on Intel discrete GPUs. Moreover, Intel® Extension for PyTorch* provides easy GPU acceleration for Intel discrete GPUs through the PyTorch* xpu device.
+👍 [Intel Extension for PyTorch GitHub](https://github.com/intel/intel-extension-for-pytorch)
 
 ```python
 import intel_extension_for_pytorch as ipex
 from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
@@ -138,11 +120,15 @@ output = model.generate(inputs)
 ```
 <hr>
 
-# Intel® Xeon CPUs
-
-### Intel Extension for PyTorch - Optimum Intel (no quantization)
-Requires installing/updating optimum `pip install --upgrade-strategy eager optimum[ipex]
-`
+# Intel® Xeon® CPUs
+The Intel® Xeon® CPUs have the most built-in accelerators of any CPU on the market, including Advanced Matrix Extensions (AMX) to accelerate matrix multiplication in deep learning training and inference. Learn more about the Xeon CPUs [here](https://www.intel.com/content/www/us/en/products/details/processors/xeon.html).
+
+### Optimum Intel and Intel Extension for PyTorch (no quantization)
+🤗 Optimum Intel is the interface between the 🤗 Transformers and Diffusers libraries and the different tools and libraries provided by Intel to accelerate end-to-end pipelines on Intel architectures.
+👍 [Optimum Intel GitHub](https://github.com/huggingface/optimum-intel)
+
+Requires installing/updating optimum `pip install --upgrade-strategy eager optimum[ipex]`
 ```python
 from optimum.intel import IPEXModelForCausalLM
 from transformers import AutoTokenizer, pipeline
@@ -154,6 +140,7 @@ results = pipe("A fisherman at sea...")
 ```
 
 ### Intel® Extension for PyTorch - Mixed Precision (fp32 and bf16)
+
 ```python
 import torch
 import intel_extension_for_pytorch as ipex
@@ -188,9 +175,12 @@ outputs = model.generate(inputs)
 <hr>
 
 # Intel® Core Ultra (NPUs and iGPUs)
-
+Intel® Core™ Ultra Processors are optimized for premium thin and powerful laptops, featuring 3D performance hybrid architecture, advanced AI capabilities, and available with built-in Intel® Arc™ GPU. Learn more about Intel Core Ultra [here](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html). For now, there is support for smaller models like [TinyLlama-1.1B](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0).
 
 ### Intel® NPU Acceleration Library
+The Intel® NPU Acceleration Library is a Python library designed to boost the efficiency of your applications by leveraging the power of the Intel Neural Processing Unit (NPU) to perform high-speed computations on compatible hardware.
+👍 [Intel NPU Acceleration Library GitHub](https://github.com/intel/intel-npu-acceleration-library)
+
 ```python
 from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
 import intel_npu_acceleration_library
@@ -222,7 +212,9 @@ print("Run inference")
 _ = model.generate(**generation_kwargs)
 ```
 
-### OpenVINO
+### OpenVINO Tooling with Optimum Intel
+OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference.
+👍 [OpenVINO GitHub](https://github.com/openvinotoolkit/openvino)
 
 ```python
 from optimum.intel import OVModelForCausalLM
@@ -240,9 +232,35 @@ pipe("In the spring, beautiful flowers bloom...")
 
 <hr>
 
-# Intel
-
-
+# Intel® Gaudi Accelerators
+The Intel Gaudi 2 accelerator is Intel's most capable deep learning chip. You can learn about Gaudi 2 [here](https://habana.ai/products/gaudi2/).
+
+Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
+The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
+(e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
+and graph-level optimizations).
+
+Optimum Habana provides convenient functionality for various tasks. Below is a command line snippet to run inference on Gaudi with meta-llama/Llama-2-7b-hf.
+👍 [Optimum Habana GitHub](https://github.com/huggingface/optimum-habana)
+
+The "run_generation.py" script below can be found [here on GitHub](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
+
+```bash
+python run_generation.py \
+--model_name_or_path meta-llama/Llama-2-7b-hf \
+--use_hpu_graphs \
+--use_kv_cache \
+--max_new_tokens 100 \
+--do_sample \
+--batch_size 2 \
+--prompt "Hello world" "How are you?"
+
+```
+<hr>
 
+# Intel Arc GPUs
+You can learn more about Arc GPUs [here](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html).
 
+Code snippets coming soon!
 
 """
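The new Arc GPU section above ends with "Code snippets coming soon!". As a rough illustration only (not part of this commit, and not necessarily the snippet the maintainers plan to add), text generation on an Arc GPU via the PyTorch `xpu` device might look like the sketch below. It assumes an XPU-enabled build of intel-extension-for-pytorch with the matching oneAPI GPU drivers, and the model id is a placeholder.

```python
# Hypothetical sketch, not from this commit: fp16 generation on an Intel Arc GPU
# through the PyTorch "xpu" device provided by intel-extension-for-pytorch.
import torch
import intel_extension_for_pytorch as ipex
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # placeholder model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

model = model.to("xpu")                             # move weights to the Arc GPU
model = ipex.optimize(model, dtype=torch.float16)   # apply IPEX inference optimizations

inputs = tokenizer("In the spring, beautiful flowers bloom...", return_tensors="pt").to("xpu")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```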
info/programs.py CHANGED
@@ -2,8 +2,7 @@ PROGRAMS_TEXT= """
 # 👩💻 Developer Programs
 
 Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
-and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs.
-these opportunities below:
+and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs.
 
 <hr>
 
@@ -14,7 +13,7 @@ helps you innovate and scale, no matter where you are in your entrepreneurial jo
 Through Intel Liftoff, startups can access the computational power they need to build powerful LLMs on platforms
 like Gaudi, Max Series GPUs, and Xeon Processors.
 
-Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
+Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html.
 
 <hr>
 
@@ -41,4 +40,11 @@ environment for projects on the latest Intel technology and as a oneAPI expert,
 others in the community and within Intel
 
 Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/community/innovators/oneapi-innovator.html
+
+<hr>
+
+## Intel DevHub Discord
+
+Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and talk about everything from GenAI, HPC, to Quantum Computing.
+
 """
info/submit.py CHANGED
@@ -1,8 +1,8 @@
 
 SUBMIT_TEXT = f"""
 # 🏎️ Submit
-Models added here will be queued for evaluation on the Intel Developer Cloud
-We will work to create greater transperancy as our leaderboard community grows
+Models added here will be queued for evaluation on the Intel Developer Cloud ☁️. Depending on the queue, your model may take up to 10 days to show up on the leaderboard.
+We will work to create greater transparency as our leaderboard community grows.
 
 ## First steps before submitting a model
 
@@ -14,21 +14,23 @@ model = AutoModel.from_pretrained("your model name", revision=revision)
 tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
 ```
 If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
-
-Note:
+
+Note: Make sure your model is public!
+
+Note: If your model needs `use_remote_code=True`, we do not support this option yet, but we are working on adding it.
 
 ### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
-It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer
+It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`.
 
-### 3) Make sure your model has an open license
+### 3) Make sure your model has an open license.
 This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗 A good example of an open source license is apache-2.0.
-Typically model licenses that are allow for commercial and research use tend to be the most attractive to other developers in the ecosystem
+Typically model licenses that allow for commercial and research use tend to be the most attractive to other developers in the ecosystem.
 
 ### 4) Fill up your model card
 We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
 Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
 we cannot accept your model as a valid submission. Remember, only models trained on these processors are eligible to participate in evaluation:
-Intel® Gaudi Accelerators, Intel® Xeon® Processors, Intel® Data Center GPU Max Series, Intel® ARC GPUs, and Intel® Core Ultra
+Intel® Gaudi Accelerators, Intel® Xeon® Processors, Intel® Data Center GPU Max Series, Intel® ARC GPUs, and Intel® Core Ultra.
 
 ### 5) Select the correct precision
 Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
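Step 2 of the checklist above asks for safetensors weights but does not include a conversion snippet. A minimal, hypothetical sketch (repository ids and output paths are placeholders, not from this commit):

```python
# Hypothetical sketch: re-save existing PyTorch weights in the safetensors format.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/your-model"  # placeholder repository id
model = AutoModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# safe_serialization=True writes model.safetensors instead of pytorch_model.bin
model.save_pretrained("your-model-safetensors", safe_serialization=True)
tokenizer.save_pretrained("your-model-safetensors")

# Optionally push the converted weights back to the Hub:
# model.push_to_hub(repo_id)
# tokenizer.push_to_hub(repo_id)
```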
info/train_a_model.py CHANGED
@@ -2,22 +2,21 @@
 LLM_BENCHMARKS_TEXT = f"""
 # 🧰 Train a Model
 
-Intel offers a variety of platforms that can be used to train LLMs including
-Below, you
+Intel offers a variety of platforms that can be used to train LLMs including data center and consumer grade CPUs, GPUs, and ASICs.
+Below, you can find documentation on how to access free and paid resources to train a model on Intel hardware and submit it to the Hugging Face Model Hub.
 
 ## Intel Developer Cloud - Quick Start
 The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
-224 Core 4th Generation Xeon
-1. Visit [
-2. Navigate to the "Training" module under the "Software" section in the left panel
-3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch"
-4. Follow the instructions in the notebook to train your model using Intel® Data Center GPU Max 1100
-5. Upload your model to the Hugging Face Model Hub
-6. Go to the "Submit" tab follow instructions to
-
-##
-
-Below you will find a list of additional resources for training models on different intel hardware platforms:
+224 Core 4th Generation Xeon Bare Metal nodes with 4x GPU Max Series 1100. To access these resources please follow the instructions below:
+1. Visit the [Intel Developer Cloud](https://cloud.intel.com/) and sign up for the "Standard - Free" tier to get started.
+2. Navigate to the "Training" module under the "Software" section in the left panel.
+3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch".
+4. Follow the instructions in the notebook to train your model using Intel® Data Center GPU Max 1100.
+5. Upload your model to the Hugging Face Model Hub.
+6. Go to the "Submit" tab on this Leaderboard and follow the instructions to submit your model.
+
+## Training Code Samples
+Below are some resources to get you started on training models on Intel platforms:
 - Intel® Gaudi® Accelerators
 - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
 - Intel® Xeon® Processors
@@ -25,13 +24,12 @@ Below you will find a list of additional resources for training models on differ
 - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
 - Intel® Data Center GPU Max Series
 - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
-## Submitting your Model to the Hub
-Once you have trained your model, it is a straighforward process to upload and open source it on the Hugging Face Hub.
 
-
+## Submitting your Model to the Hugging Face Model Hub
+Once your model is trained, it is a straightforward process to upload and open source it on the Hugging Face Model Hub. The commands from a Jupyter notebook are given below:
 
+```python
 # Logging in to Hugging Face
-
 from huggingface_hub import notebook_login, Repository
 
 # Login to Hugging Face
@@ -49,8 +47,6 @@ model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("") #add name of your model's tokenizer on Hugging Face OR custom tokenizer
 
-#Saving and Uploading the Model and Tokenizer
-
 # Save the model and tokenizer
 model_name_on_hub = "desired-model-name"
 model.save_pretrained(model_name_on_hub)
@@ -61,10 +57,10 @@ model.push_to_hub(model_name_on_hub)
 tokenizer.push_to_hub(model_name_on_hub)
 
 # Congratulations! Your fine-tuned model is now uploaded to the Hugging Face Model Hub.
-# You can view and share your model using its URL: https://huggingface.co
+# You can view and share your model using its URL: https://huggingface.co/<your-username>/<your-model-name>
 
 ```
-
+Once your model is uploaded, make sure to update your model card, specifying your use of Intel software and hardware. Hugging Face has a great description on [how to build model cards here](https://huggingface.co/docs/hub/en/model-cards).
 """
 
 SUBMIT_TEXT = f"""
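For reference, the Hub-upload code in this file is split across several hunks above. Consolidated, and with placeholder checkpoint paths (lines hidden by the diff context may differ slightly in the actual file), the flow is roughly:

```python
# Consolidated sketch of the upload flow shown in the hunks above.
# checkpoint_path and model_name_on_hub are placeholders.
from huggingface_hub import notebook_login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Login to Hugging Face (prompts for an access token in a notebook)
notebook_login()

# Load the fine-tuned model and its tokenizer from a local checkpoint
checkpoint_path = "./my-finetuned-checkpoint"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)

# Save locally and push both to the Hub under the desired repo name
model_name_on_hub = "desired-model-name"
model.save_pretrained(model_name_on_hub)
tokenizer.save_pretrained(model_name_on_hub)
model.push_to_hub(model_name_on_hub)
tokenizer.push_to_hub(model_name_on_hub)
# The model is then visible at https://huggingface.co/<your-username>/desired-model-name
```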