nicholasKluge
committed on
Commit
•
33a5d22
1
Parent(s):
fbf7853
Upload 17 files
Browse files- AIRA_FineTuning.ipynb +0 -0
- Aira_emissions.csv +1 -1
- README.md +28 -37
- added_tokens.json +4 -2
- config.json +2 -2
- generation_config.json +1 -1
- model.safetensors +2 -2
- optimizer.pt +3 -0
- pytorch_model.bin +2 -2
- rng_state.pt +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +3 -2
- tokenizer_config.json +3 -2
- training_stats.parquet +2 -2
AIRA_FineTuning.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
Aira_emissions.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
|
2 |
-
2023-
|
|
|
1 |
timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
|
2 |
+
2023-09-05T14:47:28,Aira_emissions,cbb40a59-d1e6-4364-8dc4-b409f729cfa6,6527.2822506427765,0.35849483705483043,5.492252721559995e-05,42.5,333.6809648058492,31.305280208587646,0.077058052478234,0.600908513781982,0.056748852031142355,0.7347154182913581,Singapore,SGP,,,,Linux-5.15.109+-x86_64-with-glibc2.35,3.10.12,2.3.1,12,Intel(R) Xeon(R) CPU @ 2.20GHz,1,1 x NVIDIA A100-SXM4-40GB,103.8547,1.2929,83.48074722290039,machine,N,1.0
|
README.md
CHANGED
@@ -1,14 +1,11 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
datasets:
|
4 |
-
- Dahoas/synthetic-instruct-gptj-pairwise
|
5 |
-
- databricks/databricks-dolly-15k
|
6 |
-
- HuggingFaceH4/instruction-dataset
|
7 |
- nicholasKluge/instruct-aira-dataset
|
8 |
language:
|
9 |
- pt
|
10 |
metrics:
|
11 |
-
-
|
12 |
library_name: transformers
|
13 |
tags:
|
14 |
- alignment
|
@@ -18,15 +15,13 @@ tags:
|
|
18 |
- assistant
|
19 |
pipeline_tag: text-generation
|
20 |
widget:
|
21 |
-
- text: <|
|
22 |
example_title: Olá
|
23 |
-
- text:
|
24 |
-
|
25 |
-
|
26 |
-
example_title:
|
27 |
-
- text: <|
|
28 |
-
example_title: Ética das virtudes
|
29 |
-
- text: <|startoftext|>O que posso fazer para alegrar minha namorada?<|endoftext|>
|
30 |
example_title: Conselho
|
31 |
inference:
|
32 |
parameters:
|
@@ -37,14 +32,18 @@ inference:
|
|
37 |
max_length: 200
|
38 |
length_penalty: 0.3
|
39 |
early_stopping: true
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
---
|
41 |
-
# Aira-
|
42 |
|
43 |
-
`Aira-
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
Check our gradio-demo in [Spaces](https://huggingface.co/spaces/nicholasKluge/Aira-Demo).
|
48 |
|
49 |
## Details
|
50 |
|
@@ -52,27 +51,19 @@ Check our gradio-demo in [Spaces](https://huggingface.co/spaces/nicholasKluge/Ai
|
|
52 |
- **Dataset:** [Instruct-Aira Dataset](https://huggingface.co/datasets/nicholasKluge/instruct-aira-dataset)
|
53 |
- **Language:** Portuguese
|
54 |
- **Number of Epochs:** 5
|
55 |
-
- **Batch size:**
|
56 |
- **Optimizer:** `torch.optim.AdamW` (warmup_steps = 1e2, learning_rate = 5e-4, epsilon = 1e-8)
|
57 |
- **GPU:** 1 NVIDIA A100-SXM4-40GB
|
58 |
-
- **Emissions:** 0.
|
59 |
-
- **Total Energy Consumption:** 0.
|
60 |
-
|
61 |
-
| Epoch/Loss|Training|Validation|
|
62 |
-
|---|---|---|
|
63 |
-
| 1 |0.947100|0.774946|
|
64 |
-
| 2 |0.737357|0.730962|
|
65 |
-
| 3 |0.657410|0.710232|
|
66 |
-
| 4 |0.597437|0.705064|
|
67 |
-
| 5 |0.551684|0.704830|
|
68 |
|
69 |
-
This repository has the notebook used to train this model.
|
70 |
|
71 |
## Usage
|
72 |
|
73 |
-
|
74 |
|
75 |
-
`<|
|
76 |
|
77 |
```python
|
78 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
@@ -80,8 +71,8 @@ import torch
|
|
80 |
|
81 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
82 |
|
83 |
-
tokenizer = AutoTokenizer.from_pretrained('nicholasKluge/Aira-
|
84 |
-
aira = AutoModelForCausalLM.from_pretrained('nicholasKluge/Aira-
|
85 |
|
86 |
aira.eval()
|
87 |
aira.to(device)
|
@@ -110,10 +101,10 @@ for i, response in enumerate(responses):
|
|
110 |
The model will output something like:
|
111 |
|
112 |
```markdown
|
113 |
-
>>> Question: 👤
|
114 |
|
115 |
-
>>>Response 1: 🤖
|
116 |
-
>>>Response 2: 🤖
|
117 |
```
|
118 |
|
119 |
## Limitations
|
@@ -140,4 +131,4 @@ The model will output something like:
|
|
140 |
|
141 |
## License
|
142 |
|
143 |
-
The `Aira-
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
datasets:
|
|
|
|
|
|
|
4 |
- nicholasKluge/instruct-aira-dataset
|
5 |
language:
|
6 |
- pt
|
7 |
metrics:
|
8 |
+
- accuracy
|
9 |
library_name: transformers
|
10 |
tags:
|
11 |
- alignment
|
|
|
15 |
- assistant
|
16 |
pipeline_tag: text-generation
|
17 |
widget:
|
18 |
+
- text: "<|startofinstruction|>Olá! Como você se chama?<|endofinstruction|>"
|
19 |
example_title: Olá
|
20 |
+
- text: "<|startofinstruction|>Você pode me explicar o que é Aprendizagem de Máquina?<|endofinstruction|>"
|
21 |
+
example_title: Aprendizagem de Máquina
|
22 |
+
- text: "<|startofinstruction|>Você sabe alguma coisa sobre Ética das Virtudes?<|endofinstruction|>"
|
23 |
+
example_title: Ética
|
24 |
+
- text: "<|startofinstruction|>Como eu posso fazer a minha namorada feliz?<|endofinstruction|>"
|
|
|
|
|
25 |
example_title: Conselho
|
26 |
inference:
|
27 |
parameters:
|
|
|
32 |
max_length: 200
|
33 |
length_penalty: 0.3
|
34 |
early_stopping: true
|
35 |
+
co2_eq_emissions:
|
36 |
+
emissions: 358.49
|
37 |
+
source: CodeCarbon
|
38 |
+
training_type: fine-tuning
|
39 |
+
geographical_location: Singapore
|
40 |
+
hardware_used: NVIDIA A100-SXM4-40GB
|
41 |
---
|
42 |
+
# Aira-2-portuguese-124M
|
43 |
|
44 |
+
`Aira-2-portuguese-124M` is the second version of the Aira instruction-tuned series. Aira is an instruction-tuned GPT-style model based on [GPT-2](https://huggingface.co/pierreguillou/gpt2-small-portuguese). The model was trained on a dataset of prompt-completion pairs generated synthetically by prompting already-tuned models (ChatGPT, Llama, Open-Assistant, etc.).
|
45 |
|
46 |
+
Check out our Gradio demo on [Spaces](https://huggingface.co/spaces/nicholasKluge/Aira-Demo-Portuguese).
|
|
|
|
|
47 |
|
48 |
## Details
|
49 |
|
|
|
51 |
- **Dataset:** [Instruct-Aira Dataset](https://huggingface.co/datasets/nicholasKluge/instruct-aira-dataset)
|
52 |
- **Language:** Portuguese
|
53 |
- **Number of Epochs:** 5
|
54 |
+
- **Batch size:** 24
|
55 |
- **Optimizer:** `torch.optim.AdamW` (warmup_steps = 1e2, learning_rate = 5e-4, epsilon = 1e-8)
|
56 |
- **GPU:** 1 NVIDIA A100-SXM4-40GB
|
57 |
+
- **Emissions:** 0.35 kg CO2 (Singapore)
|
58 |
+
- **Total Energy Consumption:** 0.73 kWh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
This repository has the [notebook](AIRA_FineTuning.ipynb) used to train this model.
|
61 |
|
62 |
## Usage
|
63 |
|
64 |
+
Three special tokens are used to mark the user side of the interaction and the model's response:
|
65 |
|
66 |
+
`<|startofinstruction|>`O que é um modelo de linguagem?`<|endofinstruction|>`Um modelo de linguagem é uma distribuição de probabilidade sobre um vocabulário.`<|endofcompletion|>`
|
67 |
|
68 |
```python
|
69 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
71 |
|
72 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
73 |
|
74 |
+
tokenizer = AutoTokenizer.from_pretrained('nicholasKluge/Aira-2-portuguese-124M')
|
75 |
+
aira = AutoModelForCausalLM.from_pretrained('nicholasKluge/Aira-2-portuguese-124M')
|
76 |
|
77 |
aira.eval()
|
78 |
aira.to(device)
|
|
|
101 |
The model will output something like:
|
102 |
|
103 |
```markdown
|
104 |
+
>>> Question: 👤 Qual a capital do Brasil?
|
105 |
|
106 |
+
>>>Response 1: 🤖 A capital do Brasil é Brasília.
|
107 |
+
>>>Response 2: 🤖 A capital do Brasil é Brasília.
|
108 |
```
|
109 |
|
110 |
## Limitations
|
|
|
131 |
|
132 |
## License
|
133 |
|
134 |
+
`Aira-2-portuguese-124M` is licensed under the Apache License, Version 2.0. See the [LICENSE](LICENSE) file for more details.
|
added_tokens.json
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
{
|
2 |
-
"<|
|
3 |
-
"<|
|
|
|
|
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"<|endofcompletion|>": 50258,
|
3 |
+
"<|endofinstruction|>": 50259,
|
4 |
+
"<|pad|>": 50260,
|
5 |
+
"<|startofinstruction|>": 50257
|
6 |
}
|
config.json
CHANGED
@@ -33,7 +33,7 @@
|
|
33 |
}
|
34 |
},
|
35 |
"torch_dtype": "float32",
|
36 |
-
"transformers_version": "4.
|
37 |
"use_cache": true,
|
38 |
-
"vocab_size":
|
39 |
}
|
|
|
33 |
}
|
34 |
},
|
35 |
"torch_dtype": "float32",
|
36 |
+
"transformers_version": "4.33.0",
|
37 |
"use_cache": true,
|
38 |
+
"vocab_size": 50261
|
39 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 50256,
|
4 |
"eos_token_id": 50256,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 50256,
|
4 |
"eos_token_id": 50256,
|
5 |
+
"transformers_version": "4.33.0"
|
6 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:531f4ef9cb99dc3c7d2548cd1248f64d890303eb03de562861312c2b86844790
|
3 |
+
size 497786496
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ef28bdf2e6ad48551faa7696cafc16e0c5e3e9c2f51cca544a0fc3b11523a1f
|
3 |
+
size 649096325
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5de6e1e3302d8628784844b4f0b29c5fcc01dc4a80e2859bd87b1bc43eab7def
|
3 |
+
size 497819485
|
rng_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f709d0640d343650b38ff79894793392ca953fe40a2d1c7ac3adbb741ed50143
|
3 |
+
size 5809
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a37b5bfcf423368b830ec97a879837159978abb929495bf71afb2e98b98cf588
|
3 |
+
size 563
|
special_tokens_map.json
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
{
|
2 |
"bos_token": {
|
3 |
-
"content": "<|
|
4 |
"lstrip": false,
|
5 |
"normalized": true,
|
6 |
"rstrip": false,
|
7 |
"single_word": false
|
8 |
},
|
9 |
"eos_token": {
|
10 |
-
"content": "<|
|
11 |
"lstrip": false,
|
12 |
"normalized": true,
|
13 |
"rstrip": false,
|
@@ -20,5 +20,6 @@
|
|
20 |
"rstrip": false,
|
21 |
"single_word": false
|
22 |
},
|
|
|
23 |
"unk_token": "<|endoftext|>"
|
24 |
}
|
|
|
1 |
{
|
2 |
"bos_token": {
|
3 |
+
"content": "<|startofinstruction|>",
|
4 |
"lstrip": false,
|
5 |
"normalized": true,
|
6 |
"rstrip": false,
|
7 |
"single_word": false
|
8 |
},
|
9 |
"eos_token": {
|
10 |
+
"content": "<|endofcompletion|>",
|
11 |
"lstrip": false,
|
12 |
"normalized": true,
|
13 |
"rstrip": false,
|
|
|
20 |
"rstrip": false,
|
21 |
"single_word": false
|
22 |
},
|
23 |
+
"sep_token": "<|endofinstruction|>",
|
24 |
"unk_token": "<|endoftext|>"
|
25 |
}
|
tokenizer_config.json
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
"add_prefix_space": false,
|
4 |
"bos_token": {
|
5 |
"__type": "AddedToken",
|
6 |
-
"content": "<|
|
7 |
"lstrip": false,
|
8 |
"normalized": true,
|
9 |
"rstrip": false,
|
@@ -12,7 +12,7 @@
|
|
12 |
"clean_up_tokenization_spaces": true,
|
13 |
"eos_token": {
|
14 |
"__type": "AddedToken",
|
15 |
-
"content": "<|
|
16 |
"lstrip": false,
|
17 |
"normalized": true,
|
18 |
"rstrip": false,
|
@@ -29,6 +29,7 @@
|
|
29 |
"rstrip": false,
|
30 |
"single_word": false
|
31 |
},
|
|
|
32 |
"tokenizer_class": "GPT2Tokenizer",
|
33 |
"unk_token": {
|
34 |
"__type": "AddedToken",
|
|
|
3 |
"add_prefix_space": false,
|
4 |
"bos_token": {
|
5 |
"__type": "AddedToken",
|
6 |
+
"content": "<|startofinstruction|>",
|
7 |
"lstrip": false,
|
8 |
"normalized": true,
|
9 |
"rstrip": false,
|
|
|
12 |
"clean_up_tokenization_spaces": true,
|
13 |
"eos_token": {
|
14 |
"__type": "AddedToken",
|
15 |
+
"content": "<|endofcompletion|>",
|
16 |
"lstrip": false,
|
17 |
"normalized": true,
|
18 |
"rstrip": false,
|
|
|
29 |
"rstrip": false,
|
30 |
"single_word": false
|
31 |
},
|
32 |
+
"sep_token": "<|endofinstruction|>",
|
33 |
"tokenizer_class": "GPT2Tokenizer",
|
34 |
"unk_token": {
|
35 |
"__type": "AddedToken",
|
training_stats.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fd2182fce4c23b82a8ddbaf441ebcca1c89cae6bf92c1283572fe28a0e7a2d0
|
3 |
+
size 2355
|