nicholasKluge committed
Commit d559c5b
Parent(s): 479b3b6
Upload 16 files

Browse files:
- .gitattributes +1 -0
- Aira_emissions.csv +1 -1
- README.md +0 -150
- added_tokens.json +4 -2
- config.json +2 -2
- generation_config.json +1 -1
- model.safetensors +2 -2
- optimizer.pt +3 -0
- pytorch_model.bin +2 -2
- rng_state.pt +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +3 -2
- tokenizer_config.json +3 -2
- training_stats.parquet +2 -2
.gitattributes
CHANGED
@@ -25,6 +25,7 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
Aira_emissions.csv
CHANGED
@@ -1,2 +1,2 @@
 timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
-2023-
+2023-08-26T00:29:47,Aira_emissions,a4308eb3-4ff0-45d9-8b3a-17d86a661b19,4563.063962221146,0.2556931051047661,5.603539797419437e-05,42.5,355.2915830996542,31.305280208587646,0.05386941181470952,0.430489313280064,0.03967020713370335,0.5240289322284765,Singapore,SGP,,,,Linux-5.15.109+-x86_64-with-glibc2.35,3.10.12,2.3.1,12,Intel(R) Xeon(R) CPU @ 2.20GHz,1,1 x NVIDIA A100-SXM4-40GB,103.8547,1.2929,83.48074722290039,machine,N,1.0
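For context, this CSV follows the schema written by CodeCarbon's `EmissionsTracker`. A minimal sketch of how such a file is produced (the project name and output path mirror the row above; the training loop is a placeholder, not this repository's actual code):

```python
# Hedged sketch: how an emissions CSV like Aira_emissions.csv is typically
# produced with CodeCarbon. The training step is only a placeholder.
from codecarbon import EmissionsTracker

tracker = EmissionsTracker(
    project_name="Aira_emissions",     # matches the project_name column above
    output_file="Aira_emissions.csv",  # CodeCarbon appends one row per tracked run
)
tracker.start()
try:
    pass  # fine-tuning would run here
finally:
    emissions_kg = tracker.stop()      # total emissions in kg CO2eq
    print(f"Estimated emissions: {emissions_kg:.4f} kg CO2eq")
```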
README.md
CHANGED
@@ -1,153 +1,3 @@
 ---
 license: apache-2.0
-datasets:
-- Dahoas/synthetic-instruct-gptj-pairwise
-- databricks/databricks-dolly-15k
-- HuggingFaceH4/instruction-dataset
-- nicholasKluge/instruct-aira-dataset
-language:
-- en
-metrics:
-- rouge
-library_name: transformers
-tags:
-- alignment
-- instruction tuned
-- text generation
-- conversation
-- assistant
-pipeline_tag: text-generation
-widget:
-- text: <|startoftext|>Hello! What is your name?<|endoftext|>
-  example_title: Greetings
-- text: <|startoftext|>Can you explain what is Machine Learning?<|endoftext|>
-  example_title: Machine Learning
-- text: <|startoftext|>Do you know anything about virtue ethics?<|endoftext|>
-  example_title: Ethics
-- text: <|startoftext|>How can I make my girlfriend happy?<|endoftext|>
-  example_title: Advice
-inference:
-  parameters:
-    repetition_penalty: 1.2
-    temperature: 0.2
-    top_k: 30
-    top_p: 0.3
-    max_length: 200
-    length_penalty: 0.3
-    early_stopping: true
-co2_eq_emissions:
-  emissions: 0.19
-  source: "CodeCarbon"
-  training_type: "fine-tuning"
-  geographical_location: "United States of America"
-  hardware_used: "NVIDIA A100-SXM4-40GB"
 ---
-# Aira-Instruct-124M
-
-`Aira-Instruct-124M` is an instruction-tuned GPT-style model based on [GPT-2](https://huggingface.co/gpt2). The model was trained on a dataset of `prompt`/`completion` pairs generated via the [Self-Instruct](https://github.com/yizhongw/self-instruct) framework. `Aira-Instruct-124M` was instruction-tuned via conditional text generation.
-
-The dataset used to train this model combines the following sources: the [`synthetic-instruct-gptj-pairwise`](https://huggingface.co/datasets/Dahoas/synthetic-instruct-gptj-pairwise) dataset, the [`databricks_dolly_15k`](https://huggingface.co/datasets/HuggingFaceH4/databricks_dolly_15k) dataset, the [`instruction-dataset`](https://huggingface.co/datasets/HuggingFaceH4/instruction-dataset) dataset, and a subset of [Aira's](https://github.com/Nkluge-correa/Aira-EXPERT) fine-tuning dataset, focused on Q&A related to ethics, AI, and AI safety. The dataset is available in both Portuguese and English.
-
-Check out our Gradio demo in [Spaces](https://huggingface.co/spaces/nicholasKluge/Aira-Demo).
-
-## Details
-
-- **Size:** 124,441,344 parameters
-- **Dataset:** [Instruct-Aira Dataset](https://huggingface.co/datasets/nicholasKluge/instruct-aira-dataset)
-- **Language:** English
-- **Number of Epochs:** 5
-- **Batch size:** 32
-- **Optimizer:** `torch.optim.AdamW` (warmup_steps = 1e2, learning_rate = 5e-4, epsilon = 1e-8)
-- **GPU:** 1 NVIDIA A100-SXM4-40GB
-- **Emissions:** 0.19 KgCO2 (United States of America)
-- **Total Energy Consumption:** 0.42 kWh
-
-| Epoch | Training Loss | Validation Loss |
-|---|---|---|
-| 1 | 1.076073 | 0.692127 |
-| 2 | 0.680394 | 0.662053 |
-| 3 | 0.622054 | 0.651161 |
-| 4 | 0.577170 | 0.644864 |
-| 5 | 0.541509 | 0.644677 |
-
-This repository contains the notebook used to train this model.
-
-## Usage
-
-Two special tokens mark the user side of the interaction and the model's response:
-
-`<|startoftext|>`What is a language model?`<|endoftext|>`A language model is a probability distribution over a vocabulary.`<|endoftext|>`
-
-```python
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-tokenizer = AutoTokenizer.from_pretrained('nicholasKluge/Aira-Instruct-124M')
-aira = AutoModelForCausalLM.from_pretrained('nicholasKluge/Aira-Instruct-124M')
-
-aira.eval()
-aira.to(device)
-
-question = input("Enter your question: ")
-
-inputs = tokenizer(tokenizer.bos_token + question + tokenizer.eos_token, return_tensors="pt").to(device)
-
-responses = aira.generate(**inputs,
-                          bos_token_id=tokenizer.bos_token_id,
-                          pad_token_id=tokenizer.pad_token_id,
-                          eos_token_id=tokenizer.eos_token_id,
-                          do_sample=True,
-                          top_k=50,
-                          max_length=200,
-                          top_p=0.95,
-                          temperature=0.7,
-                          num_return_sequences=2)
-
-print(f"Question: 👤 {question}\n")
-
-for i, response in enumerate(responses):
-    print(f'Response {i+1}: 🤖 {tokenizer.decode(response, skip_special_tokens=True).replace(question, "")}')
-```
-
-The model will output something like:
-
-```markdown
->>> Question: 👤 Hello! What is your name?
-
->>> Response 1: 🤖 Hi there! I am Aira, a chatbot designed to answer questions about AI ethics and AI safety. If you need assistance navigating our conversation, please feel free to ask!
->>> Response 2: 🤖 Hi there! My name is Aira, and I'm a chatbot designed to answer questions related to AI ethics and AI Safety. If you need assistance, feel free to ask, and I'll be happy to help you out.
-```
-
-## Limitations
-
-🤥 Generative models can perpetuate the generation of pseudo-informative content, that is, false information that may appear truthful.
-
-🤬 In certain types of tasks, generative models can produce harmful and discriminatory content inspired by historical stereotypes.
-
-## Evaluation
-
-| Model | Average | [ARC](https://arxiv.org/abs/1803.05457) | [HellaSwag](https://arxiv.org/abs/1905.07830) | [MMLU](https://arxiv.org/abs/2009.03300) | [TruthfulQA](https://arxiv.org/abs/2109.07958) |
-|---|---|---|---|---|---|
-| [Aira-Instruct-124M](https://huggingface.co/nicholasKluge/Aira-Instruct-124M) | **29.36** | 23.55 | 30.82 | 25.13 | 37.94 |
-
-## Cite as 🤗
-
-```latex
-@misc{nicholas22aira,
-  doi = {10.5281/zenodo.6989727},
-  url = {https://huggingface.co/nicholasKluge/Aira-Instruct-124M},
-  author = {Nicholas Kluge Corrêa and Carolina Del Pino},
-  title = {Aira},
-  year = {2023},
-  publisher = {HuggingFace},
-  journal = {HuggingFace repository},
-}
-```
-
-## License
-
-`Aira-Instruct-124M` is licensed under the Apache License, Version 2.0. See the [LICENSE](LICENSE) file for more details.
added_tokens.json
CHANGED
@@ -1,4 +1,6 @@
 {
-  "<|
-  "<|
+  "<|endofcompletion|>": 50258,
+  "<|endofinstruction|>": 50259,
+  "<|pad|>": 50260,
+  "<|startofinstruction|>": 50257
 }
config.json
CHANGED
@@ -33,7 +33,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.32.0",
   "use_cache": true,
-  "vocab_size":
+  "vocab_size": 50261
 }
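The bump in `vocab_size` to 50261 corresponds to the four chat tokens registered in `added_tokens.json` on top of GPT-2's 50257 base tokens. A minimal sketch of how such tokens are typically registered with 🤗 Transformers (an illustration under that assumption, not this repository's training notebook):

```python
# Hedged sketch: registering this commit's special tokens on a base GPT-2
# tokenizer/model. Not the repository's actual training code.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

tokenizer.add_special_tokens({
    "bos_token": "<|startofinstruction|>",
    "sep_token": "<|endofinstruction|>",
    "eos_token": "<|endofcompletion|>",
    "pad_token": "<|pad|>",
})

# GPT-2 ships with 50257 tokens; the four additions give 50261, matching the
# new vocab_size, so the embedding matrix is resized to the same length.
model.resize_token_embeddings(len(tokenizer))
print(len(tokenizer))  # 50261
```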
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
-  "transformers_version": "4.
+  "transformers_version": "4.32.0"
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0eb9554ba389122dfc381e5cc4343225548f608b08291b4262d24357816ddf3d
+size 497786496
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecefeefd8fe6f9919855d1624d77968ce405086707c5221d09edb7542216af29
+size 649096325
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5e97b04660e4787d245319f53b5a20493df4eadde1eb3572e3d1ff8f400f5317
+size 497819485
rng_state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:931e1966ae9d5ac3455e4898b0ab5f71c8438c50501aac2fa870cdd0176e7b79
+size 5809
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:971be8fe543936607759f2c265ff70b5de91370f7d741aa2cc8a685bc923c2c3
+size 563
special_tokens_map.json
CHANGED
@@ -1,13 +1,13 @@
 {
   "bos_token": {
-    "content": "<|
+    "content": "<|startofinstruction|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|
+    "content": "<|endofcompletion|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,
@@ -20,6 +20,7 @@
     "rstrip": false,
     "single_word": false
   },
+  "sep_token": "<|endofinstruction|>",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer_config.json
CHANGED
@@ -3,7 +3,7 @@
   "add_prefix_space": false,
   "bos_token": {
     "__type": "AddedToken",
-    "content": "<|
+    "content": "<|startofinstruction|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,
@@ -12,7 +12,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": {
     "__type": "AddedToken",
-    "content": "<|
+    "content": "<|endofcompletion|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,
@@ -28,6 +28,7 @@
     "rstrip": false,
     "single_word": false
   },
+  "sep_token": "<|endofinstruction|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": {
     "__type": "AddedToken",
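Taken together, the renamed tokens in these tokenizer files suggest a prompt layout of the form `<|startofinstruction|>question<|endofinstruction|>answer<|endofcompletion|>`, with `<|pad|>` used for padding. That layout is inferred from the token names only, so treat the following loading sketch as an assumption rather than documented behavior:

```python
# Hedged sketch: building a prompt with the renamed special tokens. The exact
# instruction/completion layout is an assumption inferred from this commit.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("nicholasKluge/Aira-Instruct-124M")

prompt = tok.bos_token + "What is a language model?" + tok.sep_token
ids = tok(prompt).input_ids
print(ids[0], ids[-1])  # 50257 (<|startofinstruction|>) and 50259 (<|endofinstruction|>)
```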
training_stats.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:46f316fc121507952a383f83d83f7afc0dfeafe83c1162e799ba756fb19798b6
+size 3106