---
language:
- en
library_name: transformers
datasets:
- nilq/babylm-10M
---

- GPT-2 model trained as an ablation by team CLAUSE Bielefeld for the 2023 BabyLM Challenge
- For more information, please see the paper cited below; a minimal usage sketch follows this list.
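
A minimal usage sketch with the `transformers` library (the repo id below is a placeholder, since this card does not state one; substitute the checkpoint's actual Hugging Face model id):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id (not given on this card) -- substitute the real one.
model_id = "CLAUSE-Bielefeld/gpt-wee"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Generate a short continuation from a prompt.
inputs = tokenizer("The little bird", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```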

Citation:

```bibtex
@inproceedings{bunzeck-zarriess-2023-gpt,
    title = "{GPT}-wee: How Small Can a Small Language Model Really Get?",
    author = "Bunzeck, Bastian  and
      Zarrie{\ss}, Sina",
    editor = "Warstadt, Alex  and
      Mueller, Aaron  and
      Choshen, Leshem  and
      Wilcox, Ethan  and
      Zhuang, Chengxu  and
      Ciro, Juan  and
      Mosquera, Rafael  and
      Paranjabe, Bhargavi  and
      Williams, Adina  and
      Linzen, Tal  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the BabyLM Challenge at the 27th Conference on Computational Natural Language Learning",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.conll-babylm.2",
    doi = "10.18653/v1/2023.conll-babylm.2",
    pages = "35--46",
}
```