tim-lawson's picture
Push model using huggingface_hub.
c490cdf verified
raw
history blame contribute delete
431 Bytes
{
"accumulate_grad_batches": 64,
"autoencoder": null,
"auxk": 256,
"auxk_coef": 0.03125,
"batch_size": 1,
"dead_steps_threshold": null,
"dead_threshold": 0.001,
"dead_tokens_threshold": 10000000,
"expansion_factor": 32,
"k": 32,
"layers": null,
"lr": 0.0001,
"max_length": 2048,
"model_name": "EleutherAI/pythia-70m-deduped",
"skip_special_tokens": true,
"standardize": true,
"transformer": null
}