{
  "block_size": 512,
  "model": {
    "type": "EleutherAI/gpt-neo-125M",
    "attention_dropout": 0.0,
    "embed_dropout": 0.0,
    "max_position_embeddings": 512,
    "use_cache": true
  },
  "trainer": {
    "evaluation_strategy": "steps",
    "per_device_train_batch_size": 22,
    "per_device_eval_batch_size": 22,
    "gradient_accumulation_steps": 16,
    "eval_steps": 2000,
    "save_steps": 2000,
    "logging_steps": 500,
    "learning_rate": 0.0006,
    "num_train_epochs": null,
    "max_steps": 30000,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 1000,
    "weight_decay": 0.01,
    "fp16": true,
    "bf16": false,
    "gradient_checkpointing": false,
    "optim": "adamw_apex_fused",
    "half_precision_backend": "auto",
    "fp16_opt_level": "O2"
  }
}