{
    "block_size": 512,
    "model": {
        "type": "EleutherAI/gpt-neo-125M",
        "attention_dropout": 0.0,
        "embed_dropout": 0.0,
        "max_position_embeddings": 512,
        "use_cache": true
    },
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 22,
        "per_device_eval_batch_size": 22,
        "gradient_accumulation_steps": 16,
        "eval_steps": 2000,
        "save_steps": 2000,
        "logging_steps": 500,
        "learning_rate": 0.0006,
        "num_train_epochs": null,
        "max_steps": 30000,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 1000,
        "weight_decay": 0.01,
        "fp16": true,
        "bf16": false,
        "gradient_checkpointing": false,
        "optim": "adamw_apex_fused",
        "half_precision_backend": "auto",
        "fp16_opt_level": "O2"
    }
}