configs/gpt_neo_tiny_v2.json from IlyaGusev/rulm

configs/gpt_neo_tiny_v2.json
Summary

Maintainability

Test Coverage

Issues
{
  "block_size": 512,
  "model": {
    "type": "EleutherAI/gpt-neo-125M",
    "num_layers": 6,
    "attention_dropout": 0.0,
    "embed_dropout": 0.0,
    "attention_layers": ["global", "local", "global", "local", "global", "local"],
    "attention_types": [[["global", "local"], 3]],
    "num_heads": 8,
    "hidden_size": 512,
    "max_position_embeddings": 512,
    "use_cache": true
  },
  "trainer": {
    "evaluation_strategy": "steps",
    "per_device_train_batch_size": 51,
    "per_device_eval_batch_size": 51,
    "gradient_accumulation_steps": 7,
    "eval_steps": 2000,
    "save_steps": 2000,
    "logging_steps": 500,
    "learning_rate": 0.0006,
    "num_train_epochs": null,
    "max_steps": 20000,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 2000,
    "weight_decay": 0.01,
    "fp16": true,
    "bf16": false,
    "torch_compile": true,
    "gradient_checkpointing": false,
    "optim": "adamw_apex_fused",
    "half_precision_backend": "auto",
    "fp16_opt_level": "O2"
  }
}