# hongbo-miao/hongbomiao.com

# View on GitHub
# cloud-platform/aws/aws-parallelcluster/config/hm-hpc-cluster-config.yaml

# Summary

# Maintainability
# Test Coverage
# https://docs.aws.amazon.com/parallelcluster/latest/ug/HeadNode-v3.html
# https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html

---
# AWS Region for the cluster.
Region: us-west-2
# Operating system identifier for the cluster image.
Image:
  Os: ubuntu2204
# Head node: instance type, subnet, and the SSH key pair name.
HeadNode:
  InstanceType: t4g.small
  Networking:
    # Public subnet (the ID shown is a masked placeholder).
    SubnetId: subnet-xxxxxxxxxxxxxxxxx
  Ssh:
    # NOTE(review): no AllowedIps is set in this section; the HeadNode
    # reference linked at the top of this file presumably defaults it to
    # 0.0.0.0/0 - confirm whether SSH access should be restricted.
    KeyName: hm-key-pair
# Slurm scheduler with two queues: one Spot and one On-Demand.
# Every queue lists its keys in the same order: capacity settings,
# networking, node storage, compute resources, tags.
Scheduling:
  Scheduler: slurm
  SlurmSettings:
    EnableMemoryBasedScheduling: true
    QueueUpdateStrategy: DRAIN
  SlurmQueues:
    # Queue 1 of 2: Spot capacity with two c7gn compute resources.
    - Name: spot-queue
      CapacityType: SPOT
      AllocationStrategy: capacity-optimized
      Networking:
        # Private
        SubnetIds:
          - subnet-xxxxxxxxxxxxxxxxx
        PlacementGroup:
          Enabled: true
      ComputeSettings:
        LocalStorage:
          RootVolume:
            VolumeType: gp3
      # Each compute resource has MinCount 0 and MaxCount 5, with EFA enabled.
      ComputeResources:
        - Name: c7gn-16xlarge
          InstanceType: c7gn.16xlarge
          MinCount: 0
          MaxCount: 5
          Efa:
            Enabled: true
        - Name: c7gn-metal
          InstanceType: c7gn.metal
          MinCount: 0
          MaxCount: 5
          Efa:
            Enabled: true
      Tags:
        - Key: Team
          Value: hongbomiao
        - Key: Environment
          Value: production
        - Key: ResourceName
          Value: spot-queue
    # Queue 2 of 2: On-Demand capacity with a single c7gn.metal resource.
    - Name: on-demand-queue
      CapacityType: ONDEMAND
      AllocationStrategy: lowest-price
      Networking:
        # Private
        SubnetIds:
          - subnet-xxxxxxxxxxxxxxxxx
        PlacementGroup:
          Enabled: true
      ComputeSettings:
        LocalStorage:
          RootVolume:
            VolumeType: gp3
      # MinCount 0 and MaxCount 5, with EFA enabled.
      ComputeResources:
        - Name: c7gn-metal
          InstanceType: c7gn.metal
          MinCount: 0
          MaxCount: 5
          Efa:
            Enabled: true
      Tags:
        - Key: Team
          Value: hongbomiao
        - Key: Environment
          Value: production
        - Key: ResourceName
          Value: on-demand-queue