rokroskar/sparkhpc

sparkhpc/templates/sparkjob.slurm.template

#!/usr/bin/env python
#SBATCH -J {jobname}
#SBATCH -t {walltime}                      # requested walltime, in minutes
#SBATCH -o {jobname}-%J.log                # stdout/stderr log file; %J expands to the job ID
#SBATCH -n {number_of_executors:d}         # one task per Spark executor
#SBATCH -c {cores_per_executor:d}          # cores per task (executor)
#SBATCH --mem-per-cpu={memory_per_core:d}  # memory per core (SLURM default unit is MB)
#SBATCH -N {number_of_executors:d}         # one node per executor
#SBATCH --ntasks-per-core=1                # at most one task per core

# set up the Spark paths and node-local scratch directories
import os
os.environ['SPARK_HOME'] = '{spark_home}'
os.environ['SPARK_LOCAL_DIRS'] = '/tmp'  # node-local scratch space
os.environ['LOCAL_DIRS'] = os.environ['SPARK_LOCAL_DIRS']
os.environ['SPARK_WORKER_DIR'] = os.path.join(os.environ['SPARK_LOCAL_DIRS'], 'work')

from sparkhpc import sparkjob

# launch the Spark master and workers inside the allocation
sparkjob.start_cluster('{memory_per_executor}M',
                       cores_per_executor={cores_per_executor},
                       spark_home='{spark_home}',
                       master_log_dir='{master_log_dir}',
                       master_log_filename='{master_log_filename}')
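
The {placeholder} fields above use Python str.format syntax, so the script is rendered by substituting concrete values before it is submitted. Below is a minimal sketch of how the template could be filled in and handed to sbatch by hand; the template path and all parameter values are illustrative assumptions, and sparkhpc normally renders and submits this template itself.

import subprocess
import tempfile

# Path to the template is an assumption for this example.
template = open('sparkjob.slurm.template').read()

# All values below are illustrative, not sparkhpc defaults.
script = template.format(
    jobname='sparkcluster',
    walltime='30',              # minutes
    number_of_executors=4,
    cores_per_executor=8,
    memory_per_core=2000,       # MB
    memory_per_executor=16000,  # MB
    spark_home='/path/to/spark',
    master_log_dir='/path/to/spark/logs',
    master_log_filename='spark_master.out',
)

# Write the rendered script to disk and submit it to SLURM.
with tempfile.NamedTemporaryFile('w', suffix='.py', delete=False) as f:
    f.write(script)

subprocess.check_call(['sbatch', f.name])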