example.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example of simple `sparkhpc` usage in a Jupyter notebook"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure Python to use the Spark Python libraries via `findspark`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Make the Spark Python libraries importable in this kernel.\n",
"import findspark\n",
"\n",
"findspark.init()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Launch standalone Spark clusters using `sparkhpc`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sparkhpc"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sparkhpc:Submitted cluster 0\n"
]
}
],
"source": [
"sj = sparkhpc.sparkjob.LSFSparkJob(ncores=4)\n",
"sj.wait_to_start()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<tr>\n",
" <th>Job ID</th>\n",
" <th>Number of cores</th>\n",
" <th>Status</th>\n",
" <th>Spark UI</th>\n",
" <th>Spark URL</th>\n",
" </tr>\n",
" <td>33084182</td>\n",
" <td>4</td>\n",
" <td>running</td>\n",
" <td><a target=\"_blank\" href=\"http://spark.master:8080\">http://spark.master:8080</a></td>\n",
" <td>spark://spark.master:7077</td>\n",
" "
],
"text/plain": [
"<sparkhpc.sparkjob.LSFSparkJob at 0x2aaf3c0237d0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sj"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sparkhpc:Submitted cluster 1\n"
]
}
],
"source": [
"sj2 = sparkhpc.sparkjob.LSFSparkJob(ncores=10)\n",
"sj2.submit()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<tr><td>ClusterID</td>\n",
" <th>Job ID</th>\n",
" <th>Number of cores</th>\n",
" <th>Status</th>\n",
" <th>Spark UI</th>\n",
" <th>Spark URL</th>\n",
" </tr><tr><td>0</td>\n",
" <td>33084182</td>\n",
" <td>4</td>\n",
" <td>running</td>\n",
" <td><a target=\"_blank\" href=\"http://spark.master:8080\">http://spark.master:8080</a></td>\n",
" <td>spark://spark.master:7077</td>\n",
" </tr><tr><td>1</td>\n",
" <td>33084664</td>\n",
" <td>10</td>\n",
" <td>submitted</td>\n",
" <td><a target=\"_blank\" href=\"None\">None</a></td>\n",
" <td>None</td>\n",
" </tr>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sj.show_clusters()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a `SparkContext` and start computing"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pyspark import SparkContext"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sc = SparkContext(master=sj.master_url)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.parallelize(range(100)).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Teardown"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sparkhpc:Job <33084182> is being terminated\n",
"\n",
"INFO:sparkhpc:Job <33084664> is being terminated\n",
"\n"
]
}
],
"source": [
"# Stop the SparkContext before terminating the cluster it is connected to,\n",
"# so the driver is shut down explicitly rather than left attached to a\n",
"# master that is about to be killed.\n",
"sc.stop()\n",
"\n",
"# Terminate both cluster jobs submitted earlier in the notebook.\n",
"sj.stop()\n",
"sj2.stop()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<tr><td>ClusterID</td>\n",
" <th>Job ID</th>\n",
" <th>Number of cores</th>\n",
" <th>Status</th>\n",
" <th>Spark UI</th>\n",
" <th>Spark URL</th>\n",
" </tr><tr><td>0</td>\n",
" <td>33084182</td>\n",
" <td>4</td>\n",
" <td>running</td>\n",
" <td><a target=\"_blank\" href=\"http://spark.master:8080\">http://spark.master:8080</a></td>\n",
" <td>spark://spark.master:7077</td>\n",
" </tr><tr><td>1</td>\n",
" <td>33084664</td>\n",
" <td>10</td>\n",
" <td>submitted</td>\n",
" <td><a target=\"_blank\" href=\"None\">None</a></td>\n",
" <td>None</td>\n",
" </tr>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sj.show_clusters()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}