@mrmrcoleman
Created January 11, 2017 13:05
# Note: 'size' and 'fip' are not standard docker-compose v2 keys; they appear
# in Hyper.sh's compose dialect (instance size and floating IP).
version: '2'
services:
  mast:
    image: seqvence/spark-master
    command: /bin/bash -c "sleep 10 && bin/spark-class org.apache.spark.deploy.master.Master -h mast"
    hostname: mast
    environment:
      MASTER: spark://mast:7077
      SPARK_CONF_DIR: /conf
      SPARK_PUBLIC_DNS: localhost
    size: "m1"
    ports:
      - 4040:4040
      - 6066:6066
      - 7077:7077
      - 8080:8080
      - 7001:7001
      - 7002:7002
      - 7003:7003
      - 7004:7004
      - 7005:7005
      - 7006:7006
  work:
    image: seqvence/spark-worker
    command: /bin/bash -c "sleep 60 && bin/spark-class org.apache.spark.deploy.worker.Worker spark://mast:7077"
    hostname: work
    depends_on:
      - mast
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 512m
      SPARK_WORKER_PORT: 8881
      SPARK_WORKER_WEBUI_PORT: 8081
      SPARK_PUBLIC_DNS: localhost
    size: "m1"
    ports:
      - 8081:8081
      - 7012:7012
      - 7013:7013
      - 7014:7014
      - 7015:7015
      - 7016:7016
  jupyterbook:
    image: seqvence/jupyter-pyspark
    fip: 209.177.93.189
    hostname: jupyter
    environment:
      SPARK_MASTER: mast
    size: "m1"
    ports:
      - 8888:8888
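The worker's `sleep 60` is a crude wait for the master to come up. A more robust alternative is to poll the master's port until it accepts connections. This is a minimal sketch, not part of the original gist; `wait_for_port` is my name, and the host/port (`mast`, 7077) come from the compose file above:

```python
import socket
import time

def wait_for_port(host, port, timeout=60.0, interval=1.0):
    """Poll until a TCP port accepts connections, or the timeout elapses.

    Returns True as soon as a connection succeeds, False on timeout.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=interval):
                return True
        except OSError:
            time.sleep(interval)
    return False

# The worker's entrypoint could then run, instead of `sleep 60 && ...`:
#   if wait_for_port("mast", 7077):
#       launch org.apache.spark.deploy.worker.Worker spark://mast:7077
```

This replaces a fixed delay with an active readiness check, so the worker starts as soon as the master is reachable rather than always waiting a full minute.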
import findspark
findspark.init()  # locate the Spark installation and put pyspark on sys.path

import pyspark

sc = pyspark.SparkContext(appName="first spark based notebook")

# Word count over /etc/mime.types: split each line into words, count each
# word, then sort by count in descending order.
text_file = sc.textFile("/etc/mime.types")
word_counts = (text_file
    .flatMap(lambda line: line.split())
    .map(lambda word: (word, 1))
    .reduceByKey(lambda a, b: a + b)
    .map(lambda pair: (pair[1], pair[0]))  # swap to (count, word) so sortByKey sorts by count
    .sortByKey(ascending=False))
word_counts.take(10)  # the 10 most frequent words
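For readers without a Spark cluster at hand, the same transformation can be sketched in plain Python. This is a stand-in for the pipeline above, not part of the original notebook; `word_counts` is my name for the helper:

```python
from collections import Counter

def word_counts(lines, top_n=10):
    """Pure-Python equivalent of the Spark pipeline:
    flatMap(split) -> map((word, 1)) -> reduceByKey(+) ->
    swap to (count, word) -> sort descending by count.
    """
    counts = Counter(word for line in lines for word in line.split())
    # Like sortByKey, this sorts on the count; sorted() on (count, word)
    # tuples additionally breaks ties by word, in reverse order.
    return sorted(((n, w) for w, n in counts.items()), reverse=True)[:top_n]

# Example: word_counts(["a b a", "b a"]) yields [(3, 'a'), (2, 'b')]
```

Running the real notebook distributes the same computation across the worker containers defined in the compose file.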