Skip to content

Instantly share code, notes, and snippets.

@kszucs
Last active June 11, 2016 15:49
Show Gist options
  • Save kszucs/af60654f62ba777bf62d9b2b7fc593ea to your computer and use it in GitHub Desktop.
example workflow
import pandas as pd
from dask_mesos import get
from epos import mesos, spark
from epos.utils import MiB, GiB
@mesos
def generate():  # or crawl
    """Produce the raw example records for the workflow.

    Returns:
        list[list]: rows of ``[char, i, v]`` triples used as the
        workflow's input data.
    """
    # Hard-coded sample data; a real pipeline would fetch/crawl here.
    return [['a', 1, 120],
            ['b', 2, 220],
            ['c', 1, 320],
            ['d', 3, 420],
            ['e', 3, 520]]
@mesos(cpus=0.1, mem=128 * MiB, disk=3 * GiB)
def summarize(data):
    """Aggregate the raw rows by their ``i`` column.

    Args:
        data: iterable of ``[char, i, v]`` rows (as produced by
            ``generate``).

    Returns:
        list[dict]: one record per distinct ``i`` with the summed
        numeric columns (``DataFrame.to_dict(orient='records')``).
    """
    df = pd.DataFrame(data, columns=['char', 'i', 'v'])
    summary = df.groupby('i').sum()
    return summary.to_dict(orient='records')
@mesos(docker='lensa/epos:dev', cpus=1.5, mem=1 * GiB)
@spark(master='mesos://master:5050', docker='org/spark-custom-deps',
       executor_memory=512 * MiB)
def spark_maximum(sc, sqlctx, dcts):
    """Compute the maximum ``v`` value with Spark.

    Args:
        sc: SparkContext (presumably injected by the ``@spark``
            decorator — callers pass only ``dcts``; confirm).
        sqlctx: SQL context, unused here (same injection assumption).
        dcts: list of dicts, each carrying a numeric ``'v'`` key.

    Returns:
        The largest ``'v'`` value across ``dcts``.
    """
    rdd = sc.parallelize(dcts, 2)
    return rdd.map(lambda x: x['v']).max()
@mesos(cpus=3.0, mem=512 * MiB, uris=['custom-mesos-uri'])
def maximum(dcts):
    """Return the largest ``'v'`` value among the given records.

    Args:
        dcts: iterable of dicts, each carrying a numeric ``'v'`` key.

    Returns:
        The maximum ``'v'`` value.
    """
    # Generator expression instead of map(lambda ...): same result,
    # more idiomatic and avoids the throwaway lambda.
    return max(d['v'] for d in dcts)
@mesos(cpus=0.5, mem=128 * MiB)
def aggregate(first, second):
    """Combine two partial results with ``+``.

    Works for any pair supporting ``+`` (numbers, lists, strings).
    """
    return first + second
@mesos(docker='org/custom-image', cpus=0.1, mem=128 * MiB)
def add(a, b):
    """Return ``a + b`` (runs inside the custom Docker image)."""
    return a + b
def workflow():
    """Wire the example tasks into a single lazy task graph.

    Returns:
        The (lazy) result of adding the plain-Python maximum and the
        Spark-computed maximum of the summarized data.
    """
    data = generate()
    summary = summarize(data)
    max1 = maximum(summary)
    # NOTE(review): spark_maximum is declared with (sc, sqlctx, dcts) but
    # called with one argument — presumably @spark injects sc/sqlctx;
    # confirm against the epos API.
    max2 = spark_maximum(summary)
    return add(max1, max2)
# NOTE(review): MesosExecutor is never imported in this snippet — it
# presumably comes from epos or dask_mesos; add the import before running.
with MesosExecutor() as executor:
    # Build the lazy graph, then hand it to the executor to run on Mesos.
    result = workflow()
    executor.compute(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment