Skip to content

Instantly share code, notes, and snippets.

@maurodoglio
Created September 29, 2016 09:27
Show Gist options
  • Save maurodoglio/15f3192bd6b545686d41f3a025324e2a to your computer and use it in GitHub Desktop.
Save maurodoglio/15f3192bd6b545686d41f3a025324e2a to your computer and use it in GitHub Desktop.
#!/usr/bin/pyspark
import logging
from os import environ
from mozaggregator.aggregator import aggregate_metrics
from mozaggregator.db import submit_aggregates
from pyspark import SparkContext, SparkConf
# Driver script: create the Spark context, aggregate telemetry metrics for a
# single day, report the aggregate counts and submit them via
# mozaggregator.db.submit_aggregates.
conf = SparkConf().setAppName('telemetry-aggregates')
sc = SparkContext(conf=conf)

# NOTE: every print below uses the single-argument parenthesized form, which
# produces the same output on Python 2 and Python 3 (the bare `print x`
# statement is a SyntaxError on Python 3).
print(sc.defaultParallelism)

# The day to process is read from the `date` environment variable; if it is
# not set this raises KeyError before the aggregation is started.
date = environ['date']
print("Running job for {}".format(date))

aggregates = aggregate_metrics(sc, ("nightly", "aurora", "beta", "release"), date)

# `aggregates` is indexed as a pair: element 0 is reported as the build-id
# aggregates and element 1 as the submission date aggregates.
print("Number of build-id aggregates: {}".format(aggregates[0].count()))
print("Number of submission date aggregates: {}".format(aggregates[1].count()))

submit_aggregates(aggregates)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment