Skip to content

Instantly share code, notes, and snippets.

@maurodoglio
Last active October 4, 2016 14:28
Show Gist options
  • Save maurodoglio/464e3946c88fb66f30c7878921b04b6a to your computer and use it in GitHub Desktop.
Save maurodoglio/464e3946c88fb66f30c7878921b04b6a to your computer and use it in GitHub Desktop.
from moztelemetry.dataset import Dataset
# Start by selecting the `telemetry` dataset.
# Per the original author's notes, this loads all the metadata about the
# available dimensions and file locations.
dataset = Dataset.from_source('telemetry')

# The list of dimensions is exposed through the `schema` attribute.
assert dataset.schema == [
    u'submissionDate',
    u'sourceName',
    u'sourceVersion',
    u'docType',
    u'appName',
    u'appUpdateChannel',
    u'appVersion',
    u'appBuildId',
]

# A filter value can be either a string or a Python function.
# Each `where` call narrows the selection by one dimension.
dataset = dataset.where(docType='main')
dataset = dataset.where(appName='Firefox')
dataset = dataset.where(appUpdateChannel='nightly')
# Function filter: keep submission dates whose string starts with '2016091'.
dataset = dataset.where(submissionDate=lambda day: day.startswith('2016091'))

# Retrieve 10% of the files.
# NOTE(review): `sc` is not defined in this snippet -- presumably the
# SparkContext provided by the notebook environment; confirm before running.
records = dataset.records(sc, sample=0.1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment