Skip to content

Instantly share code, notes, and snippets.

@marco-c
Created October 11, 2017 10:57
Show Gist options
  • Save marco-c/c2055ae58a238faa80c93b0f733a6357 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"from pyspark.sql import functions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"num_days = 14\n",
"days = [datetime.utcnow().date() - timedelta(1) - timedelta(i) for i in range(0, num_days)]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dataset = SQLContext(sc).read.load(['s3://telemetry-parquet/socorro_crash/v2/crash_date=' + day.strftime('%Y%m%d') for day in days], 'parquet')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dataset = dataset\\\n",
".filter((dataset['product'] == 'Firefox') & (functions.instr(dataset['signature'], 'onepin-opensc-pkcs11.dll') != 0))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"658"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.count()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[(u'0.15.0.0', 625), (u'0.14.0.0', 32), (u'0.12.5361.1102', 1)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset\\\n",
".select(functions.explode(dataset['json_dump']['modules']).alias('module')).rdd\\\n",
".filter(lambda v: 'onepin-opensc-pkcs11.dll' in v['module']['filename'].lower())\\\n",
".map(lambda v: (v['module']['version'], 1))\\\n",
".reduceByKey(lambda x, y: x + y)\\\n",
".sortBy(lambda v: v[1], ascending=False)\\\n",
".collect()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment