Skip to content

Instantly share code, notes, and snippets.

@mythmon
Created September 21, 2017 21:48
Show Gist options
  • Save mythmon/749921b919663c5f5931b4abac7cb73f to your computer and use it in GitHub Desktop.
Save mythmon/749921b919663c5f5931b4abac7cb73f to your computer and use it in GitHub Desktop.
uptake 2
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"bucket = \"telemetry-parquet\"\n",
"prefix = \"main_summary/v4\"\n",
"s3path = \"s3://{}/{}\".format(bucket, prefix)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 28 ms, sys: 8 ms, total: 36 ms\n",
"Wall time: 3min 15s\n"
]
}
],
"source": [
"%time df = spark.read.parquet(s3path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- document_id: string (nullable = true)\n",
" |-- client_id: string (nullable = true)\n",
" |-- channel: string (nullable = true)\n",
" |-- normalized_channel: string (nullable = true)\n",
" |-- country: string (nullable = true)\n",
" |-- city: string (nullable = true)\n",
" |-- os: string (nullable = true)\n",
" |-- os_version: string (nullable = true)\n",
" |-- os_service_pack_major: long (nullable = true)\n",
" |-- os_service_pack_minor: long (nullable = true)\n",
" |-- windows_build_number: long (nullable = true)\n",
" |-- windows_ubr: long (nullable = true)\n",
" |-- install_year: long (nullable = true)\n",
" |-- profile_creation_date: long (nullable = true)\n",
" |-- subsession_start_date: string (nullable = true)\n",
" |-- subsession_length: long (nullable = true)\n",
" |-- distribution_id: string (nullable = true)\n",
" |-- submission_date: string (nullable = true)\n",
" |-- sync_configured: boolean (nullable = true)\n",
" |-- sync_count_desktop: integer (nullable = true)\n",
" |-- sync_count_mobile: integer (nullable = true)\n",
" |-- app_build_id: string (nullable = true)\n",
" |-- app_display_version: string (nullable = true)\n",
" |-- app_name: string (nullable = true)\n",
" |-- app_version: string (nullable = true)\n",
" |-- timestamp: long (nullable = true)\n",
" |-- env_build_id: string (nullable = true)\n",
" |-- env_build_version: string (nullable = true)\n",
" |-- env_build_arch: string (nullable = true)\n",
" |-- e10s_enabled: boolean (nullable = true)\n",
" |-- e10s_cohort: string (nullable = true)\n",
" |-- locale: string (nullable = true)\n",
" |-- active_experiment_id: string (nullable = true)\n",
" |-- active_experiment_branch: string (nullable = true)\n",
" |-- reason: string (nullable = true)\n",
" |-- timezone_offset: integer (nullable = true)\n",
" |-- plugin_hangs: integer (nullable = true)\n",
" |-- aborts_plugin: integer (nullable = true)\n",
" |-- aborts_content: integer (nullable = true)\n",
" |-- aborts_gmplugin: integer (nullable = true)\n",
" |-- crashes_detected_plugin: integer (nullable = true)\n",
" |-- crashes_detected_content: integer (nullable = true)\n",
" |-- crashes_detected_gmplugin: integer (nullable = true)\n",
" |-- crash_submit_attempt_main: integer (nullable = true)\n",
" |-- crash_submit_attempt_content: integer (nullable = true)\n",
" |-- crash_submit_attempt_plugin: integer (nullable = true)\n",
" |-- crash_submit_success_main: integer (nullable = true)\n",
" |-- crash_submit_success_content: integer (nullable = true)\n",
" |-- crash_submit_success_plugin: integer (nullable = true)\n",
" |-- active_addons_count: long (nullable = true)\n",
" |-- flash_version: string (nullable = true)\n",
" |-- vendor: string (nullable = true)\n",
" |-- is_default_browser: boolean (nullable = true)\n",
" |-- default_search_engine_data_name: string (nullable = true)\n",
" |-- default_search_engine: string (nullable = true)\n",
" |-- loop_activity_counter: struct (nullable = true)\n",
" | |-- open_panel: integer (nullable = true)\n",
" | |-- open_conversation: integer (nullable = true)\n",
" | |-- room_open: integer (nullable = true)\n",
" | |-- room_share: integer (nullable = true)\n",
" | |-- room_delete: integer (nullable = true)\n",
" |-- devtools_toolbox_opened_count: integer (nullable = true)\n",
" |-- client_submission_date: string (nullable = true)\n",
" |-- places_bookmarks_count: integer (nullable = true)\n",
" |-- places_pages_count: integer (nullable = true)\n",
" |-- push_api_notification_received: integer (nullable = true)\n",
" |-- web_notification_shown: integer (nullable = true)\n",
" |-- popup_notification_stats: map (nullable = true)\n",
" | |-- key: string\n",
" | |-- value: struct (valueContainsNull = true)\n",
" | | |-- offered: integer (nullable = true)\n",
" | | |-- action_1: integer (nullable = true)\n",
" | | |-- action_2: integer (nullable = true)\n",
" | | |-- action_3: integer (nullable = true)\n",
" | | |-- action_last: integer (nullable = true)\n",
" | | |-- dismissal_click_elsewhere: integer (nullable = true)\n",
" | | |-- dismissal_leave_page: integer (nullable = true)\n",
" | | |-- dismissal_close_button: integer (nullable = true)\n",
" | | |-- dismissal_not_now: integer (nullable = true)\n",
" | | |-- open_submenu: integer (nullable = true)\n",
" | | |-- learn_more: integer (nullable = true)\n",
" | | |-- reopen_offered: integer (nullable = true)\n",
" | | |-- reopen_action_1: integer (nullable = true)\n",
" | | |-- reopen_action_2: integer (nullable = true)\n",
" | | |-- reopen_action_3: integer (nullable = true)\n",
" | | |-- reopen_action_last: integer (nullable = true)\n",
" | | |-- reopen_dismissal_click_elsewhere: integer (nullable = true)\n",
" | | |-- reopen_dismissal_leave_page: integer (nullable = true)\n",
" | | |-- reopen_dismissal_close_button: integer (nullable = true)\n",
" | | |-- reopen_dismissal_not_now: integer (nullable = true)\n",
" | | |-- reopen_open_submenu: integer (nullable = true)\n",
" | | |-- reopen_learn_more: integer (nullable = true)\n",
" |-- search_counts: array (nullable = true)\n",
" | |-- element: struct (containsNull = true)\n",
" | | |-- engine: string (nullable = true)\n",
" | | |-- source: string (nullable = true)\n",
" | | |-- count: long (nullable = true)\n",
" |-- submission_date_s3: string (nullable = true)\n",
" |-- sample_id: string (nullable = true)\n",
"\n"
]
}
],
"source": [
"df.printSchema()"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
# coding: utf-8
# Script export of a three-cell notebook: build the S3 path of the
# main_summary v4 Parquet dataset, load it into a Spark DataFrame, and print
# the DataFrame's schema.
#
# Must run inside an IPython/Jupyter kernel: `get_ipython` and `spark` are not
# defined in this file and are expected to be provided by the environment
# (presumably `spark` is a SparkSession -- confirm against the cluster setup).

# In[1]:
# The location is kept as separate parts so the bucket and the dataset
# version can be changed independently.
bucket = "telemetry-parquet"
prefix = "main_summary/v4"
s3path = "s3://{}/{}".format(bucket, prefix)

# In[2]:
# Equivalent to the notebook line `%time df = spark.read.parquet(s3path)`.
# run_line_magic() is the supported replacement for the deprecated
# InteractiveShell.magic(); the timed statement still executes in the user
# namespace, so `df` is bound exactly as before.
get_ipython().run_line_magic('time', 'df = spark.read.parquet(s3path)')

# In[3]:
df.printSchema()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment