Skip to content

Instantly share code, notes, and snippets.

@vitillo
Last active October 23, 2015 16:36
Show Gist options
  • Save vitillo/0bce456f1805ffa5e60e to your computer and use it in GitHub Desktop.
Save vitillo/0bce456f1805ffa5e60e to your computer and use it in GitHub Desktop.
Addon analysis
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import datetime as dt\nimport operator\nimport pandas as pd\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client\nfrom __future__ import division\n\n%pylab inline", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"source": "### Prepare dataset", "cell_type": "markdown", "metadata": {}}, {"execution_count": 17, "cell_type": "code", "source": "yesterday = (dt.datetime.now() - dt.timedelta(days=1)).strftime(\"%Y%m%d\")", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "pings = get_pings(sc, app=\"Firefox\", channel=\"release\", submission_date=yesterday, fraction=1, schema=\"v4\")", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "subset = get_pings_properties(pings, [\"clientId\",\n \"environment/system/os/name\",\n \"environment/system/os/version\",\n \"environment/system/cpu/count\",\n \"environment/system/memoryMB\",\n \"environment/settings/telemetryEnabled\",\n \"payload/simpleMeasurements/firstPaint\",\n \"payload/simpleMeasurements/AMI_startup_begin\",\n \"payload/simpleMeasurements/shutdownDuration\",\n \"payload/addonDetails\"]).\\\n filter(lambda p: p[\"environment/settings/telemetryEnabled\"])", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 5, "cell_type": "code", "source": "def add_startup(ping):\n AMI_startup = ping.pop(\"payload/simpleMeasurements/AMI_startup_begin\")\n firstPaint = ping.pop(\"payload/simpleMeasurements/firstPaint\")\n ping[\"startup\"] = firstPaint - AMI_startup if firstPaint and AMI_startup else None\n return ping\n \nsubset = subset.map(add_startup)", 
"outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 6, "cell_type": "code", "source": "def filter_outliers(ping):\n startup = ping[\"startup\"]\n shutdown = ping[\"payload/simpleMeasurements/shutdownDuration\"]\n os = ping[\"environment/system/os/name\"]\n version = ping[\"environment/system/os/version\"]\n cpucount = ping[\"environment/system/cpu/count\"]\n memsize = int(round(ping[\"environment/system/memoryMB\"] / 1000.0))\n \n # Let's remove machines with older configurations or with suspect startup times\n if not startup or not version.startswith(\"6\") or os != \"Windows_NT\" \\\n or cpucount < 2 or memsize < 2 or startup > 60000 or startup <= 0 \\\n or not shutdown or shutdown <= 0:\n return False\n \n return True\n\nfiltered = get_one_ping_per_client(subset.filter(filter_outliers))", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 7, "cell_type": "code", "source": "def clean(s):\n try:\n s = s.decode('ascii').strip()\n return s if len(s) > 0 else None\n except:\n return None\n\ndef extract_addon_names(ping):\n addons = ping[\"payload/addonDetails\"].get(\"XPI\", {})\n addon_names = set()\n \n for addon, desc in addons.iteritems():\n name = clean(desc.get(\"name\", None))\n if name is not None:\n addon_names.add(name)\n \n return addon_names\n\naddon_counts = pd.Series(filtered.flatMap(extract_addon_names).countByValue())\naddon_counts = (addon_counts/addon_counts.sum()).to_dict()\ndel addon_counts[\"Default\"]", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 8, "cell_type": "code", "source": "top_addons = pd.DataFrame(sorted(addon_counts.items(), key=operator.itemgetter(1), reverse=True)[:250], columns=[\"addon\", \"freq\"])\ntop_addons.to_csv(\"addons.csv\", index=False)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 9, "cell_type": "code", "source": "def vectorize(ping):\n startup = ping[\"startup\"]\n 
installed = extract_addon_names(ping)\n # One 0/1 flag per top add-on, in the column order of top_addons\n flags = [1 if name in installed else 0 for name in top_addons[\"addon\"]]\n memsize = int(round(ping[\"environment/system/memoryMB\"] / 1000.0))\n return flags + [startup,\n ping[\"payload/simpleMeasurements/shutdownDuration\"],\n ping[\"environment/system/cpu/count\"],\n memsize]", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 10, "cell_type": "code", "source": "vectorized = pd.DataFrame(filtered.map(vectorize).collect(), columns = list(top_addons[\"addon\"]) +\n [\"startup\", \"shutdown\", \"cpucount\", \"memsize\"])\nvectorized.to_csv(\"vectorized.csv\", index=False)", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"source": "### Fit model", "cell_type": "markdown", "metadata": {}}, {"execution_count": 11, "cell_type": "code", "source": "!mkdir -p ./output", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 12, "cell_type": "code", "source": "%load_ext rpy2.ipython", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 13, "cell_type": "code", "source": "%%R -i yesterday\n\n# TRUE when `mypkg` is among the installed package names\nis.installed <- function(mypkg){\n mypkg %in% installed.packages()[,1]\n}\n\nif (!is.installed(\"dplyr\"))\n install.packages(\"dplyr\", repos=\"http://cran.rstudio.com/\", quiet=TRUE)\n\nif (!is.installed(\"caret\"))\n install.packages(\"caret\", repos=\"http://cran.rstudio.com/\", quiet=TRUE)\n\nlibrary(caret)\nlibrary(plyr)\nlibrary(dplyr)\n\nselect <- dplyr::select\n \n# Dot-and-errorbar chart of the estimates, one row per add-on in the order of `df`\naddon_plot <- function(df) {\n ggplot(df, aes(factor(addon, levels=rev(unique(addon))), Estimate)) +\n geom_point() +\n geom_errorbar(width=.1, aes(ymin=Estimate-Error, ymax=Estimate+Error)) +\n coord_flip() +\n scale_y_continuous(name=\"Startup time overhead in ms\") + scale_x_discrete(name =\"Add-on\") +\n theme_bw()\n}\n\nextract <- function(model) {\n coefs <- 
data.frame(coef(summary(model)))\n coefs %>%\n mutate(addon = gsub(\"`\", \"\", row.names(coefs))) %>%\n select(Estimate, Error=Std..Error, t=t.value, Pr=Pr...t.., addon) %>%\n arrange(-Estimate) %>% filter(Estimate > 0, Pr < 0.01)\n}\n\n# Same selection as extract(), but additionally drops the intercept row and converts the\n# log-scale estimate into a percentage change\nextract_log <- function(model) {\n coefs <- data.frame(coef(summary(model)))\n coefs %>%\n mutate(addon = gsub(\"`\", \"\", row.names(coefs))) %>%\n select(Estimate, Error=Std..Error, t=t.value, Pr=Pr...t.., addon) %>%\n arrange(-Estimate) %>% filter(Estimate > 0, addon != \"(Intercept)\", Pr < 0.01) %>%\n mutate(Estimate = (exp(Estimate) - 1)*100)\n}\n\n# Fit a linear model of `metric` on all other columns of `df`, print R2/RMSE for an 80/20 split,\n# then refit on the whole of `df` and write the significant positive coefficients, joined with\n# `freq`, to <dirname(prefix)>/<metric>_<basename(prefix)>.csv. Returns the written data frame.\n# log.transform must be a single logical: the former c(FALSE, TRUE) default made `if (log.transform)`\n# test a length-2 condition, which R warns about and R >= 4.2 rejects with an error.\npredict_metric <- function(df, freq, metric, prefix, log.transform=FALSE) {\n if (log.transform)\n df[[metric]] <- log(df[[metric]])\n\n # Partition the dataset into training and test set\n set.seed(42)\n data_partition <- createDataPartition(y = df[[metric]], p = 0.80, list = F)\n training <- df[data_partition,]\n testing <- df[-data_partition,]\n\n # Create model\n model <- lm(as.formula(paste(metric, \"~.\")), data=training)\n\n # Evaluate model\n prediction_train <- predict(model, training)\n cat(\"R2 on training set: \", R2(prediction_train, training[[metric]]), \"\\n\")\n cat(\"RMSE on training set: \", RMSE(prediction_train, training[[metric]]), \"\\n\")\n\n prediction_test <- predict(model, testing)\n cat(\"R2 on test set: \", R2(prediction_test, testing[[metric]]), \"\\n\")\n cat(\"RMSE on test set: \", RMSE(prediction_test, testing[[metric]]), \"\\n\")\n\n # Retrain on whole dataset\n model <- lm(as.formula(paste(metric, \"~.\")), data=df)\n\n # Pretty print results\n if (log.transform)\n result <- extract_log(model)\n else\n result <- extract(model)\n\n # addon_plot(result)\n result <- data.frame(lapply(result, function(x){sapply(x, toString)}))\n result <- left_join(result, freq) %>% select(-Pr)\n result <- result[, c(\"addon\", \"freq\", \"Estimate\", \"Error\", \"t\")]\n\n base <- basename(prefix)\n path <- dirname(prefix)\n write.csv(result, file=paste(path, 
\"/\", metric, \"_\", base, \".csv\", sep=\"\"), row.names=FALSE, quote=FALSE)\n return(result)\n}\n \nargs <- commandArgs(trailingOnly = TRUE)\naddons <- read.csv(\"vectorized.csv\", check.names=F) %>% select(-cpucount, -memsize)\naddons_freq <- read.csv(\"addons.csv\", col.names = c(\"addon\", \"freq\"))\n \n# Remove linear combinations\ncmbs <- findLinearCombos(addons)$remove\nif (!is.null(cmbs))\n addons <- addons[, -cmbs]\n \n# Predict!\npredict_metric(addons %>% select(-shutdown), addons_freq, \"startup\", paste(\"./output/addon_summary_\", yesterday, sep=\"\"))\n#predict_metric(addons %>% select(-startup), addons_freq, \"shutdown\", paste(\"./output/addon_summary_\", yesterday, sep=\"\"), TRUE)", "outputs": [{"output_type": "stream", "name": "stderr", "text": "/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: also installing the dependencies \u2018assertthat\u2019, \u2018R6\u2019, \u2018Rcpp\u2019, \u2018magrittr\u2019, \u2018lazyeval\u2019, \u2018DBI\u2019, \u2018BH\u2019\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: also installing the dependencies \u2018colorspace\u2019, \u2018minqa\u2019, \u2018nloptr\u2019, \u2018RcppEigen\u2019, \u2018RColorBrewer\u2019, \u2018dichromat\u2019, \u2018munsell\u2019, \u2018labeling\u2019, \u2018lme4\u2019, \u2018SparseM\u2019, \u2018MatrixModels\u2019, \u2018stringi\u2019, \u2018digest\u2019, \u2018gtable\u2019, \u2018scales\u2019, \u2018proto\u2019, \u2018pbkrtest\u2019, \u2018quantreg\u2019, \u2018iterators\u2019, \u2018stringr\u2019, \u2018ggplot2\u2019, \u2018car\u2019, \u2018foreach\u2019, \u2018plyr\u2019, \u2018reshape2\u2019\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: lattice\n\n res = super(Function, 
self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: ggplot2\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: \nAttaching package: \u2018dplyr\u2019\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from \u2018package:plyr\u2019:\n\n arrange, count, desc, failwith, id, mutate, rename, summarise,\n summarize\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from \u2018package:stats\u2019:\n\n filter, lag\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from \u2018package:base\u2019:\n\n intersect, setdiff, setequal, union\n\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n/home/hadoop/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Joining by: \"addon\"\n\n res = super(Function, self).__call__(*new_args, **new_kwargs)\n"}, {"output_type": "display_data", "data": {"text/plain": "R2 on training set: 0.03146126 \nRMSE on training set: 6744.175 \nR2 on test set: 0.02986745 \nRMSE on test set: 6779.447 \n addon freq\n1 GBBD Banco do Brasil 0.0011268326\n2 Yandex Elements 0.0124129975\n3 (Intercept) NA\n4 GBBD Caixa Economica Federal 0.0011669704\n5 Zotero 0.0005485504\n6 McAfee SiteAdvisor 0.0008622202\n7 Avast Online Security 0.0210456055\n8 Mozilla Firefox Hotfixer 0.0005351712\n9 AVG Security Toolbar 0.0026580167\n10 Hola Better Internet 0.0010599362\n11 avast! 
Online Security 0.0008919519\n12 Avira SearchFree Toolbar plus Web Protection 0.0012026485\n13 Yahoo! Toolbar 0.0049473600\n14 ADB Helper 0.0039766190\n15 FF Toolbar 0.0005054394\n16 AVG SafeGuard toolbar 0.0015178048\n17 Fast Start 0.0014999658\n18 Babylon 0.0007477530\n19 deskCut 0.0017675514\n20 Norton Identity Safe Toolbar 0.0011416985\n21 McAfee WebAdvisor 0.0049295209\n22 Firefox Migration 0.0022596115\n23 Easy Access 0.0009172239\n24 FastestFox 0.0006734237\n25 COBA 0.0023056957\n26 Ads Removal 0.0018151222\n27 Firefox HTTP authentication from sub-resources Hotfix 0.0067847809\n28 Speed Dial [FVD] - New Tab Page, Sync... 0.0007105884\n29 cacaoweb 0.0005693626\n30 Flash Video Downloader - YouTube HD Download [4K] 0.0030980463\n31 HTTPS-Everywhere 0.0010554764\n32 HTML5 location provider 0.0010004727\n33 IE Tab 0.0007670786\n34 Trend Micro Osprey Firefox Extension 0.0032377855\n35 LastPass 0.0025123312\n36 Free Download Manager extension 0.0005589565\n37 Delta Toolbar 0.0007477530\n38 Awesome screenshot: Capture and Annotate 0.0006109871\n39 X-notifier 0.0006838298\n40 Ant Video Downloader 0.0011892692\n41 ColorZilla 0.0007566725\n42 Norton Vulnerability Protection 0.0014761804\n43 TelevisionFanatic 0.0009395227\n44 Trend Micro NSC Firefox Extension 0.0021332517\n45 Amazon 1Button App for Firefox 0.0006972091\n46 AddThis 0.0005069260\n47 Default Tab 0.0006436920\n48 Webmail Ad Blocker 0.0005455773\n49 Avira Browser Safety 0.0058155265\n50 Default Manager 0.0021763627\n51 FoxyProxy Standard 0.0008057299\n52 MEGA 0.0058764766\n53 Cliqz 0.0013319815\n54 AVG Safe Search 0.0005113858\n55 Ask Toolbar 0.0043348863\n56 FEBE 0.0007001823\n57 Allin1Convert 0.0008934385\n58 Default SearchProtected 0.0025257105\n59 Xmarks 0.0017690380\n60 AVG Web TuneUp 0.0074061741\n61 DivX Plus Web Player HTML5 <video> 0.0018493136\n62 Skype Click to Call 0.0307812607\n63 Hotspot Shield Extension 0.0006496383\n64 Personas Plus 0.0021704163\n65 Adobe Contribute Toolbar 0.0007893774\n66 
iMacros for Firefox 0.0005500370\n67 HP Detect 0.0009960130\n68 The Addon Bar (restored) 0.0006065273\n69 Internet Speed Tracker 0.0013215754\n70 Search Enginer 0.0009187105\n71 Quick Translator 0.0007819445\n72 GamingWonderland 0.0007046420\n73 VideoDownloadConverter 0.0021332517\n74 SweetPacks Toolbar for Firefox 0.0009677678\n75 Easy YouTube Video Downloader 0.0011089936\n76 Evernote Web Clipper 0.0017199806\n77 NetVideoHunter 0.0008755995\n78 GMX MailCheck 0.0008087031\n79 IDS_SS_NAME 0.0010197984\n80 Pin It button 0.0007224811\n81 FromDocToPDF 0.0013572535\n82 New Tab by Yahoo 0.0052863017\n83 IDM CC 0.0059537791\n84 YouTube Flash Video Player 0.0008770861\n85 HP Smart Web Printing 0.0087604544\n86 Flash and Video Download 0.0017734977\n87 WOT 0.0045935524\n88 anonymoX 0.0012621120\n89 SaveFrom.net - helper 0.0025138178\n90 All-in-One Sidebar 0.0012442729\n91 MapsGalaxy 0.0017913368\n92 McAfee Security Scan Plus 0.0038012018\n93 Test Pilot 0.0013899584\n94 Tab Mix Plus 0.0037699834\n95 RealPlayer Browser Record Plugin 0.0029285755\n96 DVDVideoSoft YouTube MP3 and Video Download 0.0022031212\n97 DigitalPersona Extension 0.0022848835\n98 Lightbeam 0.0025777410\n99 Greasemonkey 0.0035574016\n100 Element Hiding Helper for Adblock Plus 0.0030118243\n101 Norton Toolbar 0.0245405704\n102 Microsoft .NET Framework Assistant 0.0126463916\n103 Adblock Plus 0.0476525312\n104 Firebug 0.0043556985\n Estimate Error t\n1 5198.19306632295 258.710916603648 20.0926699752941\n2 4525.02638993824 81.2707797400891 55.6783927065751\n3 4223.77029414349 8.95682840185006 471.569857615119\n4 3574.99974335765 253.856494642088 14.0827586404596\n5 3378.79077523889 356.107445029463 9.48812169585316\n6 2633.88639013501 281.918299816394 9.34272940724455\n7 2263.76085522341 60.4922888821164 37.4223045128096\n8 2199.918770853 356.394926017217 6.17269946976385\n9 2113.52138296086 162.5527407246 13.0020655052604\n10 2059.27682830884 252.857051990645 8.14403558096146\n11 1947.77297725708 
277.210417062417 7.02633399530046\n12 1868.98523394015 240.280444256978 7.77834933558425\n13 1855.69313168778 121.317403957583 15.2961823378334\n14 1834.01917991624 159.687423822249 11.4850570947761\n15 1780.55815146327 391.511606341745 4.54790642888132\n16 1751.5762123658 217.603676562856 8.04938703257548\n17 1729.22892547952 234.549479138325 7.37255495869042\n18 1699.96187228839 309.825753130467 5.4868320503123\n19 1656.38483300528 280.257952364752 5.91021528213238\n20 1628.18494704979 245.174583366229 6.64092062356109\n21 1611.17047999205 119.629029895115 13.4680560513167\n22 1586.24674508879 456.349667459175 3.47594587703013\n23 1565.02569162606 355.196984376321 4.40607820579908\n24 1556.89149464482 327.908150480585 4.74794997429318\n25 1547.9103539542 559.456220855414 2.76681230139408\n26 1530.92064283776 230.001604859932 6.65613026383043\n27 1452.19941790247 100.123534059995 14.5040767042072\n28 1440.01923437281 315.581234775233 4.56306990305819\n29 1403.05302796202 345.694986514956 4.0586444197718\n30 1388.83085931628 155.789390213071 8.91479745454292\n31 1371.16489404857 265.655713886103 5.16143573195059\n32 1360.02538218299 265.661056941613 5.11940062965983\n33 1347.60709449364 305.942666954064 4.40477004371533\n34 1321.07679891872 318.614623847973 4.14631564290369\n35 1314.50269406541 172.877227840725 7.60367753742843\n36 1311.03476447768 348.44423360284 3.76253827168228\n37 1290.53023261412 302.913017761225 4.26039871825975\n38 1290.03520353402 334.428008428804 3.85743768769491\n39 1266.40786511728 315.387716994278 4.01540008338451\n40 1239.04729353341 242.538553206542 5.10866118871524\n41 1225.24286847269 323.703590643809 3.78507654498311\n42 1221.00420266743 219.214410400447 5.56990847653208\n43 1211.18120790548 272.502594173683 4.44465936765917\n44 1195.96263105821 198.922300644444 6.01220992912146\n45 1187.63321096766 316.996190580794 3.74652202851934\n46 1172.89948620081 369.026113594659 3.17836446525606\n47 1149.79177633696 325.707805662113 
3.53013270283656\n48 1142.93674018186 357.939985187141 3.19309601464139\n49 1132.07493005838 118.090597502411 9.58649506397215\n50 1070.63324272837 204.695119696133 5.23037991485926\n51 1069.15159187217 303.312103014582 3.52492228712934\n52 1063.36634564025 108.294677839447 9.81919302827372\n53 1044.66627625333 226.894649304332 4.60419088531317\n54 1040.98517625046 366.678303051384 2.83896038458699\n55 1033.97905012034 128.074465654643 8.07326460302006\n56 1028.00892024698 330.400473298153 3.11140268651886\n57 1026.51714006649 280.887736162768 3.65454595522692\n58 1002.51816066255 298.338391565556 3.36033909481698\n59 981.88753132297 205.213570972486 4.78471051729136\n60 977.013971536993 97.8766606086591 9.98209343740685\n61 974.216806331756 193.815604867011 5.02651376807471\n62 965.578909304335 53.3508939352507 18.098645366209\n63 955.301004163789 325.572436891899 2.9342195343182\n64 949.853853034841 184.274949492143 5.15454680982203\n65 947.392737011084 301.187801272572 3.14552160814011\n66 943.311353070609 351.227345300686 2.68575714759065\n67 943.112023956943 265.079168237759 3.55785039702189\n68 928.567800644442 341.875424535889 2.71609988318117\n69 928.349673702956 233.518760819054 3.97548218587159\n70 912.124951434166 303.812575771691 3.00226200023928\n71 890.265576989486 296.360720262994 3.00399316144006\n72 886.532987476332 312.864322266871 2.83360205808359\n73 874.908668402685 183.681030326842 4.76319556159868\n74 872.359985189911 268.351625935092 3.25080938917401\n75 856.496376594174 250.591230797853 3.41790242965482\n76 841.056470728551 217.236307773303 3.87162016952633\n77 826.78979285069 285.287993725845 2.89808828634133\n78 810.68227433974 294.212558058052 2.75543056248396\n79 809.976692334162 259.704802151146 3.11883602315048\n80 805.38216183741 311.133292403762 2.58854382189436\n81 784.153771707016 228.062084388497 3.43833467018233\n82 781.87440982002 114.24314118381 6.84395055771471\n83 774.085049744269 111.144106728831 6.96469720731911\n84 
773.976554387478 282.091929950576 2.7437032832633\n85 771.364970556897 91.3543405541666 8.44365977443112\n86 736.725459254005 203.698973470165 3.61673623928159\n87 729.424255969954 134.263899847582 5.43276529877358\n88 723.334881032826 236.350397927232 3.06043436937867\n89 714.852442333704 167.650615026407 4.26394166356685\n90 697.632893984484 246.831313534354 2.8263549060901\n91 691.598019680579 198.752751757562 3.47969028637244\n92 668.540901678556 136.420589195829 4.90058652890641\n93 609.500046720969 225.027359412491 2.70855974274539\n94 606.725767448544 160.206820386447 3.78714068467881\n95 552.406324386948 154.888916033375 3.56646775336665\n96 537.288825750606 178.362034717414 3.01234972230416\n97 532.661469325024 174.494403305806 3.05259916211479\n98 492.997903618377 166.789853730256 2.9558027217632\n99 455.261077942172 146.908470365429 3.09894369473542\n100 445.328685281018 171.062973931244 2.60330260281814\n101 443.62716170584 57.8676652746246 7.66623570521643\n102 433.220501320276 78.4057722884756 5.52536488928829\n103 411.66092965376 45.91246865492 8.96621204901485\n104 391.31172413141 143.571421761375 2.72555442671451\n"}, "metadata": {}}], "metadata": {"scrolled": false, "collapsed": false, "trusted": true}}, {"execution_count": 14, "cell_type": "code", "source": "%%R -i yesterday\n\npredict_metric(addons %>% select(-startup), addons_freq, \"shutdown\", paste(\"./output/addon_summary_\", yesterday, sep=\"\"), TRUE)", "outputs": [{"output_type": "display_data", "data": {"text/plain": "R2 on training set: 0.01992712 \nRMSE on training set: 0.9449842 \nR2 on test set: 0.01867965 \nRMSE on test set: 0.9454737 \n addon freq\n1 Firefox Migration 0.0022596115\n2 Yandex Elements 0.0124129975\n3 Session Manager 0.0011149399\n4 Zotero 0.0005485504\n5 Firefox HTTP authentication from sub-resources Hotfix 0.0067847809\n6 IDS_SS_NAME 0.0010197984\n7 Hola Better Internet 0.0010599362\n8 Microsoft .NET Framework Assistant 0.0126463916\n9 Default Tab 0.0006436920\n10 
Mozilla Firefox Hotfixer 0.0005351712\n11 SaveFrom.net - helper 0.0025138178\n12 AVG SafeGuard toolbar 0.0015178048\n13 Free Download Manager extension 0.0005589565\n14 ADB Helper 0.0039766190\n15 Yontoo 0.0005158455\n16 Fast Start 0.0014999658\n17 deskCut 0.0017675514\n18 Default SearchProtected 0.0025257105\n19 Awesome screenshot: Capture and Annotate 0.0006109871\n20 McAfee WebAdvisor 0.0049295209\n21 Yahoo! Toolbar 0.0049473600\n22 McAfee ScriptScan for Firefox 0.0012650851\n23 GamingWonderland 0.0007046420\n24 Move Media Player 0.0005217919\n25 avast! Online Security 0.0008919519\n26 GBBD Banco do Brasil 0.0011268326\n27 DuckDuckGo Plus 0.0005827419\n28 IDM CC 0.0059537791\n29 Evernote Web Clipper 0.0017199806\n30 ZenMate Security, Privacy & Unblock VPN 0.0005649029\n31 YouTube Unblocker 0.0007269408\n32 anonymoX 0.0012621120\n33 McAfee Security Scan Plus 0.0038012018\n34 Pin It Button 0.0008205958\n35 Webroot Filtering Extension 0.0007670786\n36 Webmail Ad Blocker 0.0005455773\n37 Pin It button 0.0007224811\n38 McAfee SiteAdvisor 0.0008622202\n39 Print Edit 0.0006526115\n40 iMacros for Firefox 0.0005500370\n41 Firebug 0.0043556985\n42 Amazon 1Button App for Firefox 0.0006972091\n43 Norton Vulnerability Protection 0.0014761804\n44 cacaoweb 0.0005693626\n45 British English Dictionary 0.0006422054\n46 Youdao Word Capturer 0.0005842285\n47 VideoDownloadConverter 0.0021332517\n48 DivX Plus Web Player HTML5 <video> 0.0018493136\n49 United States English Spellchecker 0.0012799510\n50 YouTube Flash Video Player 0.0008770861\n51 Skype Click to Call 0.0307812607\n52 FoxyProxy Standard 0.0008057299\n53 TelevisionFanatic 0.0009395227\n54 Memory Fox 0.0007076152\n55 Default Manager 0.0021763627\n56 Cliqz 0.0013319815\n57 New Tab by Yahoo 0.0052863017\n58 Allin1Convert 0.0008934385\n59 MEGA 0.0058764766\n60 RealPlayer Browser Record Plugin 0.0029285755\n61 SweetPacks Toolbar for Firefox 0.0009677678\n62 AVG Security Toolbar 0.0026580167\n63 Google Toolbar for Firefox 
0.0009454690\n64 iLivid 0.0013572535\n65 Adblock Plus 0.0476525312\n66 HP Detect 0.0009960130\n67 Delta Toolbar 0.0007477530\n68 YouTube mp3 0.0012531924\n69 Lightbeam 0.0025777410\n70 LastPass 0.0025123312\n71 Freemake Video Converter Plugin 0.0013706328\n72 YouTube Video and Audio Downloader 0.0017660648\n73 GBBD Caixa Economica Federal 0.0011669704\n74 Add to Amazon Wish List Button 0.0010881813\n75 FlashGot 0.0031798086\n76 Trend Micro NSC Firefox Extension 0.0021332517\n77 Flash and Video Download 0.0017734977\n78 Avast Online Security 0.0210456055\n79 IDM integration 0.0020708150\n80 HP Smart Web Printing 0.0087604544\n81 AVG Web TuneUp 0.0074061741\n82 Avira Browser Safety 0.0058155265\n83 Tab Mix Plus 0.0037699834\n84 iCloud Bookmarks 0.0021228456\n85 NoScript 0.0053977957\n86 Greasemonkey 0.0035574016\n87 Ghostery 0.0037313322\n88 Video DownloadHelper 0.0119521557\n89 Ask Toolbar 0.0043348863\n Estimate Error t\n1 52.8970782081247 0.0638850594943054 6.64623029166333\n2 44.6446909322648 0.0113772156946048 32.4429238062338\n3 44.2887666490671 0.036218200834254 10.1232645773754\n4 42.7377212703574 0.0498520037030762 7.137900521485\n5 39.8703973042053 0.0140164404322155 23.9394642274993\n6 34.3010905920214 0.0363564562865975 8.11173772683955\n7 33.7151852636914 0.0353978296947531 8.20790063448513\n8 32.7825720138074 0.0109761390979743 25.8326543673939\n9 30.0707699840646 0.0455963135863252 5.76600343663883\n10 29.1490149564601 0.049892248588338 5.12698292088568\n11 27.0448597152489 0.0234696555710256 10.1991298287599\n12 26.4422493707686 0.0304626579456011 7.70174065720434\n13 25.8526436435024 0.0487792194921595 4.71392416835837\n14 25.5407538734435 0.0223548767509279 10.1749723097701\n15 25.4350494352657 0.0515209713649221 4.39855651461078\n16 25.2117320850907 0.0328349257106685 6.84746410023328\n17 24.3892484037625 0.0392337219401527 5.56270351541794\n18 24.3109377291294 0.0417648291511158 5.21050385415676\n19 23.0470475306428 0.0468170675657336 
4.4299356470306\n20 22.5249899545424 0.0167470434122299 12.130190256279\n21 21.5946892987813 0.0169834013743819 11.5126001411844\n22 20.9446054609129 0.0326083216769026 5.83171530750627\n23 20.7895724185858 0.0437983355021348 4.31248750855714\n24 20.7410166756681 0.0511099899300469 3.68768821857159\n25 20.4685847535092 0.0388070930018987 4.79857704115218\n26 20.0233659358906 0.0362173207905952 5.03947420702214\n27 19.8091938261705 0.048711993161874 3.71017952953323\n28 19.7313065731785 0.0155592265692799 11.5738358550534\n29 19.4021241180111 0.0304112294497558 5.83096467830197\n30 19.2700629666144 0.048892246001785 3.60425603129983\n31 19.1537970723427 0.0446577630144861 3.92417517628989\n32 18.319242989159 0.0330870389741979 5.08405223355136\n33 18.3169499614749 0.0190977184349618 8.80717006992221\n34 18.2908162333086 0.0410486148503559 4.09212226603114\n35 17.7976622312296 0.0417900919258812 3.91954724787986\n36 17.7549848220733 0.0501085436884141 3.26163699695766\n37 16.9037050827453 0.0435560060918674 3.58573685845843\n38 16.5078970140627 0.0394661564159351 3.87138966963281\n39 16.1514654574618 0.0468517584829337 3.19571548509213\n40 15.8758747316301 0.0491688313820644 2.9968047369073\n41 15.7773052733791 0.0200987739773646 7.28892110601469\n42 15.5919542489609 0.0443767618095917 3.26513612072967\n43 15.393286934281 0.0306881469387606 4.66551449108166\n44 15.2685215190683 0.0483943483586888 2.93617323343748\n45 15.2058914156053 0.0462126528090656 3.06302912995182\n46 15.1811049265802 0.0519965101683669 2.71817336642007\n47 14.9667734450998 0.0257137769284238 5.42405631236988\n48 14.9574706752091 0.027132530887538 5.13745121444781\n49 14.8807434151637 0.0327725527243765 4.23294430830146\n50 14.316015642561 0.0394904631531658 3.38807103465526\n51 13.4033812806645 0.00746866991731244 16.8411542541574\n52 13.3731490854892 0.0424611063141904 2.95598507934852\n53 13.2684596229648 0.0381480379684854 3.26597567280867\n54 13.2049957645267 0.0480235647631176 
2.58269271792826\n55 13.0251028199872 0.0286555701306637 4.27280826533865\n56 12.7612950161676 0.0317633148512014 3.78118485009777\n57 12.5779427717366 0.015993064949471 7.40793717628012\n58 12.4842720832335 0.0393218862980426 2.99180006998289\n59 12.4738765431355 0.0151603308384325 7.75384135340586\n60 12.462516725797 0.021683126605293 5.41664482325118\n61 12.3968247595025 0.0375669378345513 3.11086046237105\n62 12.3957191274188 0.022755996668021 5.13515915808827\n63 12.3755427025009 0.0377191620407534 3.0932854779273\n64 12.2744757654556 0.0318923324802556 3.63022566146478\n65 12.2327707973898 0.00642735384881681 17.9552646411777\n66 12.0934015116921 0.03710882168022 3.07641888559482\n67 12.0487707310989 0.0424052377840434 2.68278282179394\n68 12.0106525165365 0.0331752765867102 3.41892530247642\n69 11.6648135150001 0.0233491563342865 4.72528686105408\n70 11.4479261912589 0.0242013367672791 4.4785652622086\n71 11.3524845218837 0.0320607475333979 3.35396172777758\n72 10.7832111795887 0.028637313185659 3.57593090281894\n73 10.5940715337982 0.0355377431378899 2.83350291840987\n74 10.3702752693653 0.0357069598783361 2.76334547072773\n75 10.0019439538794 0.0226436279398692 4.2099195504056\n76 9.65593085477849 0.0278474247218583 3.31008620353917\n77 9.49590893100223 0.0285161181589655 3.18125351789087\n78 9.485636498671 0.00846840427362959 10.7013291061864\n79 9.38296353604482 0.0258533312950957 3.46899068781241\n80 8.61268179025894 0.0127888281673572 6.46016889471642\n81 7.97527363827379 0.0137018973211973 5.60010525046872\n82 7.84462732391125 0.0165316760044194 4.56828269535666\n83 7.84351812946449 0.0224275878379977 3.3668838928776\n84 7.07736434419792 0.0252985046228263 2.70298262629841\n85 6.68494820674623 0.0174756578295666 3.70285894364699\n86 6.40306029901905 0.0205659323074942 3.01781372158061\n87 5.95107022023282 0.0203219272752082 2.84457271568536\n88 5.84537279684381 0.0115962493190716 4.89891984500705\n89 5.2658651309845 0.017929332355173 
2.86229357482882\n"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 15, "cell_type": "code", "source": "# -f overwrites archives left by a previous run so the cell can be re-executed\n!gzip -f ./output/{startup*,shutdown*}.csv", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "### Dashboard configuration", "cell_type": "markdown", "metadata": {}}, {"execution_count": 16, "cell_type": "code", "source": "startup_config = {\n \"sort-options\": {\n \"values\": [\"Estimate (ms)\", \"Add-on\", \"Frequency\"],\n \"selected\": \"Estimate (ms)\"\n },\n \"filter-options\": [\n {\"id\": \"Limit\",\n \"values\": [10, 50, 100, 200, 500],\n \"selected\": 10\n }\n ],\n \"title\": [\"Add-ons startup correlations\", \"Correlations between startup time and add-ons\"],\n \"description\": [\"A linear regression model is fit using the add-ons as predictors for the startup time. The job is run weekly on all the data collected on Monday for the release channel on Windows.\",\n \"http://robertovitillo.com/2014/10/07/using-ml-to-correlate-add-ons-to-performance-bottlenecks/\"],\n \"primary-key\": [\"Add-on\"],\n \"header\": [\"Add-on\", \"Frequency\", \"Estimate (ms)\", \"Error (ms)\", \"t-statistic\"],\n \"field-description\": [\"The name of the add-on\", \"The fraction of pings that contained the add-on\", \"The add-on coefficient expresses the effect of the addon on startup time wrt the average startup time without any add-ons\", \"The standard error of the coefficient\", \"The value of the associated t-statistic for the coefficient\"],\n \"url-prefix\": \"https://s3-us-west-2.amazonaws.com/telemetry-public-analysis-2/Addon%20analysis/data/startup_addon_summary\"\n}\n\n# The shutdown model is fit on log(shutdown): the estimate is reported as a percentage change and\n# the error column is the untransformed standard error of the log-scale coefficient, so neither\n# column is labelled in milliseconds\nshutdown_config = {\n \"sort-options\": {\n \"values\": [\"Estimate\", \"Add-on\", \"Frequency\"],\n \"selected\": \"Estimate\"\n },\n \"filter-options\": [\n {\"id\": \"Limit\",\n \"values\": [10, 50, 100, 200, 500],\n \"selected\": 10\n }\n ],\n \"title\": [\"Add-ons shutdown influence\", \"Correlations between shutdown time and 
add-ons\"],\n \"description\": [\"A linear regression model is fit using the add-ons as predictors for the logarithm of the shutdown time. The job is run weekly on all the data collected on Monday for the release channel on Windows.\",\n \"http://robertovitillo.com/2014/10/16/correlating-add-ons-to-slow-shutdown-times/\"],\n \"primary-key\": [\"Add-on\"],\n \"header\": [\"Add-on\", \"Frequency\", \"Estimate\", \"Error\", \"t-statistic\"],\n \"field-description\": [\"The name of the add-on\", \"The fraction of pings that contained the add-on\", \"The add-on coefficient expresses the change in percentage wrt the average shutdown time with no add-ons\", \"The standard error of the coefficient\", \"The value of the associated t-statistic for the coefficient\"],\n \"url-prefix\": \"https://s3-us-west-2.amazonaws.com/telemetry-public-analysis-2/Addon%20analysis/data/shutdown_addon_summary\"\n}\n\nwith open('./output/startup.json', 'w') as f:\n json.dump(startup_config, f)\n \nwith open('./output/shutdown.json', 'w') as f:\n json.dump(shutdown_config, f)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment