-
-
Save olegchir/249c374e567ee0ab8ba8b222a922698d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"body": { | |
"angularObjects": {}, | |
"config": { | |
"isZeppelinNotebookCronEnable": false, | |
"looknfeel": "default" | |
}, | |
"defaultInterpreterGroup": "spark", | |
"id": "2A94M5J1Z", | |
"info": { | |
"isRunning": false | |
}, | |
"name": "2. Spark Basic Features", | |
"noteForms": {}, | |
"noteParams": {}, | |
"paragraphs": [ | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 12.0, | |
"editorHide": true, | |
"editorMode": "ace/mode/markdown", | |
"editorSetting": { | |
"editOnDblClick": true, | |
"language": "markdown" | |
}, | |
"enabled": true, | |
"results": [ | |
{ | |
"graph": { | |
"groups": [], | |
"height": 300.0, | |
"keys": [], | |
"mode": "table", | |
"optionOpen": false, | |
"scatter": {}, | |
"values": [] | |
} | |
} | |
], | |
"tableHide": false | |
}, | |
"dateCreated": "Feb 13, 2015 11:16:21 PM", | |
"dateFinished": "Sep 22, 2020 7:23:32 AM", | |
"dateStarted": "Sep 22, 2020 7:23:30 AM", | |
"dateUpdated": "Sep 22, 2020 7:23:30 AM", | |
"id": "20150213-231621_168813393", | |
"jobName": "paragraph_1423836981412_-1007008116", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "<div class=\"markdown-body\">\n<h2>Welcome to Zeppelin.</h2>\n<h5>This is a live tutorial, you can run the code yourself. (Shift-Enter to Run)</h5>\n\n</div>", | |
"type": "HTML" | |
} | |
] | |
}, | |
"runtimeInfos": {}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "%md\n## Welcome to Zeppelin.\n##### This is a live tutorial, you can run the code yourself. (Shift-Enter to Run)", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 12.0, | |
"editorMode": "ace/mode/scala", | |
"editorSetting": { | |
"completionKey": "TAB", | |
"completionSupport": true, | |
"editOnDblClick": false, | |
"language": "scala" | |
}, | |
"enabled": true, | |
"fontSize": 9.0, | |
"results": [ | |
{ | |
"graph": { | |
"height": 300.0, | |
"mode": "table", | |
"optionOpen": false | |
} | |
} | |
], | |
"title": true | |
}, | |
"dateCreated": "Feb 10, 2015 1:52:59 AM", | |
"dateFinished": "Sep 22, 2020 7:23:53 AM", | |
"dateStarted": "Sep 22, 2020 7:23:32 AM", | |
"dateUpdated": "Sep 22, 2020 7:30:04 AM", | |
"id": "20150210-015259_1403135953", | |
"jobName": "paragraph_1423500779206_-1502780787", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "\u001b[33mwarning: \u001b[0mthere was one deprecation warning; re-run with -deprecation for details\nimport sqlContext.implicits._\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\u001b[1m\u001b[34mbankText\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.rdd.RDD[String]\u001b[0m = ParallelCollectionRDD[0] at parallelize at <console>:24\ndefined class Bank\n\u001b[1m\u001b[34mbank\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m = [age: int, job: string ... 3 more fields]\n", | |
"type": "TEXT" | |
} | |
] | |
}, | |
"runtimeInfos": {}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don't need create them manually\n\n// load bank data\nval bankText = sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank = bankText.map(s => s.split(\";\")).filter(s => s(0) != \"\\\"age\\\"\").map(\n s => Bank(s(0).toInt, \n s(1).replaceAll(\"\\\"\", \"\"),\n s(2).replaceAll(\"\\\"\", \"\"),\n s(3).replaceAll(\"\\\"\", \"\"),\n s(5).replaceAll(\"\\\"\", \"\").toInt\n )\n).toDF()\nbank.registerTempTable(\"bank\")\nbank.groupBy(\"\")", | |
"title": "Load data into table", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 4.0, | |
"editorMode": "ace/mode/sql", | |
"editorSetting": { | |
"completionKey": "TAB", | |
"completionSupport": true, | |
"editOnDblClick": false, | |
"language": "sql" | |
}, | |
"enabled": true, | |
"fontSize": 9.0, | |
"results": [ | |
{ | |
"graph": { | |
"commonSetting": {}, | |
"groups": [], | |
"height": 366.0, | |
"keys": [ | |
{ | |
"aggr": "sum", | |
"index": 0.0, | |
"name": "age" | |
} | |
], | |
"mode": "multiBarChart", | |
"optionOpen": false, | |
"setting": { | |
"multiBarChart": { | |
"rotate": { | |
"degree": "-45" | |
}, | |
"xLabelStatus": "default" | |
} | |
}, | |
"values": [ | |
{ | |
"aggr": "sum", | |
"index": 1.0, | |
"name": "value" | |
} | |
] | |
}, | |
"helium": {} | |
} | |
] | |
}, | |
"dateCreated": "Feb 10, 2015 1:53:02 AM", | |
"dateFinished": "Sep 22, 2020 7:23:59 AM", | |
"dateStarted": "Sep 22, 2020 7:23:53 AM", | |
"dateUpdated": "Sep 22, 2020 7:23:53 AM", | |
"id": "20150210-015302_1492795503", | |
"jobName": "paragraph_1423500782552_-1439281894", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n", | |
"type": "TABLE" | |
} | |
] | |
}, | |
"runtimeInfos": { | |
"jobUrl": { | |
"group": "spark", | |
"interpreterSettingId": "spark", | |
"label": "SPARK JOB", | |
"propertyName": "jobUrl", | |
"tooltip": "View in Spark web UI", | |
"values": [ | |
{ | |
"jobUrl": "http://edge.example.com:4040/jobs/job?id=0" | |
} | |
] | |
} | |
}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "%sql \nselect age, count(1) value\nfrom bank \nwhere age < 30 \ngroup by age \norder by age", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 4.0, | |
"editorMode": "ace/mode/sql", | |
"editorSetting": { | |
"completionKey": "TAB", | |
"completionSupport": true, | |
"editOnDblClick": false, | |
"language": "sql" | |
}, | |
"enabled": true, | |
"fontSize": 9.0, | |
"results": [ | |
{ | |
"graph": { | |
"commonSetting": {}, | |
"groups": [], | |
"height": 294.0, | |
"keys": [ | |
{ | |
"aggr": "sum", | |
"index": 0.0, | |
"name": "age" | |
} | |
], | |
"mode": "multiBarChart", | |
"optionOpen": false, | |
"setting": { | |
"multiBarChart": { | |
"rotate": { | |
"degree": "-45" | |
}, | |
"xLabelStatus": "default" | |
} | |
}, | |
"values": [ | |
{ | |
"aggr": "sum", | |
"index": 1.0, | |
"name": "value" | |
} | |
] | |
}, | |
"helium": {} | |
} | |
] | |
}, | |
"dateCreated": "Feb 12, 2015 2:54:04 PM", | |
"dateFinished": "Sep 22, 2020 7:24:01 AM", | |
"dateStarted": "Sep 22, 2020 7:23:59 AM", | |
"dateUpdated": "Sep 22, 2020 7:24:02 AM", | |
"id": "20150212-145404_867439529", | |
"jobName": "paragraph_1423720444030_-1424110477", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n", | |
"type": "TABLE" | |
} | |
] | |
}, | |
"runtimeInfos": { | |
"jobUrl": { | |
"group": "spark", | |
"interpreterSettingId": "spark", | |
"label": "SPARK JOB", | |
"propertyName": "jobUrl", | |
"tooltip": "View in Spark web UI", | |
"values": [ | |
{ | |
"jobUrl": "http://edge.example.com:4040/jobs/job?id=1" | |
} | |
] | |
} | |
}, | |
"settings": { | |
"forms": { | |
"maxAge": { | |
"defaultValue": "30", | |
"displayName": "maxAge", | |
"hidden": false, | |
"name": "maxAge" | |
} | |
}, | |
"params": { | |
"bdtMeta": { | |
"inlay": { | |
"state": { | |
"chart": { | |
"axes": { | |
"mergeYAxes": true | |
}, | |
"series": [ | |
{ | |
"bars": [ | |
{ | |
"column": "value", | |
"index": 1.0, | |
"modifier": "Sum" | |
} | |
], | |
"categories": { | |
"column": "age", | |
"index": 0.0, | |
"modifier": "GroupAndSort" | |
}, | |
"type": "Bar" | |
} | |
] | |
}, | |
"currentPage": "Chart" | |
} | |
} | |
}, | |
"maxAge": "35" | |
} | |
}, | |
"status": "FINISHED", | |
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere age < ${maxAge=30} \ngroup by age \norder by age", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 4.0, | |
"editorMode": "ace/mode/sql", | |
"editorSetting": { | |
"completionKey": "TAB", | |
"completionSupport": true, | |
"editOnDblClick": false, | |
"language": "sql" | |
}, | |
"enabled": true, | |
"fontSize": 9.0, | |
"results": [ | |
{ | |
"graph": { | |
"commonSetting": {}, | |
"groups": [], | |
"height": 280.0, | |
"keys": [ | |
{ | |
"aggr": "sum", | |
"index": 0.0, | |
"name": "age" | |
} | |
], | |
"mode": "stackedAreaChart", | |
"optionOpen": false, | |
"setting": { | |
"stackedAreaChart": { | |
"rotate": { | |
"degree": "-45" | |
}, | |
"xLabelStatus": "default" | |
} | |
}, | |
"values": [ | |
{ | |
"aggr": "sum", | |
"index": 1.0, | |
"name": "value" | |
} | |
] | |
}, | |
"helium": {} | |
} | |
], | |
"runOnSelectionChange": true | |
}, | |
"dateCreated": "Feb 13, 2015 11:04:22 PM", | |
"dateFinished": "Sep 22, 2020 7:24:02 AM", | |
"dateStarted": "Sep 22, 2020 7:24:01 AM", | |
"dateUpdated": "Sep 22, 2020 7:24:01 AM", | |
"id": "20150213-230422_1600658137", | |
"jobName": "paragraph_1423836262027_-210588283", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t17\n24\t13\n25\t33\n26\t56\n27\t64\n28\t78\n29\t56\n30\t92\n31\t86\n32\t105\n33\t61\n34\t75\n35\t46\n36\t50\n37\t43\n38\t44\n39\t30\n40\t25\n41\t19\n42\t23\n43\t21\n44\t20\n45\t15\n46\t14\n47\t12\n48\t12\n49\t11\n50\t8\n51\t6\n52\t9\n53\t4\n55\t3\n56\t3\n57\t2\n58\t7\n59\t2\n60\t5\n66\t2\n69\t1\n", | |
"type": "TABLE" | |
} | |
] | |
}, | |
"runtimeInfos": { | |
"jobUrl": { | |
"group": "spark", | |
"interpreterSettingId": "spark", | |
"label": "SPARK JOB", | |
"propertyName": "jobUrl", | |
"tooltip": "View in Spark web UI", | |
"values": [ | |
{ | |
"jobUrl": "http://edge.example.com:4040/jobs/job?id=2" | |
} | |
] | |
} | |
}, | |
"settings": { | |
"forms": { | |
"marital": { | |
"defaultValue": "single", | |
"displayName": "marital", | |
"hidden": false, | |
"name": "marital", | |
"options": [ | |
{ | |
"value": "single" | |
}, | |
{ | |
"value": "divorced" | |
}, | |
{ | |
"value": "married" | |
} | |
] | |
} | |
}, | |
"params": { | |
"marital": "single" | |
} | |
}, | |
"status": "FINISHED", | |
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere marital=\"${marital=single,single|divorced|married}\" \ngroup by age \norder by age", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 12.0, | |
"editorHide": true, | |
"editorMode": "ace/mode/markdown", | |
"editorSetting": { | |
"editOnDblClick": true, | |
"language": "markdown" | |
}, | |
"enabled": true, | |
"results": [ | |
{ | |
"graph": { | |
"height": 300.0, | |
"mode": "table", | |
"optionOpen": false | |
} | |
} | |
], | |
"tableHide": false | |
}, | |
"dateCreated": "Feb 13, 2015 11:04:28 PM", | |
"dateFinished": "Sep 22, 2020 7:24:02 AM", | |
"dateStarted": "Sep 22, 2020 7:24:02 AM", | |
"dateUpdated": "Sep 22, 2020 7:24:02 AM", | |
"id": "20150213-230428_1231780373", | |
"jobName": "paragraph_1423836268492_216498320", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "<div class=\"markdown-body\">\n<h2>Congratulations, it’s done.</h2>\n<h5>You can create your own notebook in ‘Notebook’ menu. Good luck!</h5>\n\n</div>", | |
"type": "HTML" | |
} | |
] | |
}, | |
"runtimeInfos": {}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "%md\n## Congratulations, it's done.\n##### You can create your own notebook in 'Notebook' menu. Good luck!", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": { | |
"colWidth": 12.0, | |
"editorHide": true, | |
"editorMode": "ace/mode/markdown", | |
"editorSetting": { | |
"editOnDblClick": true, | |
"language": "markdown" | |
}, | |
"enabled": true, | |
"results": [ | |
{ | |
"graph": { | |
"height": 300.0, | |
"mode": "table", | |
"optionOpen": false | |
} | |
} | |
], | |
"tableHide": false | |
}, | |
"dateCreated": "Mar 26, 2015 9:46:58 PM", | |
"dateFinished": "Sep 22, 2020 7:24:02 AM", | |
"dateStarted": "Sep 22, 2020 7:24:02 AM", | |
"dateUpdated": "Sep 22, 2020 7:24:02 AM", | |
"id": "20150326-214658_12335843", | |
"jobName": "paragraph_1427420818407_872443482", | |
"progressUpdateIntervalMs": 500, | |
"results": { | |
"code": "SUCCESS", | |
"msg": [ | |
{ | |
"data": "<div class=\"markdown-body\">\n<p>About bank data</p>\n<pre><code>Citation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM'2011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n</code></pre>\n\n</div>", | |
"type": "HTML" | |
} | |
] | |
}, | |
"runtimeInfos": {}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "%md\n\nAbout bank data\n\n```\nCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM'2011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n```", | |
"user": "anonymous" | |
}, | |
{ | |
"apps": [], | |
"config": {}, | |
"dateCreated": "Jul 3, 2015 1:30:47 PM", | |
"dateUpdated": "Sep 22, 2020 7:24:02 AM", | |
"id": "20150703-133047_853701097", | |
"jobName": "paragraph_1435955447812_-158639899", | |
"progressUpdateIntervalMs": 500, | |
"runtimeInfos": {}, | |
"settings": { | |
"forms": {}, | |
"params": {} | |
}, | |
"status": "FINISHED", | |
"text": "", | |
"user": "anonymous" | |
} | |
], | |
"path": "/Spark Tutorial/2. Spark Basic Features" | |
}, | |
"message": "", | |
"status": "OK" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment