Skip to content

Instantly share code, notes, and snippets.

@aorjoa
Last active January 19, 2019 02:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aorjoa/60d92bd37317abb50f4904efe5d19080 to your computer and use it in GitHub Desktop.
Save aorjoa/60d92bd37317abb50f4904efe5d19080 to your computer and use it in GitHub Desktop.
Simple Spark Task
{"paragraphs":[{"text":"%spark\nsc.version","user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518098_-1385869494","id":"20190117-204650_193106845","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:959"},{"text":"%spark\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Word count with RDD\nval fileFromURL = IOUtils.toString(\n new URL(\"https://raw.githubusercontent.com/apache/spark/master/README.md\"),\n Charset.forName(\"utf8\")).split(\"\\n\")\nval textFile = sc.parallelize(fileFromURL)\nval counts = textFile.flatMap(line => line.split(\" \")).map(word => (word, 1)).reduceByKey(_ + _)\ncounts.count()","user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"tableHide":false,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"editorHide":false,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518109_-1343489491","id":"20190117-204655_654187683","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:960"},{"text":"%spark\n// Word count with DataSet\nimport spark.implicits._\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\nval fileFromURL = IOUtils.toString(\n new URL(\"https://raw.githubusercontent.com/apache/spark/master/README.md\"),\n Charset.forName(\"utf8\")).split(\"\\n\")\nval data = spark.createDataset(fileFromURL)\nval words = data.flatMap(value => value.split(\" \"))\nval groupedWords = words.groupByKey(identity)\nval countInGroup = groupedWords.count()\ncountInGroup.count()","user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518109_-649212394","id":"20190117-204839_2089662210","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:961"},{"text":"%spark\ncountInGroup.show()","user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"tableHide":false,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"editorHide":false,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518110_-457998235","id":"20190117-205400_1529299486","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:962"},{"text":"%spark\n// Spark SQL\n// register table\ncountInGroup.registerTempTable(\"words\")","user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"tableHide":false,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"editorHide":false,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518111_-556403258","id":"20190117-205413_2065839891","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:963"},{"text":"%spark.sql select * from words\n","user":"anonymous","dateUpdated":"2019-01-19T02:05:21+0000","config":{"editorSetting":{"language":"sql","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/sql","fontSize":9,"results":{"0":{"graph":{"mode":"multiBarChart","height":300,"optionOpen":true,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"value":"string","count(1)":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false},"multiBarChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"},"stackedAreaChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"},"lineChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"},"pieChart":{}},"commonSetting":{},"keys":[{"name":"value","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"count(1)","index":1,"aggr":"sum"}]},"helium":{}}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518111_-1891122300","id":"20190117-210217_2110671993","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:964"},{"user":"anonymous","dateUpdated":"2019-01-19T02:05:18+0000","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"fontSize":9},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1547863518112_2118016876","id":"20190118-143133_382468296","dateCreated":"2019-01-19T02:05:18+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:965"}],"name":"Hello World","id":"2E1UXPA8E","noteParams":{},"noteForms":{},"angularObjects":{"spark:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment