Last active
August 29, 2015 14:19
-
-
Save nealmcb/b6d989a83adddcdd459f to your computer and use it in GitHub Desktop.
IPython Notebook with examples, and TypeError, in Learning Spark book dated 2015-03-27: "Second Release"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Notebooks by Neal McBurnett based on the book _Learning Spark_" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Example 3-5: creating an RDD from an in-memory list with `sc.parallelize()`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"lines = sc.parallelize([\"pandas\", \"i like pandas\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"inputRDD = sc.parallelize([\"success\", \"error: issue 1\", \"warning: issue 2\", \"error and warning: weird!\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"errorsRDD = inputRDD.filter(lambda x: \"error\" in x)\n", | |
"warningsRDD = inputRDD.filter(lambda x: \"warning\" in x)\n", | |
"badLinesRDD = errorsRDD.union(warningsRDD)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(8) UnionRDD[12] at union at NativeMethodAccessorImpl.java:-2 []\n", | |
" | PythonRDD[10] at RDD at PythonRDD.scala:43 []\n", | |
" | ParallelCollectionRDD[3] at parallelize at PythonRDD.scala:392 []\n", | |
" | PythonRDD[11] at RDD at PythonRDD.scala:43 []\n", | |
" | ParallelCollectionRDD[3] at parallelize at PythonRDD.scala:392 []\n" | |
] | |
} | |
], | |
"source": [ | |
"print badLinesRDD.toDebugString()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Example 3-15 (with semantic error: `badLinesRDD.count()` returns an int, so concatenating it with a str raises `TypeError`)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "TypeError", | |
"evalue": "cannot concatenate 'str' and 'int' objects", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-10-078b22c97d4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[1;34m\"Input had \"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mbadLinesRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\" concerning lines\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[1;34m\"Here are 10 examples:\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mbadLinesRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[0mline\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mTypeError\u001b[0m: cannot concatenate 'str' and 'int' objects" | |
] | |
} | |
], | |
"source": [ | |
"print \"Input had \" + badLinesRDD.count() + \" concerning lines\"\n", | |
"print \"Here are 10 examples:\"\n", | |
"for line in badLinesRDD.take(10):\n", | |
" print line" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Example 3-15 (error fixed by formatting the count with `%d`; message wording also changed from \"concerning\" to \"worrisome\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Input had 4 worrisome lines\n", | |
"Here are 10 examples:\n", | |
"error: issue 1\n", | |
"error and warning: weird!\n", | |
"warning: issue 2\n", | |
"error and warning: weird!\n" | |
] | |
} | |
], | |
"source": [ | |
"print \"Input had %d worrisome lines\" % (badLinesRDD.count())\n", | |
"print \"Here are 10 examples:\"\n", | |
"for line in badLinesRDD.take(10):\n", | |
" print line" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment