Skip to content

Instantly share code, notes, and snippets.

@nealmcb
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nealmcb/b6d989a83adddcdd459f to your computer and use it in GitHub Desktop.
Save nealmcb/b6d989a83adddcdd459f to your computer and use it in GitHub Desktop.
IPython Notebook with examples from the book Learning Spark (edition dated 2015-03-27, "Second Release"), including one example that raises a TypeError
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebooks by Neal McBurnett based on the book _Learning Spark_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Example 3-5"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"lines = sc.parallelize([\"pandas\", \"i like pandas\"])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"inputRDD = sc.parallelize([\"success\", \"error: issue 1\", \"warning: issue 2\", \"error and warning: weird!\"])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"errorsRDD = inputRDD.filter(lambda x: \"error\" in x)\n",
"warningsRDD = inputRDD.filter(lambda x: \"warning\" in x)\n",
"badLinesRDD = errorsRDD.union(warningsRDD)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(8) UnionRDD[12] at union at NativeMethodAccessorImpl.java:-2 []\n",
" | PythonRDD[10] at RDD at PythonRDD.scala:43 []\n",
" | ParallelCollectionRDD[3] at parallelize at PythonRDD.scala:392 []\n",
" | PythonRDD[11] at RDD at PythonRDD.scala:43 []\n",
" | ParallelCollectionRDD[3] at parallelize at PythonRDD.scala:392 []\n"
]
}
],
"source": [
"print badLinesRDD.toDebugString()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Example 3-15 (with semantic error)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "TypeError",
"evalue": "cannot concatenate 'str' and 'int' objects",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-10-078b22c97d4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[1;34m\"Input had \"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mbadLinesRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\" concerning lines\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[1;34m\"Here are 10 examples:\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mbadLinesRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[0mline\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: cannot concatenate 'str' and 'int' objects"
]
}
],
"source": [
"print \"Input had \" + badLinesRDD.count() + \" concerning lines\"\n",
"print \"Here are 10 examples:\"\n",
"for line in badLinesRDD.take(10):\n",
" print line"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Example 3-15 (fix error and language usage)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input had 4 worrisome lines\n",
"Here are 10 examples:\n",
"error: issue 1\n",
"error and warning: weird!\n",
"warning: issue 2\n",
"error and warning: weird!\n"
]
}
],
"source": [
"print \"Input had %d worrisome lines\" % (badLinesRDD.count())\n",
"print \"Here are 10 examples:\"\n",
"for line in badLinesRDD.take(10):\n",
" print line"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment