Created
November 17, 2016 05:46
-
-
Save Back2Basics/3e2c801109f442883c8075bb7aec8dfd to your computer and use it in GitHub Desktop.
Python Sets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Tonight we are going to be working with sets." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'apple', 'bannana', 'grape'}" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries = set(['apple','bannana','grape'])\n", | |
"groceries" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Adding more of the same elements yields only the unique values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'apple', 'bannana', 'grape'}" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries = set(['apple','bannana','grape','grape','grape'])\n", | |
"groceries" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"You can put in any iterable of immutable (unchangable) things. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{' ', 'a', 'g', 'h', 'i', 'n', 'r', 's', 't'}" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"my_str = set('this is a string')\n", | |
"my_str" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Changeable things like lists don't work because lists don't have a __hash__ function" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "TypeError", | |
"evalue": "unhashable type: 'list'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-62-1c684a90f188>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'wow'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'i'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'am'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'amazed'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'look'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'at'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'her'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'shoes'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" | |
] | |
} | |
], | |
"source": [ | |
"a = set([['wow','i','am','amazed'],['look','at','her','shoes']])\n", | |
"a" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"the order isn't kept" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['bannana', 'grape', 'apple']" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries = list(set(['apple','bannana','grape','grape','grape']))\n", | |
"groceries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'apple', 'bannana', 'grape'}" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries = set(['apple','bannana','grape'])\n", | |
"groceries" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"you can ask questions of a set." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"'orange' in groceries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"last_weeks_groceries = set(['bannana','nuts','pineapple'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"you can compare sets functions like difference, intersection, and union" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'apple', 'grape'}" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries.difference(last_weeks_groceries) #what was different this week" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'bannana'}" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries.intersection(last_weeks_groceries) #what they had in common" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'apple', 'bannana', 'grape', 'nuts', 'pineapple'}" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"groceries.union(last_weeks_groceries) #every item from both weeks together" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"But why did Python creators decide to keep both of these strings? They have the same 4 letters. They are just in a different order." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'asdf', 'fdsa'}" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"b = set(['asdf','fdsa'])\n", | |
"b" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This is an example of how they would change this behaviour. The eq and hash functions are the ones to pay attention to in a class that works with sets. Here is our normal example:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"###WARNING: don't mess with Python internals\n", | |
"### this is a short example of how Python Dev's did it.\n", | |
"class My_String():\n", | |
" def __init__(self, s):\n", | |
" self.s = s\n", | |
"\n", | |
" def __eq__(self, other):\n", | |
" return self.s == other.s #in our next case 'asdf' doesn't equal 'fdsa' so this returns False\n", | |
" \n", | |
" def __hash__(self):\n", | |
" return hash(self.s)\n", | |
"\n", | |
" def __repr__(self): #not needed for this example... just easier to read the answers\n", | |
" return self.s\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"which gives us the normal result keeping both strings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{asdf, fdsa}" | |
] | |
}, | |
"execution_count": 81, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"b = set([My_String('asdf'),My_String('fdsa')])\n", | |
"b" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### But If you wanted to change that behaviour we could change these functions" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# The one important thing to note is:\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## __hash__ and __eq__ need to have a special relationship\n", | |
"## the rule is, if x == y, then hash(x) == hash(y)\n", | |
"## if you don't make that rule work, then things act weird.\n", | |
"\n", | |
"#### to make it easier make sure to use the same variable in __hash__ and __eq__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class My_String():\n", | |
" def __init__(self, s):\n", | |
" self.s = s\n", | |
"\n", | |
" def __eq__(self, other):\n", | |
" return set(self.s)==set(other.s) #in our case {'a','s','d','f'}=={'s','a','f','d'}\n", | |
" \n", | |
" def __hash__(self):\n", | |
" return hash(tuple(sorted(set(self.s)))) #but this gets complicated\n", | |
" \n", | |
" def __repr__(self): #not needed for this example... just easier to read the answers\n", | |
" return self.s\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"I needed to turn it into something hashable which is why I turned the set of letters into a sorted tuple.\n", | |
"\n", | |
"But now My_String class says \"if the string is made of the same letters than don't put a second instance in a set\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{asdf, qwerty}" | |
] | |
}, | |
"execution_count": 83, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"b = set([My_String('asdf'), My_String('fdsa'), My_String('asdfasdfasfdfasdf'), My_String('qwerty')])\n", | |
"b" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"another example:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class Alien():\n", | |
" def __init__(self, arms, name):\n", | |
" self.arms = arms\n", | |
" self.name = name\n", | |
"\n", | |
" def __eq__(self, other):\n", | |
" return self.arms == other.arms\n", | |
" \n", | |
" def __hash__(self):\n", | |
" return hash(self.arms) #make sure you are hashing the thing you are comparing in __eq__\n", | |
"\n", | |
" def __repr__(self): #not needed for this example... just easier to read the answers\n", | |
" return self.name\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"If you have seen a one arm alien you've seen them all." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{klaatu, Ewok}" | |
] | |
}, | |
"execution_count": 85, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"c = set([Alien(1, 'klaatu'), Alien(1, 'barada'), Alien(1, 'nikto'), Alien(2, 'Ewok'), Alien(2, 'Wookie'), ])\n", | |
"c" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment