Skip to content

Instantly share code, notes, and snippets.

@keimina
Created March 5, 2020 10:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keimina/eb509ad201fc0e11f60d745baa58c863 to your computer and use it in GitHub Desktop.
Save keimina/eb509ad201fc0e11f60d745baa58c863 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"s = pd.Series([\"abc\", \"abb\", \"bbc\"])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0    abc\n",
"1    abb\n",
"2    bbc\n",
"dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2     True\n",
"dtype: bool"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.contains(\"c\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on method map in module pandas.core.series:\n",
"\n",
"map(arg, na_action=None) method of pandas.core.series.Series instance\n",
" Map values of Series according to input correspondence.\n",
" \n",
" Used for substituting each value in a Series with another value,\n",
" that may be derived from a function, a ``dict`` or\n",
" a :class:`Series`.\n",
" \n",
" Parameters\n",
" ----------\n",
" arg : function, dict, or Series\n",
" Mapping correspondence.\n",
" na_action : {None, 'ignore'}, default None\n",
" If 'ignore', propagate NaN values, without passing them to the\n",
" mapping correspondence.\n",
" \n",
" Returns\n",
" -------\n",
" Series\n",
" Same index as caller.\n",
" \n",
" See Also\n",
" --------\n",
" Series.apply : For applying more complex functions on a Series.\n",
" DataFrame.apply : Apply a function row-/column-wise.\n",
" DataFrame.applymap : Apply a function elementwise on a whole DataFrame.\n",
" \n",
" Notes\n",
" -----\n",
" When ``arg`` is a dictionary, values in Series that are not in the\n",
" dictionary (as keys) are converted to ``NaN``. However, if the\n",
" dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.\n",
" provides a method for default values), then this default is used\n",
" rather than ``NaN``.\n",
" \n",
" Examples\n",
" --------\n",
" >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])\n",
" >>> s\n",
" 0 cat\n",
" 1 dog\n",
" 2 NaN\n",
" 3 rabbit\n",
" dtype: object\n",
" \n",
" ``map`` accepts a ``dict`` or a ``Series``. Values that are not found\n",
" in the ``dict`` are converted to ``NaN``, unless the dict has a default\n",
" value (e.g. ``defaultdict``):\n",
" \n",
" >>> s.map({'cat': 'kitten', 'dog': 'puppy'})\n",
" 0 kitten\n",
" 1 puppy\n",
" 2 NaN\n",
" 3 NaN\n",
" dtype: object\n",
" \n",
" It also accepts a function:\n",
" \n",
" >>> s.map('I am a {}'.format)\n",
" 0 I am a cat\n",
" 1 I am a dog\n",
" 2 I am a nan\n",
" 3 I am a rabbit\n",
" dtype: object\n",
" \n",
" To avoid applying the function to missing values (and keep them as\n",
" ``NaN``) ``na_action='ignore'`` can be used:\n",
" \n",
" >>> s.map('I am a {}'.format, na_action='ignore')\n",
" 0 I am a cat\n",
" 1 I am a dog\n",
" 2 NaN\n",
" 3 I am a rabbit\n",
" dtype: object\n",
"\n"
]
}
],
"source": [
"help(s.map)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2     True\n",
"dtype: bool"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.map(lambda x: \"c\" in x)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2     True\n",
"dtype: bool"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.contains(\"c\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"s[2] = np.nan"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "argument of type 'float' is not iterable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-16-129a5064ba26>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"c\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 3823\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3824\u001b[0m \"\"\"\n\u001b[0;32m-> 3825\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_map_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mna_action\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3826\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3827\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m_map_values\u001b[0;34m(self, mapper, na_action)\u001b[0m\n\u001b[1;32m 1298\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1299\u001b[0m \u001b[0;31m# mapper is a function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1300\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmap_f\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1301\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnew_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m<ipython-input-16-129a5064ba26>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"c\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: argument of type 'float' is not iterable"
]
}
],
"source": [
"s.map(lambda x: \"c\" in x)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2      NaN\n",
"dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.contains(\"c\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-18-ce80e40847dc>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-18-ce80e40847dc>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m s.map(lambda x: \"c\" in x if x)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"s.map(lambda x: \"c\" in x if x)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bool(np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2    False\n",
"dtype: bool"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.map(lambda x: \"c\" in x if pd.notnull(x) else False)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0     True\n",
"1    False\n",
"2      NaN\n",
"dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.map(lambda x: \"c\" in x if pd.notnull(x) else np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'StringMethods' object has no attribute 'map'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-22-48235c39b090>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'StringMethods' object has no attribute 'map'"
]
}
],
"source": [
"s.str.map"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on method replace in module pandas.core.strings:\n",
"\n",
"replace(pat, repl, n=-1, case=None, flags=0, regex=True) method of pandas.core.strings.StringMethods instance\n",
" Replace occurrences of pattern/regex in the Series/Index with\n",
" some other string. Equivalent to :meth:`str.replace` or\n",
" :func:`re.sub`.\n",
" \n",
" Parameters\n",
" ----------\n",
" pat : str or compiled regex\n",
" String can be a character sequence or regular expression.\n",
" \n",
" .. versionadded:: 0.20.0\n",
" `pat` also accepts a compiled regex.\n",
" \n",
" repl : str or callable\n",
" Replacement string or a callable. The callable is passed the regex\n",
" match object and must return a replacement string to be used.\n",
" See :func:`re.sub`.\n",
" \n",
" .. versionadded:: 0.20.0\n",
" `repl` also accepts a callable.\n",
" \n",
" n : int, default -1 (all)\n",
" Number of replacements to make from start.\n",
" case : bool, default None\n",
" - If True, case sensitive (the default if `pat` is a string)\n",
" - Set to False for case insensitive\n",
" - Cannot be set if `pat` is a compiled regex\n",
" flags : int, default 0 (no flags)\n",
" - re module flags, e.g. re.IGNORECASE\n",
" - Cannot be set if `pat` is a compiled regex\n",
" regex : bool, default True\n",
" - If True, assumes the passed-in pattern is a regular expression.\n",
" - If False, treats the pattern as a literal string\n",
" - Cannot be set to False if `pat` is a compiled regex or `repl` is\n",
" a callable.\n",
" \n",
" .. versionadded:: 0.23.0\n",
" \n",
" Returns\n",
" -------\n",
" Series or Index of object\n",
" A copy of the object with all matching occurrences of `pat` replaced by\n",
" `repl`.\n",
" \n",
" Raises\n",
" ------\n",
" ValueError\n",
" * if `regex` is False and `repl` is a callable or `pat` is a compiled\n",
" regex\n",
" * if `pat` is a compiled regex and `case` or `flags` is set\n",
" \n",
" Notes\n",
" -----\n",
" When `pat` is a compiled regex, all flags should be included in the\n",
" compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled\n",
" regex will raise an error.\n",
" \n",
" Examples\n",
" --------\n",
" When `pat` is a string and `regex` is True (the default), the given `pat`\n",
" is compiled as a regex. When `repl` is a string, it replaces matching\n",
" regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are\n",
" left as is:\n",
" \n",
" >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)\n",
" 0 bao\n",
" 1 baz\n",
" 2 NaN\n",
" dtype: object\n",
" \n",
" When `pat` is a string and `regex` is False, every `pat` is replaced with\n",
" `repl` as with :meth:`str.replace`:\n",
" \n",
" >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)\n",
" 0 bao\n",
" 1 fuz\n",
" 2 NaN\n",
" dtype: object\n",
" \n",
" When `repl` is a callable, it is called on every `pat` using\n",
" :func:`re.sub`. The callable should expect one positional argument\n",
" (a regex object) and return a string.\n",
" \n",
" To get the idea:\n",
" \n",
" >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)\n",
" 0 <_sre.SRE_Match object; span=(0, 1), match='f'>oo\n",
" 1 <_sre.SRE_Match object; span=(0, 1), match='f'>uz\n",
" 2 NaN\n",
" dtype: object\n",
" \n",
" Reverse every lowercase alphabetic word:\n",
" \n",
" >>> repl = lambda m: m.group(0)[::-1]\n",
" >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)\n",
" 0 oof 123\n",
" 1 rab zab\n",
" 2 NaN\n",
" dtype: object\n",
" \n",
" Using regex groups (extract second group and swap case):\n",
" \n",
" >>> pat = r\"(?P<one>\\w+) (?P<two>\\w+) (?P<three>\\w+)\"\n",
" >>> repl = lambda m: m.group('two').swapcase()\n",
" >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)\n",
" 0 tWO\n",
" 1 bAR\n",
" dtype: object\n",
" \n",
" Using a compiled regex with flags\n",
" \n",
" >>> import re\n",
" >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)\n",
" >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')\n",
" 0 foo\n",
" 1 bar\n",
" 2 NaN\n",
" dtype: object\n",
"\n"
]
}
],
"source": [
"help(s.str.replace)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0    abc\n",
"1    abb\n",
"2    NaN\n",
"dtype: object"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"s = pd.Series([\"abc\", \"abb\", \"bbc\"])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0    [(a, b, c)]\n",
"1             []\n",
"2    [(b, b, c)]\n",
"dtype: object"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.findall(r'(a|b)(b)(c)')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.strings.StringMethods at 0x7fbe40d17a10>"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.findall(r'(a|b)(b)(c)').str"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('O')"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.dtype"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0    (a, b, c)\n",
"1          NaN\n",
"2    (b, b, c)\n",
"dtype: object"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.str.findall(r'(a|b)(b)(c)').str[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment