Skip to content

Instantly share code, notes, and snippets.

@EnsekiTT
Created March 18, 2018 21:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EnsekiTT/e52475c4ae1c36e931289d48600349a2 to your computer and use it in GitHub Desktop.
Save EnsekiTT/e52475c4ae1c36e931289d48600349a2 to your computer and use it in GitHub Desktop.
pandas.Series.strを全部試す。
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Series.strの文字列処理は52個ある\n",
"# https://pandas.pydata.org/pandas-docs/stable/api.html#string-handling\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Braund, Mr. Owen Harris\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 Heikkinen, Miss. Laina\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 Allen, Mr. William Henry\n",
"Name: Name, dtype: object"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# https://www.kaggle.com/c/titanic/data\n",
"# からダウンロードしてくれば動くはず\n",
"train_df = pd.read_csv('titanic/train.csv')\n",
"Names = train_df.head().Name\n",
"Names"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 大文字小文字を変換したり、文字を置き換えたりする"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####capitalize\n",
"0 Braund, mr. owen harris\n",
"1 Cumings, mrs. john bradley (florence briggs th...\n",
"2 Heikkinen, miss. laina\n",
"3 Futrelle, mrs. jacques heath (lily may peel)\n",
"4 Allen, mr. william henry\n",
"Name: Name, dtype: object\n",
"\n",
"####lower\n",
"0 braund, mr. owen harris\n",
"1 cumings, mrs. john bradley (florence briggs th...\n",
"2 heikkinen, miss. laina\n",
"3 futrelle, mrs. jacques heath (lily may peel)\n",
"4 allen, mr. william henry\n",
"Name: Name, dtype: object\n",
"\n",
"####upper\n",
"0 BRAUND, MR. OWEN HARRIS\n",
"1 CUMINGS, MRS. JOHN BRADLEY (FLORENCE BRIGGS TH...\n",
"2 HEIKKINEN, MISS. LAINA\n",
"3 FUTRELLE, MRS. JACQUES HEATH (LILY MAY PEEL)\n",
"4 ALLEN, MR. WILLIAM HENRY\n",
"Name: Name, dtype: object\n",
"\n",
"####swapcase\n",
"0 bRAUND, mR. oWEN hARRIS\n",
"1 cUMINGS, mRS. jOHN bRADLEY (fLORENCE bRIGGS tH...\n",
"2 hEIKKINEN, mISS. lAINA\n",
"3 fUTRELLE, mRS. jACQUES hEATH (lILY mAY pEEL)\n",
"4 aLLEN, mR. wILLIAM hENRY\n",
"Name: Name, dtype: object\n",
"\n",
"####title\n",
"0 hoge hoge\n",
"1 fuga FuGa\n",
"dtype: object\n",
"0 Hoge Hoge\n",
"1 Fuga Fuga\n",
"dtype: object\n",
"\n",
"####translate\n",
"0 Hraund, Mr. Owen Harris\n",
"1 Iumings, Mrs. John Hradley (Llorence Hriggs Th...\n",
"2 Heikkinen, Miss. Laina\n",
"3 Lutrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 Gllen, Mr. William Henry\n",
"Name: Name, dtype: object\n",
"\n",
"####replace\n",
"0 Braund, Title. Owen Harris\n",
"1 Cumings, Title. John Bradley (Florence Briggs ...\n",
"2 Heikkinen, Title. Laina\n",
"3 Futrelle, Title. Jacques Heath (Lily May Peel)\n",
"4 Allen, Title. William Henry\n",
"Name: Name, dtype: object\n"
]
}
],
"source": [
"# capitalize\n",
"## 1文字目を大文字にしてくれる。\n",
"print('\\n####capitalize')\n",
"print(Names.str.capitalize())\n",
"\n",
"# lower\n",
"## 小文字にしてくれる\n",
"print('\\n####lower')\n",
"print(Names.str.lower())\n",
"\n",
"# upper\n",
"## 大文字にしてくれる\n",
"print('\\n####upper')\n",
"print(Names.str.upper())\n",
"\n",
"# swapcase\n",
"## 大文字と小文字を入れ替える\n",
"print('\\n####swapcase')\n",
"print(Names.str.swapcase())\n",
"\n",
"# title\n",
"## titleケースに変換する(各単語の先頭を大文字にする)\n",
"print('\\n####title')\n",
"data = pd.Series(['hoge hoge', 'fuga FuGa'])\n",
"print(data)\n",
"print(data.str.title())\n",
"\n",
"# translate\n",
"## 辞書に合わせて単語を置き換えていく\n",
"## カエサル暗号みたいな感じ\n",
"dic = str.maketrans(\"ABCDEF\", \"GHIJKL\")\n",
"print('\\n####translate')\n",
"print(Names.str.translate(dic))\n",
"\n",
"# replace\n",
"## 指定のパターンを指定の文字列で置き換える\n",
"print('\\n####replace')\n",
"print(Names.str.replace('Mrs|Mr|Miss', 'Title'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字列やリストを結合する"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####cat with \",\"\n",
"Braund, Mr. Owen Harris,Cumings, Mrs. John Bradley (Florence Briggs Thayer),Heikkinen, Miss. Laina,Futrelle, Mrs. Jacques Heath (Lily May Peel),Allen, Mr. William Henry\n",
"\n",
"####cat with list\n",
"0 Braund, Mr. Owen Harris,1\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 Heikkinen, Miss. Laina,3\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel),4\n",
"4 Allen, Mr. William Henry,5\n",
"Name: Name, dtype: object\n",
"\n",
"####raw\n",
"0 [hoge, fuga]\n",
"1 [uga, piyo]\n",
"dtype: object\n",
"\n",
"####join\n",
"0 hoge,fuga\n",
"1 uga,piyo\n",
"dtype: object\n"
]
}
],
"source": [
"# cat\n",
"## セパレータを使って結合してくれる。\n",
"print('\\n####cat with \",\"')\n",
"print(Names.str.cat(sep=','))\n",
"## 同数とかのリストを用意すると最後にくっつけてくれる\n",
"print('\\n####cat with list')\n",
"print(Names.str.cat(['1','2','3','4','5'], sep=','))\n",
"\n",
"# join\n",
"## Seriesに含まれるリストを指定の区切り文字で区切れる結合を返す\n",
"## 文字列のSeriesに適用すると1文字毎に区切り文字が入るので注意\n",
"data = pd.Series([['hoge','fuga'],['uga','piyo']])\n",
"print('\\n####raw')\n",
"print(data)\n",
"print('\\n####join')\n",
"print(data.str.join(','))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字列の前後を埋めて文字数を合わせたりする"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####center\n",
"0 :::Braund, Mr. Owen Harris::::\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 ::::Heikkinen, Miss. Laina::::\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 :::Allen, Mr. William Henry:::\n",
"Name: Name, dtype: object\n",
"\n",
"####ljust\n",
"0 Braund, Mr. Owen Harris:::::::\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 Heikkinen, Miss. Laina::::::::\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 Allen, Mr. William Henry::::::\n",
"Name: Name, dtype: object\n",
"\n",
"####rjust\n",
"0 :::::::Braund, Mr. Owen Harris\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 ::::::::Heikkinen, Miss. Laina\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 ::::::Allen, Mr. William Henry\n",
"Name: Name, dtype: object\n",
"\n",
"####pad\n",
"0 :::::::Braund, Mr. Owen Harris\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 ::::::::Heikkinen, Miss. Laina\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n",
"4 ::::::Allen, Mr. William Henry\n",
"Name: Name, dtype: object\n",
"\n",
"####zfill\n",
"0 030\n",
"1 002\n",
"2 100\n",
"dtype: object\n"
]
}
],
"source": [
"# center\n",
"## 文字列の左右に文字を追加してくれる\n",
"## デフォルトは空文字だけど今回は全体で最小30文字になるように':'を入れた\n",
"print('\\n####center')\n",
"print(Names.str.center(30, fillchar=':'))\n",
"\n",
"# ljust, rjust\n",
"## 文字列の左か右に文字を追加してくれる\n",
"## デフォルトは空文字だけど今回は全体で最小30文字になるように':'を入れた\n",
"print('\\n####ljust')\n",
"print(Names.str.ljust(30, fillchar=':'))\n",
"print('\\n####rjust')\n",
"print(Names.str.rjust(30, fillchar=':'))\n",
"\n",
"# pad\n",
"## 指定された文字数になるように埋める(デフォルトは左に空白を埋める)\n",
"## デフォルトは空文字だけど今回は全体で最小30文字になるように':'を入れた\n",
"## ljustとおなじになるはず\n",
"print('\\n####pad')\n",
"print(Names.str.pad(30, fillchar=':'))\n",
"\n",
"# zfill\n",
"## 数値などを同じ桁数にするために左側を0で埋める\n",
"## Object型からObject型でつかう\n",
"print('\\n####zfill')\n",
"data = pd.Series(['30', '2', '100'])\n",
"print(data.str.zfill(3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字列の空白文字を削除する"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####raw\n",
"0 hoge \n",
"1 fuga \n",
"dtype: object\n",
"\n",
"####strip\n",
"0 hoge\n",
"1 fuga\n",
"dtype: object\n",
"\n",
"####lstrip\n",
"0 hoge \n",
"1 fuga \n",
"dtype: object\n",
"\n",
"####rstrip\n",
"0 hoge\n",
"1 fuga\n",
"dtype: object\n"
]
}
],
"source": [
"# lstrip, rstrip\n",
"## 左もしくは右、その両方から空文字(改行を含む)を削除する\n",
"data = pd.Series([' hoge ', ' fuga '])\n",
"print('\\n####raw')\n",
"print(data)\n",
"print('\\n####strip')\n",
"print(data.str.strip())\n",
"print('\\n####lstrip')\n",
"print(data.str.lstrip())\n",
"print('\\n####rstrip')\n",
"print(data.str.rstrip())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# パターンマッチや検索を行う"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####contains\n",
"0 True\n",
"1 True\n",
"2 False\n",
"3 True\n",
"4 True\n",
"Name: Name, dtype: bool\n",
"\n",
"####match\n",
"0 True\n",
"1 True\n",
"2 False\n",
"3 True\n",
"4 True\n",
"Name: Name, dtype: bool\n",
"\n",
"####count\n",
"0 1\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 1\n",
"Name: Name, dtype: int64\n",
"\n",
"####endwith\n",
"0 False\n",
"1 True\n",
"2 False\n",
"3 True\n",
"4 False\n",
"Name: Name, dtype: bool\n",
"\n",
"####findall\n",
"0 [(Owen, Harris)]\n",
"1 [(John, Bradley), (Florence, Briggs)]\n",
"2 []\n",
"3 [(Jacques, Heath), (Lily, May)]\n",
"4 [(William, Henry)]\n",
"Name: Name, dtype: object\n",
"\n",
"####extract\n",
" Last Title Name Name2\n",
"0 Braund Mr Owen Harris \n",
"1 Cumings Mrs John Bradley Florence Briggs Thayer\n",
"2 Heikkinen Miss Laina \n",
"3 Futrelle Mrs Jacques Heath Lily May Peel\n",
"4 Allen Mr William Henry \n",
"\n",
"####find\n",
"0 11\n",
"1 8\n",
"2 10\n",
"3 9\n",
"4 10\n",
"Name: Name, dtype: int64\n",
"\n",
"####rfind\n",
"0 16\n",
"1 43\n",
"2 16\n",
"3 38\n",
"4 18\n",
"Name: Name, dtype: int64\n",
"\n",
"####index\n",
"0 11\n",
"1 8\n",
"2 10\n",
"3 9\n",
"4 10\n",
"Name: Name, dtype: int64\n",
"\n",
"####rindex\n",
"0 16\n",
"1 43\n",
"2 16\n",
"3 38\n",
"4 18\n",
"Name: Name, dtype: int64\n"
]
}
],
"source": [
"# contains\n",
"## パターン/正規表現が含まれているかを返す\n",
"## re.searchで動いてる\n",
"print('\\n####contains')\n",
"print(Names.str.contains('.*Mr.*')) #'Mr.'が入っている場合だけTrue\n",
"\n",
"# match\n",
"## re.matchで動いてる\n",
"## 差が出る例を出せていない()\n",
"print('\\n####match')\n",
"print(Names.str.match('.*Mr.*'))\n",
"\n",
"# count\n",
"## パターンに一致する数を数えて返す\n",
"print('\\n####count')\n",
"print(train_df.head().Name.str.count('.*Mr\\..*')) #'Mr.'が入っている数を数える\n",
"\n",
"# endswith\n",
"## 指定のパターンで終わっているかを返す\n",
"print('\\n####endwith')\n",
"print(Names.str.endswith(')')) # 閉じカッコで終わっているものだけTrue\n",
"\n",
"# findall\n",
"## パターン/正規表現を全て検索してリストを返す。\n",
"print('\\n####findall')\n",
"print(Names.str.findall('([A-z]+)[, |.]([A-z]+)')) #スペース前後の単語を抽出した。抽出要素が複数ある場合はタプルになる\n",
"\n",
"# extract\n",
"## パターン/正規表現の最初に一致する物を抽出してデータフレームを返せる\n",
"print('\\n####extract')\n",
"print(Names.str.extract('(?P<Last>.*),(?P<Title>.*)\\.(?P<Name>[A-z ]*) ?\\(?(?P<Name2>[A-z ]*)?\\)?', expand=True))\n",
"\n",
"# find, rfind\n",
"## 対称の文字列を検索して最も小さいインデックスを返す\n",
"## startとendで何文字目から何文字目までという形で検索範囲を指定できる\n",
"## 見つからないと-1\n",
"print('\\n####find')\n",
"print(Names.str.find(' ', start=8))\n",
"print('\\n####rfind')\n",
"print(Names.str.rfind(' ', start=8))\n",
"\n",
"# index, rindex\n",
"## 対称の文字列を検索して最も小さいインデックスを返す\n",
"## startとendで何文字目から何文字目までという形で検索範囲を指定できる\n",
"## 見つからないとValueErrorになる\n",
"print('\\n####index')\n",
"print(Names.str.index(' ', start=8))\n",
"print('\\n####rindex')\n",
"print(Names.str.rindex(' ', start=8))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字列の構成の判定を行う(プリペアドなパターンチェック)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"isalnum\n",
"[True, True, True, True, True, False, True, False, False, False, False]\n",
"isalpha\n",
"[True, False, False, True, False, False, True, False, False, False, False]\n",
"isdigit\n",
"[False, False, True, False, False, False, False, False, False, False, False]\n",
"isspace\n",
"[False, False, False, False, False, True, False, False, False, False, False]\n",
"islower\n",
"[True, True, False, False, False, False, False, False, False, False, True]\n",
"isupper\n",
"[False, False, False, True, False, False, False, False, False, False, False]\n",
"istitle\n",
"[False, False, False, False, True, False, False, True, False, False, False]\n",
"isnumeric\n",
"[False, False, True, False, False, False, False, False, False, False, False]\n",
"isdecimal\n",
"[False, False, True, False, False, False, False, False, False, False, False]\n"
]
}
],
"source": [
"data = pd.Series(['abc', 'ab2', '123', 'ABC', 'Abc30', ' ', 'abC', 'Abc Def', 'abcDef.', '3.14', '10.1e4'])\n",
"\n",
"# isalnum\n",
"## アルファベットと数字かどうかを返す\n",
"print('isalnum')\n",
"print(data.str.isalnum().tolist())\n",
"\n",
"# isalpha\n",
"## アルファベットかどうかを返す\n",
"print('isalpha')\n",
"print(data.str.isalpha().tolist())\n",
"\n",
"# isdigit\n",
"## 数字かどうかを返す\n",
"print('isdigit')\n",
"print(data.str.isdigit().tolist())\n",
"\n",
"# isspace\n",
"## 空文字かどうかを返す\n",
"print('isspace')\n",
"print(data.str.isspace().tolist())\n",
"\n",
"# islower\n",
"## 小文字かどうかを返す\n",
"print('islower')\n",
"print(data.str.islower().tolist())\n",
"\n",
"# isupper\n",
"## 大文字かどうかを返す\n",
"print('isupper')\n",
"print(data.str.isupper().tolist())\n",
"\n",
"# istitle\n",
"## タイトル形式かどうかを返す\n",
"print('istitle')\n",
"print(data.str.istitle().tolist())\n",
"\n",
"# isnumeric\n",
"## 数値かどうかを返す\n",
"print('isnumeric')\n",
"print(data.str.isnumeric().tolist())\n",
"\n",
"# isdecimal\n",
"## 整数値かどうかを返す\n",
"print('isdecimal')\n",
"print(data.str.isdecimal().tolist())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字コードを扱う"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"####encode\n",
"0 b'\\x82\\xd9\\x82\\xb0\\x82\\xd9\\x82\\xb0'\n",
"1 b'\\x82\\xd3\\x82\\xaa\\x82\\xd3\\x82\\xaa'\n",
"dtype: object\n",
"\n",
"####decode\n",
"0 ほげほげ\n",
"1 ふがふが\n",
"dtype: object\n",
"\n",
"####raw\n",
"0 ㌔\n",
"1 ㍉\n",
"2 ㍑\n",
"dtype: object\n",
"\n",
"####normalize\n",
"0 キロ\n",
"1 ミリ\n",
"2 リットル\n",
"dtype: object\n"
]
}
],
"source": [
"# encode / decode\n",
"## 指定の文字コードでエンコードやデコードする\n",
"## sjisのCSVとか掴まされた時に使う\n",
"data = pd.Series(['ほげほげ', 'ふがふが'])\n",
"print('\\n####encode')\n",
"print(data.str.encode('sjis'))\n",
"\n",
"sjis_data = data.str.encode('sjis')\n",
"print('\\n####decode')\n",
"print(sjis_data.str.decode('sjis'))\n",
"\n",
"# normalize\n",
"## ユニコード正規形式を返す\n",
"data = pd.Series([u'㌔', u'㍉', u'㍑'])\n",
"print('\\n####raw')\n",
"print(data)\n",
"print('\\n####normalize')\n",
"print(data.str.normalize('NFKC'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 文字列を分離したり抽出したりする"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"slice\n",
"0 Ban,M.Oe ar\n",
"1 Cmns r.Jh rde Foec rgsTae\n",
"2 Hiknn is an\n",
"3 Ftel,Ms aqe et Ll a el\n",
"4 Aln r ila er\n",
"Name: Name, dtype: object\n",
"slice_replace\n",
"0 hogehoge. Owen Harris\n",
"1 hogehogers. John Bradley (Florence Briggs Thayer)\n",
"2 hogehoge Miss. Laina\n",
"3 hogehogeMrs. Jacques Heath (Lily May Peel)\n",
"4 hogehoge William Henry\n",
"Name: Name, dtype: object\n",
"partition\n",
" 0 1 2\n",
"0 A _ B_C\n",
"1 D _ E_F\n",
"rpartition\n",
" 0 1 2\n",
"0 A_B _ C\n",
"1 D_E _ F\n",
"split\n",
" 0 1\n",
"0 Braund Mr. Owen Harris\n",
"1 Cumings Mrs. John Bradley (Florence Briggs Thayer)\n",
"2 Heikkinen Miss. Laina\n",
"3 Futrelle Mrs. Jacques Heath (Lily May Peel)\n",
"4 Allen Mr. William Henry\n"
]
}
],
"source": [
"# slice\n",
"## 指定したインデックスを抽出する(開始点と終点とステップ)\n",
"print('slice')\n",
"print(Names.str.slice(0,-1, 2))\n",
"\n",
"# slice_replace\n",
"## 指定した範囲を文字列で置換するする\n",
"print('slice_replace')\n",
"print(Names.str.slice_replace(0,10,'hogehoge'))\n",
"\n",
"# partition\n",
"## 指定の文字が最初に出てきたところの前後で分離する\n",
"data = pd.Series(['A_B_C', 'D_E_F'])\n",
"print('partition')\n",
"print(data.str.partition('_'))\n",
"\n",
"# rpartition\n",
"## 指定の文字が最後に出てきたところの前後で分離する\n",
"print('rpartition')\n",
"print(data.str.rpartition('_'))\n",
"\n",
"# split\n",
"## 指定のパターンで文字列を分割する\n",
"## expandをTrueにするとデータフレームとして帰ってくる\n",
"print('split')\n",
"print(Names.str.split(',', expand=True))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# その他諸々"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 23\n",
"1 51\n",
"2 22\n",
"3 44\n",
"4 24\n",
"Name: Name, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# len\n",
"## 各文字列の長さを返す\n",
"Names.str.len()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a b c\n",
"0 1 1 0\n",
"1 1 0 0\n",
"2 1 0 1\n",
" a b c\n",
"0 1 1 0\n",
"1 0 0 0\n",
"2 1 0 1\n",
" a b c\n",
"0 1 1 0\n",
"1 1 0 1\n",
"2 1 0 1\n"
]
}
],
"source": [
"# get_dummies\n",
"## 区切りのある文字列のリストをダミーデータフレームにして返す\n",
"## 順番とかは気にならないらしい。\n",
"print(pd.Series(['a|b', 'a', 'a|c']).str.get_dummies())\n",
"print(pd.Series(['b|a', '', 'a|c']).str.get_dummies())\n",
"print(pd.Series(['b|a', 'c|a', 'a|c']).str.get_dummies())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 O\n",
"1 .\n",
"2 i\n",
"3 s\n",
"4 i\n",
"Name: Name, dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get\n",
"## 指定のインデックスの文字を抽出する\n",
"Names.str.get(12)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"wrap\n",
"0 Braun\\nd,\\nMr.\\nOwen \\nHarri\\ns\n",
"1 Cumin\\ngs,\\nMrs.\\nJohn \\nBradl\\ney (F\\nloren\\n...\n",
"2 Heikk\\ninen,\\nMiss.\\nLaina\n",
"3 Futre\\nlle,\\nMrs. \\nJacqu\\nes\\nHeath\\n(Lily\\nM...\n",
"4 Allen\\n, Mr.\\nWilli\\nam\\nHenry\n",
"Name: Name, dtype: object\n"
]
}
],
"source": [
"# wrap\n",
"## 長い文章を折り返したり、長い単語をハイフンで引っ張って次の行に行ったりする\n",
"## 良いサンプルが無いのでNamesデータでいく\n",
"print('wrap')\n",
"print(Names.str.wrap(5))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Braund, Mr. Owen HarrisBraund, Mr. Owen Harris...\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n",
"2 Heikkinen, Miss. LainaHeikkinen, Miss. LainaHe...\n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel)Fu...\n",
"4 Allen, Mr. William HenryAllen, Mr. William Hen...\n",
"Name: Name, dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# repeat\n",
"## 各文字列を指定回数リピートする\n",
"Names.str.repeat(3)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment