Created
April 16, 2017 08:27
-
-
Save ryoppippi/9b352601a68416d0455d8e9c24d36014 to your computer and use it in GitHub Desktop.
自然言語処理100本ノック10-19 http://www.cl.ecei.tohoku.ac.jp/nlp100/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\nwc hightemp.txt", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 24 98 813 hightemp.txt\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\nsed $'s/\\t/ /g' hightemp.txt ", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "高知県 江川崎 41 2013-08-12\n埼玉県 熊谷 40.9 2007-08-16\n岐阜県 多治見 40.9 2007-08-16\n山形県 山形 40.8 1933-07-25\n山梨県 甲府 40.7 2013-08-10\n和歌山県 かつらぎ 40.6 1994-08-08\n静岡県 天竜 40.6 1994-08-04\n山梨県 勝沼 40.5 2013-08-10\n埼玉県 越谷 40.4 2007-08-16\n群馬県 館林 40.3 2007-08-16\n群馬県 上里見 40.3 1998-07-04\n愛知県 愛西 40.3 1994-08-05\n千葉県 牛久 40.2 2004-07-20\n静岡県 佐久間 40.2 2001-07-24\n愛媛県 宇和島 40.2 1927-07-22\n山形県 酒田 40.1 1978-08-03\n岐阜県 美濃 40 2007-08-16\n群馬県 前橋 40 2001-07-24\n千葉県 茂原 39.9 2013-08-11\n埼玉県 鳩山 39.9 1997-07-05\n大阪府 豊中 39.9 1994-08-08\n山梨県 大月 39.9 1990-07-19\n山形県 鶴岡 39.9 1978-08-03\n愛知県 名古屋 39.9 1942-08-02\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\ncut -f 1 hightemp.txt > col1.txt\ncut -f 2 hightemp.txt > col2.txt\necho \"col1.txt\"\ncat col1.txt\necho \"col2.txt\"\ncat col2.txt", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "col1.txt\n高知県\n埼玉県\n岐阜県\n山形県\n山梨県\n和歌山県\n静岡県\n山梨県\n埼玉県\n群馬県\n群馬県\n愛知県\n千葉県\n静岡県\n愛媛県\n山形県\n岐阜県\n群馬県\n千葉県\n埼玉県\n大阪府\n山梨県\n山形県\n愛知県\ncol2.txt\n江川崎\n熊谷\n多治見\n山形\n甲府\nかつらぎ\n天竜\n勝沼\n越谷\n館林\n上里見\n愛西\n牛久\n佐久間\n宇和島\n酒田\n美濃\n前橋\n茂原\n鳩山\n豊中\n大月\n鶴岡\n名古屋\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\npaste col1.txt col2.txt > colmerge.txt\ncat colmerge.txt", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "高知県\t江川崎\n埼玉県\t熊谷\n岐阜県\t多治見\n山形県\t山形\n山梨県\t甲府\n和歌山県\tかつらぎ\n静岡県\t天竜\n山梨県\t勝沼\n埼玉県\t越谷\n群馬県\t館林\n群馬県\t上里見\n愛知県\t愛西\n千葉県\t牛久\n静岡県\t佐久間\n愛媛県\t宇和島\n山形県\t酒田\n岐阜県\t美濃\n群馬県\t前橋\n千葉県\t茂原\n埼玉県\t鳩山\n大阪府\t豊中\n山梨県\t大月\n山形県\t鶴岡\n愛知県\t名古屋\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\nhead -n 5 hightemp.txt", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\ntail -n 5 hightemp.txt", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%bash\nsplit -l 5 hightemp.txt\ncat xaa\ncat xab\ncat xac\ncat xad\ncat xae", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n和歌山県\tかつらぎ\t40.6\t1994-08-08\n静岡県\t天竜\t40.6\t1994-08-04\n山梨県\t勝沼\t40.5\t2013-08-10\n埼玉県\t越谷\t40.4\t2007-08-16\n群馬県\t館林\t40.3\t2007-08-16\n群馬県\t上里見\t40.3\t1998-07-04\n愛知県\t愛西\t40.3\t1994-08-05\n千葉県\t牛久\t40.2\t2004-07-20\n静岡県\t佐久間\t40.2\t2001-07-24\n愛媛県\t宇和島\t40.2\t1927-07-22\n山形県\t酒田\t40.1\t1978-08-03\n岐阜県\t美濃\t40\t2007-08-16\n群馬県\t前橋\t40\t2001-07-24\n千葉県\t茂原\t39.9\t2013-08-11\n埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "%%bash\ncut -f 1 hightemp.txt | sort | uniq", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "千葉県\n埼玉県\n大阪府\n山形県\n山梨県\n岐阜県\n愛媛県\n愛知県\n群馬県\n静岡県\n高知県\n和歌山県\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "%%bash\nsort -k3r hightemp.txt", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n和歌山県\tかつらぎ\t40.6\t1994-08-08\n静岡県\t天竜\t40.6\t1994-08-04\n山梨県\t勝沼\t40.5\t2013-08-10\n埼玉県\t越谷\t40.4\t2007-08-16\n群馬県\t館林\t40.3\t2007-08-16\n群馬県\t上里見\t40.3\t1998-07-04\n愛知県\t愛西\t40.3\t1994-08-05\n千葉県\t牛久\t40.2\t2004-07-20\n静岡県\t佐久間\t40.2\t2001-07-24\n愛媛県\t宇和島\t40.2\t1927-07-22\n山形県\t酒田\t40.1\t1978-08-03\n岐阜県\t美濃\t40\t2007-08-16\n群馬県\t前橋\t40\t2001-07-24\n千葉県\t茂原\t39.9\t2013-08-11\n埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "%%bash\ncut -f 1 hightemp.txt | sort | uniq -c | sort -r", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 3 群馬県\n 3 山梨県\n 3 山形県\n 3 埼玉県\n 2 静岡県\n 2 愛知県\n 2 岐阜県\n 2 千葉県\n 1 和歌山県\n 1 高知県\n 1 愛媛県\n 1 大阪府\n", | |
"name": "stdout" | |
} | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.0", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"toc": { | |
"threshold": 4, | |
"number_sections": true, | |
"toc_cell": false, | |
"toc_window_display": false, | |
"toc_section_display": "block", | |
"sideBar": true, | |
"navigate_menu": true, | |
"moveMenuLeft": true, | |
"widenNotebook": false, | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"selected_highlight": "#FFD700", | |
"running_highlight": "#FF0000" | |
}, | |
"nav_menu": { | |
"height": "12px", | |
"width": "252px" | |
} | |
}, | |
"varInspector": { | |
"window_display": true, | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"library": "var_list.py", | |
"delete_cmd_prefix": "del ", | |
"delete_cmd_postfix": "", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"library": "var_list.r", | |
"delete_cmd_prefix": "rm(", | |
"delete_cmd_postfix": ") ", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
] | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "自然言語処理100本ノック10-19 http://www.cl.ecei.tohoku.ac.jp/nlp100/", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.