Skip to content

Instantly share code, notes, and snippets.

@ryoppippi
Created April 16, 2017 08:27
Show Gist options
  • Save ryoppippi/9b352601a68416d0455d8e9c24d36014 to your computer and use it in GitHub Desktop.
Save ryoppippi/9b352601a68416d0455d8e9c24d36014 to your computer and use it in GitHub Desktop.
自然言語処理100本ノック10-19 http://www.cl.ecei.tohoku.ac.jp/nlp100/
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\nwc hightemp.txt",
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": " 24 98 813 hightemp.txt\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\nsed 's/\\t/ /g' hightemp.txt ",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n和歌山県\tかつらぎ\t40.6\t1994-08-08\n静岡県\t天竜\t40.6\t1994-08-04\n山梨県\t勝沼\t40.5\t2013-08-10\n埼玉県\t越谷\t40.4\t2007-08-16\n群馬県\t館林\t40.3\t2007-08-16\n群馬県\t上里見\t40.3\t1998-07-04\n愛知県\t愛西\t40.3\t1994-08-05\n千葉県\t牛久\t40.2\t2004-07-20\n静岡県\t佐久間\t40.2\t2001-07-24\n愛媛県\t宇和島\t40.2\t1927-07-22\n山形県\t酒田\t40.1\t1978-08-03\n岐阜県\t美濃\t40\t2007-08-16\n群馬県\t前橋\t40\t2001-07-24\n千葉県\t茂原\t39.9\t2013-08-11\n埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\ncut -f 1 hightemp.txt > col1.txt\ncut -f 2 hightemp.txt > col2.txt\necho \"col1.txt\"\ncat col1.txt\necho \"col2.txt\"\ncat col2.txt",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "col1.txt\n高知県\n埼玉県\n岐阜県\n山形県\n山梨県\n和歌山県\n静岡県\n山梨県\n埼玉県\n群馬県\n群馬県\n愛知県\n千葉県\n静岡県\n愛媛県\n山形県\n岐阜県\n群馬県\n千葉県\n埼玉県\n大阪府\n山梨県\n山形県\n愛知県\ncol2.txt\n江川崎\n熊谷\n多治見\n山形\n甲府\nかつらぎ\n天竜\n勝沼\n越谷\n館林\n上里見\n愛西\n牛久\n佐久間\n宇和島\n酒田\n美濃\n前橋\n茂原\n鳩山\n豊中\n大月\n鶴岡\n名古屋\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\npaste col1.txt col2.txt > colmerge.txt\ncat colmerge.txt",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "高知県\t江川崎\n埼玉県\t熊谷\n岐阜県\t多治見\n山形県\t山形\n山梨県\t甲府\n和歌山県\tかつらぎ\n静岡県\t天竜\n山梨県\t勝沼\n埼玉県\t越谷\n群馬県\t館林\n群馬県\t上里見\n愛知県\t愛西\n千葉県\t牛久\n静岡県\t佐久間\n愛媛県\t宇和島\n山形県\t酒田\n岐阜県\t美濃\n群馬県\t前橋\n千葉県\t茂原\n埼玉県\t鳩山\n大阪府\t豊中\n山梨県\t大月\n山形県\t鶴岡\n愛知県\t名古屋\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\nhead -n 5 hightemp.txt",
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\ntail -n 5 hightemp.txt",
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": "埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n",
"name": "stdout"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "%%bash\nsplit -l hightemp.txt\ncat xaa\ncat xab\ncat xac\ncat xad\ncat xae",
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n和歌山県\tかつらぎ\t40.6\t1994-08-08\n静岡県\t天竜\t40.6\t1994-08-04\n山梨県\t勝沼\t40.5\t2013-08-10\n埼玉県\t越谷\t40.4\t2007-08-16\n群馬県\t館林\t40.3\t2007-08-16\n群馬県\t上里見\t40.3\t1998-07-04\n愛知県\t愛西\t40.3\t1994-08-05\n千葉県\t牛久\t40.2\t2004-07-20\n静岡県\t佐久間\t40.2\t2001-07-24\n愛媛県\t宇和島\t40.2\t1927-07-22\n山形県\t酒田\t40.1\t1978-08-03\n岐阜県\t美濃\t40\t2007-08-16\n群馬県\t前橋\t40\t2001-07-24\n千葉県\t茂原\t39.9\t2013-08-11\n埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n",
"name": "stdout"
},
{
"output_type": "stream",
"text": "split: hightemp.txt: illegal line count\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "%%bash\ncut -f 1 hightemp.txt | sort | uniq",
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": "千葉県\n埼玉県\n大阪府\n山形県\n山梨県\n岐阜県\n愛媛県\n愛知県\n群馬県\n静岡県\n高知県\n和歌山県\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "%%bash\nsort -k3r hightemp.txt",
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": "高知県\t江川崎\t41\t2013-08-12\n埼玉県\t熊谷\t40.9\t2007-08-16\n岐阜県\t多治見\t40.9\t2007-08-16\n山形県\t山形\t40.8\t1933-07-25\n山梨県\t甲府\t40.7\t2013-08-10\n和歌山県\tかつらぎ\t40.6\t1994-08-08\n静岡県\t天竜\t40.6\t1994-08-04\n山梨県\t勝沼\t40.5\t2013-08-10\n埼玉県\t越谷\t40.4\t2007-08-16\n群馬県\t館林\t40.3\t2007-08-16\n群馬県\t上里見\t40.3\t1998-07-04\n愛知県\t愛西\t40.3\t1994-08-05\n千葉県\t牛久\t40.2\t2004-07-20\n静岡県\t佐久間\t40.2\t2001-07-24\n愛媛県\t宇和島\t40.2\t1927-07-22\n山形県\t酒田\t40.1\t1978-08-03\n岐阜県\t美濃\t40\t2007-08-16\n群馬県\t前橋\t40\t2001-07-24\n千葉県\t茂原\t39.9\t2013-08-11\n埼玉県\t鳩山\t39.9\t1997-07-05\n大阪府\t豊中\t39.9\t1994-08-08\n山梨県\t大月\t39.9\t1990-07-19\n山形県\t鶴岡\t39.9\t1978-08-03\n愛知県\t名古屋\t39.9\t1942-08-02\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "%%bash\ncut -f 1 hightemp.txt | sort | uniq -c | sort -r",
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": " 3 群馬県\n 3 山梨県\n 3 山形県\n 3 埼玉県\n 2 静岡県\n 2 愛知県\n 2 岐阜県\n 2 千葉県\n 1 和歌山県\n 1 高知県\n 1 愛媛県\n 1 大阪府\n",
"name": "stdout"
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"threshold": 4,
"number_sections": true,
"toc_cell": false,
"toc_window_display": false,
"toc_section_display": "block",
"sideBar": true,
"navigate_menu": true,
"moveMenuLeft": true,
"widenNotebook": false,
"colors": {
"hover_highlight": "#DAA520",
"selected_highlight": "#FFD700",
"running_highlight": "#FF0000"
},
"nav_menu": {
"height": "12px",
"width": "252px"
}
},
"varInspector": {
"window_display": true,
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"library": "var_list.py",
"delete_cmd_prefix": "del ",
"delete_cmd_postfix": "",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"library": "var_list.r",
"delete_cmd_prefix": "rm(",
"delete_cmd_postfix": ") ",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
]
},
"gist": {
"id": "",
"data": {
"description": "自然言語処理100本ノック10-19 http://www.cl.ecei.tohoku.ac.jp/nlp100/",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment