Skip to content

Instantly share code, notes, and snippets.

@ryoppippi
Last active April 16, 2017 07:53
Show Gist options
  • Save ryoppippi/5c60324b7f26cbf516f6c2c6fcac033e to your computer and use it in GitHub Desktop.
Save ryoppippi/5c60324b7f26cbf516f6c2c6fcac033e to your computer and use it in GitHub Desktop.
自然言語処理100本ノック00-09 http://www.cl.ecei.tohoku.ac.jp/nlp100/
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# Print the string with its characters in reverse order.\ntext0 = 'stressed'\nreversed_text = ''.join(reversed(text0))\nprint(reversed_text)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# Keep every second character of the string, starting with the first one.\ntext1 = 'パタトクカシーー'\npicked = [ch for pos, ch in enumerate(text1) if pos % 2 == 0]\nprint(''.join(picked))",
"execution_count": 20,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "パトカー\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# Interleave the two strings one character at a time, starting with text2_1.\ntext2_1 = 'パトカー'\ntext2_2 = 'タクシー'\nans2 = ''.join(left + right for left, right in zip(text2_1, text2_2))\nprint(ans2)",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "パタトクカシーー\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# List the length of each word of the sentence, in order of appearance.\ntext3 = \"Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.\"\n# Remove the punctuation first so that it is not counted as part of a word.\nwords3 = text3.replace('.', \"\").replace(',', \"\").split()\n# The loop variable is called `word`, not `str`: at cell level a variable named\n# `str` would replace the builtin for every cell run afterwards on this kernel.\ncount = [len(word) for word in words3]\nprint(count)",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "[3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# Map a one- or two-letter prefix of each word to the word's 1-based position.\ntext4 = \"Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.\"\n# 1-based positions of the words that contribute only their first letter;\n# every other word contributes its first two letters.\nindex1 = [1, 5, 6, 7, 8, 9, 15, 16, 19]\n# Named `prefix_to_position` rather than `dict` so the builtin stays usable.\nprefix_to_position = {}\n# enumerate() supplies each position directly. Looking it up with list.index()\n# rescans the list and returns the first match, which is the wrong position\n# whenever a word occurs more than once.\nfor position, word in enumerate(text4.split(), start=1):\n    width = 1 if position in index1 else 2\n    prefix_to_position[word[:width]] = position\nprint(prefix_to_position)",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "{'H': 1, 'He': 2, 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10, 'Na': 11, 'Mi': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18, 'K': 19, 'Ca': 20}\n"
}
]
},
{
"metadata": {
"collapsed": false,
"scrolled": false,
"trusted": true
},
"cell_type": "code",
"source": "def ngram(input, n):\n    \"\"\"Return every run of n consecutive items of input, in order.\n\n    input is sliced, so a string yields substrings and a list yields\n    sub-lists.\n    \"\"\"\n    window_count = len(input) - n + 1\n    return [input[start:start + n] for start in range(window_count)]\n\n\nst = \"I am an NLPer\"\n# Character bigrams of the sentence.\nprint(ngram(st, 2))\n# Word bigrams of the same sentence.\nst = st.split()\nprint(ngram(st, 2))",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "['I ', ' a', 'am', 'm ', ' a', 'an', 'n ', ' N', 'NL', 'LP', 'Pe', 'er']\n[['I', 'am'], ['am', 'an'], ['an', 'NLPer']]\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# Compare the character bigrams of two words as sets.\n# ngram() is not defined in this cell; it must already exist on the kernel.\ntext6_1 = \"paraparaparadise\"\ntext6_2 = \"paragraph\"\nX = set(ngram(text6_1, 2))\nY = set(ngram(text6_2, 2))\n\n# Union, intersection and difference of the two bigram sets.\nprint(X.union(Y))\nprint(X.intersection(Y))\nprint(X.difference(Y))\n\n# Membership of the bigram \"se\" in X, then in Y.\nfor bigrams in (X, Y):\n    print(\"se\" in bigrams)",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "{'ra', 'se', 'ag', 'gr', 'ad', 'ap', 'di', 'is', 'ar', 'pa', 'ph'}\n{'pa', 'ar', 'ra', 'ap'}\n{'di', 'se', 'ad', 'is'}\nTrue\nFalse\n"
}
]
},
{
"metadata": {
"code_folding": [],
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "x = 12\ny = '気温'\nz = 22.4\n\n\ndef function(x, y, z):\n    \"\"\"Return the sentence 'x時のyはz' with the three values filled in.\"\"\"\n    # format() inserts the plain text of each value. repr() would wrap a string\n    # argument in quotes and produce 12時の'気温'は22.4.\n    return '{}時の{}は{}'.format(x, y, z)\n\n\nprint(function(x, y, z))",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "12時の気温は22.4\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "def cipher(input):\n    \"\"\"Mirror every ASCII lowercase letter of input (a<->z, b<->y, ...).\n\n    All other characters are returned unchanged, so applying cipher() twice\n    gives back the original text.\n    \"\"\"\n    # 219 == ord('a') + ord('z'), so 219 - ord(c) is the mirrored letter.\n    # The explicit a-z range check matters: str.islower() is also true for\n    # non-ASCII lowercase letters, where 219 - ord(c) is a wrong character or a\n    # negative number that makes chr() raise ValueError.\n    return \"\".join(\n        chr(219 - ord(char)) if 'a' <= char <= 'z' else char\n        for char in input\n    )\n\n\n# Named `message` rather than `str` so the builtin stays usable in later cells.\nmessage = \"Atbash is a simple substitution cipher for the Hebrew alphabet.\"\nencoded = cipher(message)\nprint(encoded)\ndecoded = cipher(encoded)\nprint(decoded)\n",
"execution_count": 9,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Agyzhs rh z hrnkov hfyhgrgfgrlm xrksvi uli gsv Hvyivd zokszyvg.\nAtbash is a simple substitution cipher for the Hebrew alphabet.\n"
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "import random\n\n\ndef word_typoglycemia(word):\n    \"\"\"Shuffle the interior letters of word, keeping its first and last.\n\n    Words of four characters or fewer are returned unchanged. So are words\n    whose interior letters are all the same, because no different ordering\n    of them exists.\n    \"\"\"\n    if len(word) <= 4:\n        return word\n\n    original_mid = list(word[1:-1])\n    # Without this guard the retry loop below never ends for a word such as\n    # \"aaaaa\": every shuffle reproduces the same interior.\n    if len(set(original_mid)) < 2:\n        return word\n\n    mid_list = original_mid[:]\n    # Reshuffle until the interior actually differs from the original order.\n    while mid_list == original_mid:\n        random.shuffle(mid_list)\n    return word[0] + \"\".join(mid_list) + word[-1]\n\n\ndef str_typoglycemia(str):\n    \"\"\"Apply word_typoglycemia() to each whitespace-separated word of str.\n\n    The words are re-joined with single spaces. The parameter keeps its\n    original name; it hides the builtin only inside this function.\n    \"\"\"\n    return \" \".join(word_typoglycemia(word) for word in str.split())\n\n\n# Named `sentence` rather than `str` so the builtin stays usable in later cells.\nsentence = (\"I couldn't believe that I could actually understand \"\n            \"what I was reading : the phenomenal power of the human mind .\")\n\nprint(str_typoglycemia(sentence))\n",
"execution_count": 10,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "I clu'ondt beilvee that I cloud acullaty urtnandesd what I was rndeiag : the paneohmnel poewr of the huamn mind .\n"
}
]
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/5c60324b7f26cbf516f6c2c6fcac033e"
},
"gist": {
"id": "5c60324b7f26cbf516f6c2c6fcac033e",
"data": {
"description": "自然言語処理100本ノック00-09 http://www.cl.ecei.tohoku.ac.jp/nlp100/",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"threshold": 4,
"number_sections": true,
"toc_cell": false,
"toc_window_display": false,
"toc_section_display": "block",
"sideBar": true,
"navigate_menu": true,
"moveMenuLeft": true,
"widenNotebook": false,
"colors": {
"hover_highlight": "#DAA520",
"selected_highlight": "#FFD700",
"running_highlight": "#FF0000"
},
"nav_menu": {
"height": "12px",
"width": "252px"
}
},
"varInspector": {
"window_display": true,
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"library": "var_list.py",
"delete_cmd_prefix": "del ",
"delete_cmd_postfix": "",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"library": "var_list.r",
"delete_cmd_prefix": "rm(",
"delete_cmd_postfix": ") ",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"oldHeight": 122,
"position": {
"height": "144px",
"left": "auto",
"right": "20px",
"top": "120px",
"width": "350px"
},
"varInspector_section_display": "block"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment