Skip to content

Instantly share code, notes, and snippets.

@astynax
Created August 19, 2013 05:45
Show Gist options
  • Save astynax/6266033 to your computer and use it in GitHub Desktop.
Save astynax/6266033 to your computer and use it in GitHub Desktop.
Notebook для м/к по регуляркам
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# \u0420\u0435\u0433\u0443\u043b\u044f\u0440\u043d\u044b\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u044f\n",
"## \u0418\u0442\u0430\u043a:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\u0420\u0435\u0433\u0443\u043b\u044f\u0440\u043a\u0438 \u0441\u043e\u0441\u0442\u043e\u044f\u0442 \u0438\u0437\n",
"\n",
"- *\u043b\u044e\u0431\u043e\u0433\u043e \u0441\u0438\u043c\u0432\u043e\u043b\u0430*\n",
"- \u043a\u043b\u0430\u0441\u0441\u043e\u0432 \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432\n",
"- \u043a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432\n",
"- *\u044f\u043a\u043e\u0440\u0435\u0439*\n",
"- \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432 \u0433\u0440\u0443\u043f\u043f\u0438\u0440\u043e\u0432\u043a\u0438\n",
"- \u043f\u0435\u0440\u0435\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u0439\n",
"- \u043a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440\u0438\u0435\u0432\n",
"- \u043c\u043e\u0434\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432\n",
"- *\u0437\u0430\u0433\u043b\u044f\u0434\u044b\u0432\u0430\u043d\u0438\u0439* \u0432\u043f\u0435\u0440\u0435\u0434/\u043d\u0430\u0437\u0430\u0434"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u0421\u0438\u043c\u0432\u043e\u043b\u044b: \u043b\u044e\u0431\u043e\u0439 \u0438 \u043a\u043b\u0430\u0441\u0441\u044b"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def m(r, s):\n",
" match = re.match(r, s)\n",
" if match:\n",
" return match.group()\n",
" return \"Nope!\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# . \\d \\D \\s \\S \\w \\W [^...-...]\n",
"l = ['a', 'B', ' ', '\\t', '1', '0']\n",
"def select(r, xs):\n",
" return [m.group() for m in (re.match(r, x) for x in xs) if m]\n",
"print select('\\d', l), '\u0446\u0438\u0444\u0440\u044b'\n",
"print select('\\D', l), '\u041d\u0415 \u0446\u0438\u0444\u0440\u044b'\n",
"print select('\\s', l), '\u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n",
"print select('\\S', l), '\u041d\u0415 \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n",
"print select('\\w', l), '\u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n",
"print select('\\W', l), '\u041d\u0415 \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['1', '0'] \u0446\u0438\u0444\u0440\u044b\n",
"['a', 'B', ' ', '\\t'] \u041d\u0415 \u0446\u0438\u0444\u0440\u044b\n",
"[' ', '\\t'] \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n",
"['a', 'B', '1', '0'] \u041d\u0415 \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n",
"['a', 'B', '1', '0'] \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n",
"[' ', '\\t'] \u041d\u0415 \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u041a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b, \u0436\u0430\u0434\u043d\u044b\u0435 \u0438 \u043d\u0435 \u043e\u0447\u0435\u043d\u044c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# ? + * {}\n",
"l = ['aaa', 'aab', 'abb', 'bbb']\n",
"print select(r'a*', l), \"\u0431\u0435\u0440\u0443 \u043b\u044e\u0431\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e \\\"a\\\", \u0434\u0430\u0436\u0435 \u043d\u0443\u043b\u0435\u0432\u043e\u0435!\"\n",
"print select(r'a+', l), \"\u043b\u044e\u0431\u043e\u0435 \u043d\u0435\u043d\u0443\u043b\u0435\u0432\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e, \u0431\u0435\u0440\u0443 \u0432\u0441\u0451!\"\n",
"print select(r'a*?', l), \"\u0445\u043e\u0447\u0443 0+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043d\u0435 \u0432\u043e\u0437\u044c\u043c\u0443 \u043d\u0438\u0447\u0435\u0433\u043e!)\"\n",
"print select(r'a+?', l), \"\u0445\u043e\u0447\u0443 1+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043e\u0434\u043d\u0443 \u0448\u0442\u0443\u043a\u0443)\"\n",
"print select(r'a?', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u043e\u0434\u043d\u0443 \u0438\u043b\u0438 \u043d\u0438\u0447\u0435\u0433\u043e!\"\n",
"print select(r'a{,2}', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0434\u043e \u0434\u0432\u0443\u0445 \u0448\u0442\u0443\u043a!\"\n",
"print select(r'a{1,2}?', l), \"\u0445\u043e\u0447\u0443 \u043e\u0434\u043d\u0443-\u0434\u0432\u0435 \u0448\u0442\u0443\u043a\u0438, \u0432\u043e\u0437\u044c\u043c\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u043e\u0434\u043d\u0443)!\"\n",
"print select(r'a{2}', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0440\u043e\u0432\u043d\u043e \u0434\u0432\u0435!\""
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['aaa', 'aa', 'a', ''] \u0431\u0435\u0440\u0443 \u043b\u044e\u0431\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e \"a\", \u0434\u0430\u0436\u0435 \u043d\u0443\u043b\u0435\u0432\u043e\u0435!\n",
"['aaa', 'aa', 'a'] \u043b\u044e\u0431\u043e\u0435 \u043d\u0435\u043d\u0443\u043b\u0435\u0432\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e, \u0431\u0435\u0440\u0443 \u0432\u0441\u0451!\n",
"['', '', '', ''] \u0445\u043e\u0447\u0443 0+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043d\u0435 \u0432\u043e\u0437\u044c\u043c\u0443 \u043d\u0438\u0447\u0435\u0433\u043e!)\n",
"['a', 'a', 'a'] \u0445\u043e\u0447\u0443 1+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043e\u0434\u043d\u0443 \u0448\u0442\u0443\u043a\u0443)\n",
"['a', 'a', 'a', ''] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u043e\u0434\u043d\u0443 \u0438\u043b\u0438 \u043d\u0438\u0447\u0435\u0433\u043e!\n",
"['aa', 'aa', 'a', ''] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0434\u043e \u0434\u0432\u0443\u0445 \u0448\u0442\u0443\u043a!\n",
"['a', 'a', 'a'] \u0445\u043e\u0447\u0443 \u043e\u0434\u043d\u0443-\u0434\u0432\u0435 \u0448\u0442\u0443\u043a\u0438, \u0432\u043e\u0437\u044c\u043c\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u043e\u0434\u043d\u0443)!\n",
"['aa', 'aa'] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0440\u043e\u0432\u043d\u043e \u0434\u0432\u0435!\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u042f\u043a\u043e\u0440\u044f"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# ^ $ \\b \\B\n",
"s = 'aa,a ab aa aaa'\n",
"print re.findall(r'a+', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0435, \u043d\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043d\u0435\u0432\u0435\u0440\u043d\u044b\u0435 \u0432\u044b\u0431\u043e\u0440\u043a\u0438!\"\n",
"print re.findall(r'\\Wa+\\W', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438\"\n",
"print re.findall(r'\\sa+\\s', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438\"\n",
"print re.findall(r'\\ba+\\b', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u0433\u0440\u0430\u043d\u0438\u0446\u0430\u043c \u0441\u043b\u043e\u0432, \u043d\u0435 \u0431\u0435\u0440\u0443 \u0441\u0430\u043c\u0438 \u0433\u0440\u0430\u043d\u0438\u0446\u044b!\"\n",
"print re.findall(r'^a+', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043d\u0430\u0447\u0430\u043b\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\"\n",
"print re.findall(r'a+$', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043a\u043e\u043d\u0446\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\""
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['aa', 'a', 'a', 'aa', 'aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0435, \u043d\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043d\u0435\u0432\u0435\u0440\u043d\u044b\u0435 \u0432\u044b\u0431\u043e\u0440\u043a\u0438!\n",
"[',a ', ' aa '] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438\n",
"[' aa '] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438\n",
"['aa', 'a', 'aa', 'aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u0433\u0440\u0430\u043d\u0438\u0446\u0430\u043c \u0441\u043b\u043e\u0432, \u043d\u0435 \u0431\u0435\u0440\u0443 \u0441\u0430\u043c\u0438 \u0433\u0440\u0430\u043d\u0438\u0446\u044b!\n",
"['aa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043d\u0430\u0447\u0430\u043b\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\n",
"['aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043a\u043e\u043d\u0446\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u0413\u0440\u0443\u043f\u043f\u0438\u0440\u043e\u0432\u043a\u0430 \u0441 \u043e\u0431\u0440\u0430\u0442\u043d\u043e\u0439 \u0441\u0432\u044f\u0437\u044c\u044e \u0438 \u0431\u0435\u0437"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# () (?:) (?>) (?P<x>...) \\1 \\2 \\3 # NB: atomic (?>...) is rejected by Python's re before 3.11\n",
"re.match(r'(a)\\s*(\\d+),?(c)', 'a121233c').groups()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"('a', '121233', 'c')"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print re.match(\n",
" r'^([0-9a-fA-F]{1,2})([0-9a-fA-F]{1,2})([0-9a-fA-F]{1,2})$',\n",
" 'ff00bb'\n",
").groups()\n",
"\n",
"# \u0432\u0430\u0436\u043d\u043e! \u043a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b \u0440\u0430\u0431\u043e\u0442\u0430\u044e\u0442 \u0434\u043b\u044f \u0433\u0440\u0443\u043f\u043f \u043f\u0440\u0438 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0435,\n",
"# \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u044b \u043a\u043e\u043f\u0438\u0440\u0443\u044e\u0442\u0441\u044f \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u0438\u043d\u0434\u0435\u043a\u0441\u0430\u043c\u0438, \u043f\u043e\u044d\u0442\u043e\u043c\u0443 \u0432 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u043f\u043e\u043f\u0430\u0434\u0451\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0430, \u0437\u0430\u0445\u0432\u0430\u0447\u0435\u043d\u043d\u0430\u044f \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0439 \u043a\u043e\u043f\u0438\u0435\u0439!\n",
"print re.match(\n",
" r'^([0-9a-fA-F]{1,2}){3}$',\n",
" 'ff00bb'\n",
").groups(), \"\u0421\u0442\u0440\u043e\u043a\u0430 \u0441\u043c\u0430\u0442\u0447\u0438\u043b\u0430\u0441\u044c, \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u0430 \u043e\u0441\u0442\u0430\u043b\u0430\u0441\u044c \u043e\u0434\u043d\u0430!\""
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"('ff', '00', 'bb')\n",
"('bb',) \u0421\u0442\u0440\u043e\u043a\u0430 \u0441\u043c\u0430\u0442\u0447\u0438\u043b\u0430\u0441\u044c, \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u0430 \u043e\u0441\u0442\u0430\u043b\u0430\u0441\u044c \u043e\u0434\u043d\u0430!\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"re.findall(r'([\\\"\\']+)(\\d+)\\1', '\"123\" \\'1\\' \"7\\' \"\"4\"\" \"\"\"42\"\"\"')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[('\"', '123'), (\"'\", '1'), ('\"\"', '4'), ('\"\"\"', '42')]"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"re.match(r'^(.?)(.?)(.?).?\\3\\2\\1$', 'acbbca').groups()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"('a', 'c', 'b')"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"re.match(r'\\s*(?P<x>\\d+)\\s*,\\s*(?P<y>\\d+)\\s*', '1,2'\n",
" ).groupdict()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"{'x': '1', 'y': '2'}"
]
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u041f\u0435\u0440\u0435\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u044f"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# a|b"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print re.match(r'^((?:\\d+)|(?:[VXI]+))$', '123').groups()\n",
"print re.match(r'^((?:\\d+)|(?:[VXI]+))$', 'VII').groups()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"('123',)\n",
"('VII',)\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u041a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440\u0438\u0438"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# (?#)\n",
"re.match(\n",
" (\n",
" r'^'\n",
" r'(?#red)([0-9a-fA-F]{1,2})'\n",
" r'(?#green)([0-9a-fA-F]{1,2})'\n",
" r'(?#blue)([0-9a-fA-F]{1,2})'\n",
" r'$'\n",
" ),\n",
" r'ff00ff'\n",
").groups()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"('ff', '00', 'ff')"
]
}
],
"prompt_number": 13
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u041c\u043e\u0434\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# (?ismx)\n",
"m(r'(?i)a', 'A')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"'A'"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m(r'''(?x)\n",
" ^ # beginning of line\n",
" ([0-9a-fA-F]{1,2}) # red\n",
" ([0-9a-fA-F]{1,2}) # green\n",
" ([0-9a-fA-F]{1,2}) # blue\n",
" $ # end of line\n",
" ''', 'ff00ff')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
"'ff00ff'"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'a\\nab\\nabc'\n",
"print \"\u043f\u043e-\u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0442\u043e\u0447\u043a\u0430 \u043d\u0435 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a, \u0430 ^ \u0438 $ \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0432\u0441\u0435\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0430:\\n\", (\n",
" re.findall(r'^.+$', s))\n",
"print \"^ \u0438 $ \u0442\u0435\u043f\u0435\u0440\u044c \u043e\u0431\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u044e\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0438:\\n\", (\n",
" re.findall(r'(?m)^.+$', s))\n",
"print \"\u0442\u043e\u0447\u043a\u0430 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a:\\n\", (\n",
" re.findall(r'(?s)^.+$', s))\n",
"print \"\\A\\Z \u0432\u0441\u0435\u0433\u0434\u0430 \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0442\u0435\u043a\u0441\u0442\u0430:\"\n",
"print re.findall(r'\\A.+\\Z', s)\n",
"print re.findall(r'(?m)\\A.+\\Z', s)\n",
"print re.findall(r'(?s)\\A.+\\Z', s)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u043f\u043e-\u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0442\u043e\u0447\u043a\u0430 \u043d\u0435 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a, \u0430 ^ \u0438 $ \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0432\u0441\u0435\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0430:\n",
"[]\n",
"^ \u0438 $ \u0442\u0435\u043f\u0435\u0440\u044c \u043e\u0431\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u044e\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0438:\n",
"['a', 'ab', 'abc']\n",
"\u0442\u043e\u0447\u043a\u0430 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a:\n",
"['a\\nab\\nabc']\n",
"\\A\\Z \u0432\u0441\u0435\u0433\u0434\u0430 \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0442\u0435\u043a\u0441\u0442\u0430:\n",
"[]\n",
"[]\n",
"['a\\nab\\nabc']\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u0417\u0430\u0433\u043b\u044f\u0434\u044b\u0432\u0430\u043d\u0438\u0435 \u0432\u043f\u0435\u0440\u0435\u0434 \u0438 \u043d\u0430\u0437\u0430\u0434"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# (?=...)(?!...)(?<=...)(?<!...)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"re.findall(r'(aaa(?=b))|(bbb(?=a))', 'aaabbbaaaccc')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
"[('aaa', ''), ('', 'bbb')]"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"re.findall(r'(aaa)(?:b)|(bbb)(?:a)', 'aaabbbaaabbb')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
"[('aaa', ''), ('aaa', '')]"
]
}
],
"prompt_number": 19
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \u041f\u043e\u0438\u0441\u043a/\u0417\u0430\u043c\u0435\u043d\u0430"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \\1 \\g<n>"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print re.sub(\n",
" r\"(?P<name>\\w)\\s*=\\s*(?P<val>(?:\\d+)|(?:\\S+))\",\n",
" r\"'\\g<name>': '\\g<val>'\",\n",
" \"x = asd\"\n",
")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"'x': 'asd'\n"
]
}
],
"prompt_number": 21
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment