-
-
Save ischurov/391a4ef5aa0c44b5f424e95a029abbaf to your computer and use it in GitHub Desktop.
Lecture 25.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import re", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "s = \"Hello! My phone is 71232343212. Can you call me?\"", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(\"\\\\d+\", s)", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(0)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'71232343212'" | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"\\d+\", s) # \\d matches any single digit", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(0)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'71232343212'" | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"\\d+\", \"Hello! 23132 ladjf 1312\")", | |
"execution_count": 15, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(0)", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'23132'" | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "re.findall(r\"\\d+\", \"Hello! 23132 ladjf 1312\")", | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "['23132', '1312']" | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "s = \"My phone is +7(999) 123 12-22! Call me back.\"\nm = re.search(r\"\\d+\", s)", | |
"execution_count": 22, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(0)", | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'7'" | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"phone is(.+)\", s, )", | |
"execution_count": 27, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m", | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<re.Match object; span=(3, 44), match='phone is +7(999) 123 12-22! Call me back.'>" | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(1)", | |
"execution_count": 29, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "' +7(999) 123 12-22! Call me back.'" | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(0)", | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'phone is +7(999) 123 12-22! Call me back.'" | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"phone is([^!,.a-zA-Z]+)\", s, )", | |
"execution_count": 33, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(1)", | |
"execution_count": 32, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "' +7(999) 123 12-22'" | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "s = \"My phone is +7(999) 123 12-22. Call me back.\"\nm = re.search(r\"\\d+\", s)", | |
"execution_count": 34, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"phone is([^!,. a-zA-Z]+)\", s)", | |
"execution_count": 47, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m is None", | |
"execution_count": 49, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "True" | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m = re.search(r\"phone is ([^!,. a-zA-Z]+)\", s)", | |
"execution_count": 42, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "m.group(1)", | |
"execution_count": 44, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'+7(999)'" | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "re.sub(\"[!,. a-zA-Z]\", \"\", s)", | |
"execution_count": 46, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'+7(999)12312-22'" | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd", | |
"execution_count": 50, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df = pd.DataFrame([[\"a\", \"(12)\"],\n [\"b\", \"(34)\"]], columns=['x', 'y'])", | |
"execution_count": 63, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "s = df['y'].str", | |
"execution_count": 58, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df['y'].str.findall(\"\\\\$\")", | |
"execution_count": 61, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "0 [$]\n1 [$]\nName: y, dtype: object" | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df['y'].str.replace('[\\\\(\\\\)]','')", | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "0 12\n1 34\nName: y, dtype: object" | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "re.split(\"[!,. ?]+\", \"Hello! This is a test.How are you?\")", | |
"execution_count": 69, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "['Hello', 'This', 'is', 'a', 'test', 'How', 'are', 'you', '']" | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.2", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Lecture 25.ipynb", | |
"public": false | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment