Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created May 17, 2020 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ischurov/391a4ef5aa0c44b5f424e95a029abbaf to your computer and use it in GitHub Desktop.
Save ischurov/391a4ef5aa0c44b5f424e95a029abbaf to your computer and use it in GitHub Desktop.
Lecture 25.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import re",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "s = \"Hello! My phone is 71232343212. Can you call me?\"",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(\"\\\\d+\", s)",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(0)",
"execution_count": 6,
"outputs": [
{
"data": {
"text/plain": "'71232343212'"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"\\d+\", s) # \\d - digit (цифра)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(0)",
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": "'71232343212'"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"\\d+\", \"Hello! 23132 ladjf 1312\")",
"execution_count": 15,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(0)",
"execution_count": 16,
"outputs": [
{
"data": {
"text/plain": "'23132'"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "re.findall(r\"\\d+\", \"Hello! 23132 ladjf 1312\")",
"execution_count": 17,
"outputs": [
{
"data": {
"text/plain": "['23132', '1312']"
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "s = \"My phone is +7(999) 123 12-22! Call me back.\"\nm = re.search(r\"\\d+\", s)",
"execution_count": 22,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(0)",
"execution_count": 23,
"outputs": [
{
"data": {
"text/plain": "'7'"
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"phone is(.+)\", s, )",
"execution_count": 27,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m",
"execution_count": 28,
"outputs": [
{
"data": {
"text/plain": "<re.Match object; span=(3, 44), match='phone is +7(999) 123 12-22! Call me back.'>"
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(1)",
"execution_count": 29,
"outputs": [
{
"data": {
"text/plain": "' +7(999) 123 12-22! Call me back.'"
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(0)",
"execution_count": 30,
"outputs": [
{
"data": {
"text/plain": "'phone is +7(999) 123 12-22! Call me back.'"
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"phone is([^!,.a-zA-Z]+)\", s, )",
"execution_count": 33,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(1)",
"execution_count": 32,
"outputs": [
{
"data": {
"text/plain": "' +7(999) 123 12-22'"
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "s = \"My phone is +7(999) 123 12-22. Call me back.\"\nm = re.search(r\"\\d+\", s)",
"execution_count": 34,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"phone is([^!,. a-zA-Z]+)\", s)",
"execution_count": 47,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m is None",
"execution_count": 49,
"outputs": [
{
"data": {
"text/plain": "True"
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m = re.search(r\"phone is ([^!,. a-zA-Z]+)\", s)",
"execution_count": 42,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "m.group(1)",
"execution_count": 44,
"outputs": [
{
"data": {
"text/plain": "'+7(999)'"
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "re.sub(\"[!,. a-zA-Z]\", \"\", s)",
"execution_count": 46,
"outputs": [
{
"data": {
"text/plain": "'+7(999)12312-22'"
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 50,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df = pd.DataFrame([[\"a\", \"(12)\"],\n [\"b\", \"(34)\"]], columns=['x', 'y'])",
"execution_count": 63,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "s = df['y'].str",
"execution_count": 58,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df['y'].str.findall(\"\\\\$\")",
"execution_count": 61,
"outputs": [
{
"data": {
"text/plain": "0 [$]\n1 [$]\nName: y, dtype: object"
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df['y'].str.replace('[\\\\(\\\\)]','')",
"execution_count": 65,
"outputs": [
{
"data": {
"text/plain": "0 12\n1 34\nName: y, dtype: object"
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "re.split(\"[!,. ?]+\", \"Hello! This is a test.How are you?\")",
"execution_count": 69,
"outputs": [
{
"data": {
"text/plain": "['Hello', 'This', 'is', 'a', 'test', 'How', 'are', 'you', '']"
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.2",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Lecture 25.ipynb",
"public": false
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment