-
-
Save ischurov/ad210f6f13a7b81168d2915d379c9c37 to your computer and use it in GitHub Desktop.
lesson24.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"id": "caring-effectiveness", | |
"cell_type": "markdown", | |
"source": "### ORM: SQLAlchemy" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "prescribed-binding", | |
"cell_type": "code", | |
"source": "%pip install sqlalchemy", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "Requirement already satisfied: sqlalchemy in /Users/user/miniconda3/envs/featurevis-py3.6/lib/python3.6/site-packages (1.3.22)\nNote: you may need to restart the kernel to use updated packages.\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "secondary-painting", | |
"cell_type": "code", | |
"source": "from sqlalchemy import create_engine\n\nfrom sqlalchemy.ext.declarative import declarative_base\n\nBase = declarative_base()\n\nfrom sqlalchemy import Column, Integer, String\n\nclass User(Base):\n __tablename__ = \"user\"\n \n id = Column(Integer, primary_key=True)\n username = Column(String)\n nickname = Column(String)\n age = Column(Integer)\n \n def __repr__(self):\n return f\"User(username={self.username!r}, nickname={self.nickname!r}, age={self.age!r})\"", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "proprietary-montreal", | |
"cell_type": "code", | |
"source": "alice = User(username=\"alice\", nickname=\"Alice Smith\", age=18)\nbob = User(username='bobby', nickname='BOB', age=19)", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "authorized-toilet", | |
"cell_type": "code", | |
"source": "alice", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "User(username='alice', nickname='Alice Smith', age=18)" | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "hawaiian-watershed", | |
"cell_type": "code", | |
"source": "alice.username", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'alice'" | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "signed-insulin", | |
"cell_type": "code", | |
"source": "alice.age", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "18" | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "overhead-static", | |
"cell_type": "code", | |
"source": "engine = create_engine('sqlite:///my_users.sqlite', echo=True)\nBase.metadata.create_all(engine)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "2021-04-14 18:23:05,316 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1\n2021-04-14 18:23:05,317 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,319 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1\n2021-04-14 18:23:05,320 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,322 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info(\"user\")\n2021-04-14 18:23:05,322 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,324 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info(\"user\")\n2021-04-14 18:23:05,325 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,327 INFO sqlalchemy.engine.base.Engine \nCREATE TABLE user (\n\tid INTEGER NOT NULL, \n\tusername VARCHAR, \n\tnickname VARCHAR, \n\tage INTEGER, \n\tPRIMARY KEY (id)\n)\n\n\n2021-04-14 18:23:05,328 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,330 INFO sqlalchemy.engine.base.Engine COMMIT\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "unnecessary-norway", | |
"cell_type": "code", | |
"source": "from sqlalchemy.orm import sessionmaker\nSession = sessionmaker(bind=engine)\nsession = Session()", | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "nearby-rugby", | |
"cell_type": "code", | |
"source": "session.add(alice)", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "reported-prince", | |
"cell_type": "code", | |
"source": "session.add(bob)", | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"scrolled": true, | |
"trusted": false | |
}, | |
"id": "occupational-steal", | |
"cell_type": "code", | |
"source": "session.commit()", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "2021-04-14 18:24:19,160 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)\n2021-04-14 18:24:19,162 INFO sqlalchemy.engine.base.Engine INSERT INTO user (username, nickname, age) VALUES (?, ?, ?)\n2021-04-14 18:24:19,163 INFO sqlalchemy.engine.base.Engine ('alice', 'Alice Smith', 18)\n2021-04-14 18:24:19,165 INFO sqlalchemy.engine.base.Engine INSERT INTO user (username, nickname, age) VALUES (?, ?, ?)\n2021-04-14 18:24:19,166 INFO sqlalchemy.engine.base.Engine ('bobby', 'BOB', 19)\n2021-04-14 18:24:19,167 INFO sqlalchemy.engine.base.Engine COMMIT\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "chicken-setup", | |
"cell_type": "code", | |
"source": "from sqlalchemy import create_engine\n\nfrom sqlalchemy.ext.declarative import declarative_base\n\nBase = declarative_base()\n\nfrom sqlalchemy import Column, Integer, String\n\nclass User(Base):\n __tablename__ = \"user\"\n \n id = Column(Integer, primary_key=True)\n username = Column(String)\n nickname = Column(String)\n age = Column(Integer)\n \n def __repr__(self):\n return f\"User(username={self.username!r}, nickname={self.nickname!r}, age={self.age!r})\"\n \n def say_hello(self):\n print(f\"Hello, I'm {self.nickname}!\")\n\n \nengine = create_engine('sqlite:///my_users.sqlite', echo=True)\n\nfrom sqlalchemy.orm import sessionmaker\nSession = sessionmaker(bind=engine)\nsession = Session()", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "digital-litigation", | |
"cell_type": "code", | |
"source": "users = session.query(User).all()\n# SELECT * FROM user", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "2021-04-18 20:14:15,238 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1\n2021-04-18 20:14:15,239 INFO sqlalchemy.engine.base.Engine ()\n2021-04-18 20:14:15,241 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1\n2021-04-18 20:14:15,242 INFO sqlalchemy.engine.base.Engine ()\n2021-04-18 20:14:15,243 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)\n2021-04-18 20:14:15,245 INFO sqlalchemy.engine.base.Engine SELECT user.id AS user_id, user.username AS user_username, user.nickname AS user_nickname, user.age AS user_age \nFROM user\n2021-04-18 20:14:15,245 INFO sqlalchemy.engine.base.Engine ()\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "material-society", | |
"cell_type": "code", | |
"source": "users[0].username", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'alice'" | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "stopped-scott", | |
"cell_type": "code", | |
"source": "users[0].say_hello()", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "Hello, I'm Alice Smith!\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "judicial-pennsylvania", | |
"cell_type": "code", | |
"source": "session.query(User).filter(User.age >= 19).all()\n# SELECT * FROM user WHERE user.age >= 19", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "2021-04-18 20:14:38,257 INFO sqlalchemy.engine.base.Engine SELECT user.id AS user_id, user.username AS user_username, user.nickname AS user_nickname, user.age AS user_age \nFROM user \nWHERE user.age >= ?\n2021-04-18 20:14:38,259 INFO sqlalchemy.engine.base.Engine (19,)\n" | |
}, | |
{ | |
"data": { | |
"text/plain": "[User(username='bobby', nickname='BOB', age=19)]" | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"id": "successful-straight", | |
"cell_type": "markdown", | |
"source": "### SQL-injections" | |
}, | |
{ | |
"metadata": {}, | |
"id": "opposed-reporter", | |
"cell_type": "markdown", | |
"source": "Рассмотрим такой код:\n\n```\nuser = input(\"Enter username\")\npasswd = input(\"Enter password\")\nconn = sqlite3.connect(\"database.sqlite\")\nc = conn.cursor()\nc.execute(f\"SELECT * FROM user WHERE user.username == '{user}'\")\n```\nЕсли `user` окажется равен `'; DROP TABLE user; --`, окажется выполненным следующий запрос:\n\n\n```\nSELECT * FROM user WHERE user.username == ''; DROP TABLE user; --'\n```\n\nМногие движки баз данных будут рассматривать его как три отдельных строчки, разделяя по точке с запятой.\n\n```\nSELECT * FROM user WHERE user.username == '';\nDROP TABLE user;\n--'\n```\n\nпоследняя строчка — комментарий (они начинаются в SQL с `--`), первая запрашивает пользователя с пустым именем, а вторая — удаляет базу данных пользователей.\n\nОй.\n\nПоэтому формировать SQL-запросы с помощью подстановки каких-то переменных вручную нельзя — есть риск нарваться на такую SQL injection.\n\nНадо делать так:\n\n```\nc.execute(\"SELECT * FROM user WHERE user.username == ?\", (user, ))\n```\n\nВ этом случае `sqlite` сам подставит значение переменной вместо знака вопроса. Если знаков вопроса несколько, будут подставлены соответствующие элементы кортежа.\n\nДругой способ избежать sql-injections — использовать ORM типа sqlalchemy, как показано выше. В этом случае вы совсем не пишете SQL-код и не рискуете написать его неправильно." | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Регулярные выражения" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "gorgeous-continuity", | |
"cell_type": "code", | |
"source": "text = \"\"\"\nHello! My name is Alice. I'm 29. My phone is +7-919-99-912321.\n\nHello, Alice. My name is Bob Smith. I'm 25 years old. \nMy tel. is +7-991-199992836. How are you?\n\nWhat is 45+49?\n\"\"\"", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "finite-study", | |
"cell_type": "code", | |
"source": "import re", | |
"execution_count": 21, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "developing-croatia", | |
"cell_type": "code", | |
"source": "re.findall(r\"[+\\d-]{5,}\", text)", | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "['+7-919-99-912321', '+7-991-199992836', '45+49']" | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "minute-orange", | |
"cell_type": "code", | |
"source": "print(r\"\\n\")", | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "\\n\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "impressed-offer", | |
"cell_type": "code", | |
"source": "print(\"\\n\")", | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "\n\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "sustained-avenue", | |
"cell_type": "code", | |
"source": "data = \"\"\"\nFirst name: Alice\nLast name: Smith\nTelephone: +7 916 900998876\n\"\"\"", | |
"execution_count": 26, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "activated-consolidation", | |
"cell_type": "code", | |
"source": "m = re.search(\"First name: (.+)\", data)", | |
"execution_count": 33, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "alive-auction", | |
"cell_type": "code", | |
"source": "m.group(1)", | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'Alice'" | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "automotive-taiwan", | |
"cell_type": "code", | |
"source": "text", | |
"execution_count": 37, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "\"\\nHello! My name is Alice. I'm 29. My phone is +7-919-99-912321.\\n\\nHello, Alice. My name is Bob Smith. I'm 25 years old. \\nMy tel. is +7-991-199992836. How are you?\\n\\nWhat is 45+49?\\n\"" | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "powerful-stations", | |
"cell_type": "code", | |
"source": "re.split(r\"[\\n! \\.,']+\", text)", | |
"execution_count": 40, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "['',\n 'Hello',\n 'My',\n 'name',\n 'is',\n 'Alice',\n 'I',\n 'm',\n '29',\n 'My',\n 'phone',\n 'is',\n '+7-919-99-912321',\n 'Hello',\n 'Alice',\n 'My',\n 'name',\n 'is',\n 'Bob',\n 'Smith',\n 'I',\n 'm',\n '25',\n 'years',\n 'old',\n 'My',\n 'tel',\n 'is',\n '+7-991-199992836',\n 'How',\n 'are',\n 'you?',\n 'What',\n 'is',\n '45+49?',\n '']" | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "another-parcel", | |
"cell_type": "code", | |
"source": "re.sub(\"\\d\", \"*\", text)", | |
"execution_count": 42, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "\"\\nHello! My name is Alice. I'm **. My phone is +*-***-**-******.\\n\\nHello, Alice. My name is Bob Smith. I'm ** years old. \\nMy tel. is +*-***-*********. How are you?\\n\\nWhat is **+**?\\n\"" | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "continuous-columbus", | |
"cell_type": "code", | |
"source": "re.sub(\"(\\d|\\+)\", \"*\", text)", | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "\"\\nHello! My name is Alice. I'm **. My phone is **-***-**-******.\\n\\nHello, Alice. My name is Bob Smith. I'm ** years old. \\nMy tel. is **-***-*********. How are you?\\n\\nWhat is *****?\\n\"" | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "mounted-meditation", | |
"cell_type": "code", | |
"source": "m", | |
"execution_count": 50, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<_sre.SRE_Match object; span=(1, 18), match='First name: Alice'>" | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "fundamental-avatar", | |
"cell_type": "code", | |
"source": "def digit_to_word(d):\n return {0: '[ноль]', \n 1: '[один]',\n 2: '[два]', \n 3: '[три]'}.get(int(d.group(0)), d.group(0))", | |
"execution_count": 51, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "champion-prime", | |
"cell_type": "code", | |
"source": "re.sub(\"\\d\", digit_to_word, text)", | |
"execution_count": 52, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "\"\\nHello! My name is Alice. I'm [два]9. My phone is +7-9[один]9-99-9[один][два][три][два][один].\\n\\nHello, Alice. My name is Bob Smith. I'm [два]5 years old. \\nMy tel. is +7-99[один]-[один]9999[два]8[три]6. How are you?\\n\\nWhat is 45+49?\\n\"" | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "alpine-maximum", | |
"cell_type": "code", | |
"source": "text = \"\"\"\nAlpha Beta\nGamma Delta\n\"\"\"", | |
"execution_count": 53, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "polyphonic-trance", | |
"cell_type": "code", | |
"source": "print(re.sub(r\"(\\w+)\\W(\\w+)$\", r\"\\2, \\1\", text, flags=re.MULTILINE))", | |
"execution_count": 60, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "\nBeta, Alpha\nDelta, Gamma\n\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "allied-vanilla", | |
"cell_type": "code", | |
"source": "import pandas as pd", | |
"execution_count": 61, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "manufactured-saudi", | |
"cell_type": "code", | |
"source": "dat = pd.DataFrame({'s': ['alpha', 'beta', 'gamma']})", | |
"execution_count": 62, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "through-combination", | |
"cell_type": "code", | |
"source": "dat['s'].str.split('[ap]', expand=True)", | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td></td>\n <td>l</td>\n <td>h</td>\n <td></td>\n </tr>\n <tr>\n <th>1</th>\n <td>bet</td>\n <td></td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>2</th>\n <td>g</td>\n <td>mm</td>\n <td></td>\n <td>None</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"text/plain": " 0 1 2 3\n0 l h \n1 bet None None\n2 g mm None" | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "moved-clinic", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.10", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "lesson24.ipynb", | |
"public": false | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment