Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created April 18, 2021 18:06
Show Gist options
  • Save ischurov/ad210f6f13a7b81168d2915d379c9c37 to your computer and use it in GitHub Desktop.
Save ischurov/ad210f6f13a7b81168d2915d379c9c37 to your computer and use it in GitHub Desktop.
lesson24.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"id": "caring-effectiveness",
"cell_type": "markdown",
"source": "### ORM: SQLAlchemy"
},
{
"metadata": {
"trusted": false
},
"id": "prescribed-binding",
"cell_type": "code",
"source": "%pip install sqlalchemy",
"execution_count": 1,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Requirement already satisfied: sqlalchemy in /Users/user/miniconda3/envs/featurevis-py3.6/lib/python3.6/site-packages (1.3.22)\nNote: you may need to restart the kernel to use updated packages.\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "secondary-painting",
"cell_type": "code",
"source": "from sqlalchemy import create_engine\n\nfrom sqlalchemy.ext.declarative import declarative_base\n\nBase = declarative_base()\n\nfrom sqlalchemy import Column, Integer, String\n\nclass User(Base):\n __tablename__ = \"user\"\n \n id = Column(Integer, primary_key=True)\n username = Column(String)\n nickname = Column(String)\n age = Column(Integer)\n \n def __repr__(self):\n return f\"User(username={self.username!r}, nickname={self.nickname!r}, age={self.age!r})\"",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "proprietary-montreal",
"cell_type": "code",
"source": "alice = User(username=\"alice\", nickname=\"Alice Smith\", age=18)\nbob = User(username='bobby', nickname='BOB', age=19)",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "authorized-toilet",
"cell_type": "code",
"source": "alice",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "User(username='alice', nickname='Alice Smith', age=18)"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "hawaiian-watershed",
"cell_type": "code",
"source": "alice.username",
"execution_count": 6,
"outputs": [
{
"data": {
"text/plain": "'alice'"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "signed-insulin",
"cell_type": "code",
"source": "alice.age",
"execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "18"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "overhead-static",
"cell_type": "code",
"source": "engine = create_engine('sqlite:///my_users.sqlite', echo=True)\nBase.metadata.create_all(engine)",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "2021-04-14 18:23:05,316 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1\n2021-04-14 18:23:05,317 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,319 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1\n2021-04-14 18:23:05,320 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,322 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info(\"user\")\n2021-04-14 18:23:05,322 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,324 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info(\"user\")\n2021-04-14 18:23:05,325 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,327 INFO sqlalchemy.engine.base.Engine \nCREATE TABLE user (\n\tid INTEGER NOT NULL, \n\tusername VARCHAR, \n\tnickname VARCHAR, \n\tage INTEGER, \n\tPRIMARY KEY (id)\n)\n\n\n2021-04-14 18:23:05,328 INFO sqlalchemy.engine.base.Engine ()\n2021-04-14 18:23:05,330 INFO sqlalchemy.engine.base.Engine COMMIT\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "unnecessary-norway",
"cell_type": "code",
"source": "from sqlalchemy.orm import sessionmaker\nSession = sessionmaker(bind=engine)\nsession = Session()",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "nearby-rugby",
"cell_type": "code",
"source": "session.add(alice)",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "reported-prince",
"cell_type": "code",
"source": "session.add(bob)",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"scrolled": true,
"trusted": false
},
"id": "occupational-steal",
"cell_type": "code",
"source": "session.commit()",
"execution_count": 12,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "2021-04-14 18:24:19,160 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)\n2021-04-14 18:24:19,162 INFO sqlalchemy.engine.base.Engine INSERT INTO user (username, nickname, age) VALUES (?, ?, ?)\n2021-04-14 18:24:19,163 INFO sqlalchemy.engine.base.Engine ('alice', 'Alice Smith', 18)\n2021-04-14 18:24:19,165 INFO sqlalchemy.engine.base.Engine INSERT INTO user (username, nickname, age) VALUES (?, ?, ?)\n2021-04-14 18:24:19,166 INFO sqlalchemy.engine.base.Engine ('bobby', 'BOB', 19)\n2021-04-14 18:24:19,167 INFO sqlalchemy.engine.base.Engine COMMIT\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "chicken-setup",
"cell_type": "code",
"source": "from sqlalchemy import create_engine\n\nfrom sqlalchemy.ext.declarative import declarative_base\n\nBase = declarative_base()\n\nfrom sqlalchemy import Column, Integer, String\n\nclass User(Base):\n __tablename__ = \"user\"\n \n id = Column(Integer, primary_key=True)\n username = Column(String)\n nickname = Column(String)\n age = Column(Integer)\n \n def __repr__(self):\n return f\"User(username={self.username!r}, nickname={self.nickname!r}, age={self.age!r})\"\n \n def say_hello(self):\n print(f\"Hello, I'm {self.nickname}!\")\n\n \nengine = create_engine('sqlite:///my_users.sqlite', echo=True)\n\nfrom sqlalchemy.orm import sessionmaker\nSession = sessionmaker(bind=engine)\nsession = Session()",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "digital-litigation",
"cell_type": "code",
"source": "users = session.query(User).all()\n# SELECT * FROM user",
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "2021-04-18 20:14:15,238 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1\n2021-04-18 20:14:15,239 INFO sqlalchemy.engine.base.Engine ()\n2021-04-18 20:14:15,241 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1\n2021-04-18 20:14:15,242 INFO sqlalchemy.engine.base.Engine ()\n2021-04-18 20:14:15,243 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)\n2021-04-18 20:14:15,245 INFO sqlalchemy.engine.base.Engine SELECT user.id AS user_id, user.username AS user_username, user.nickname AS user_nickname, user.age AS user_age \nFROM user\n2021-04-18 20:14:15,245 INFO sqlalchemy.engine.base.Engine ()\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "material-society",
"cell_type": "code",
"source": "users[0].username",
"execution_count": 3,
"outputs": [
{
"data": {
"text/plain": "'alice'"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "stopped-scott",
"cell_type": "code",
"source": "users[0].say_hello()",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Hello, I'm Alice Smith!\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "judicial-pennsylvania",
"cell_type": "code",
"source": "session.query(User).filter(User.age >= 19).all()\n# SELECT * FROM user WHERE user.age >= 19",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "2021-04-18 20:14:38,257 INFO sqlalchemy.engine.base.Engine SELECT user.id AS user_id, user.username AS user_username, user.nickname AS user_nickname, user.age AS user_age \nFROM user \nWHERE user.age >= ?\n2021-04-18 20:14:38,259 INFO sqlalchemy.engine.base.Engine (19,)\n"
},
{
"data": {
"text/plain": "[User(username='bobby', nickname='BOB', age=19)]"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"id": "successful-straight",
"cell_type": "markdown",
"source": "### SQL-injections"
},
{
"metadata": {},
"id": "opposed-reporter",
"cell_type": "markdown",
"source": "Рассмотрим такой код:\n\n```\nuser = input(\"Enter username\")\npasswd = input(\"Enter password\")\nconn = sqlite3.connect(\"database.sqlite\")\nc = conn.cursor()\nc.execute(f\"SELECT * FROM user WHERE user.username == '{user}'\")\n```\nЕсли `user` окажется равен `'; DROP TABLE user; --`, окажется выполненным следующий запрос:\n\n\n```\nSELECT * FROM user WHERE user.username == ''; DROP TABLE user; --'\n```\n\nМногие движки баз данных будут рассматривать его как три отдельных строчки, разделяя по точке с запятой.\n\n```\nSELECT * FROM user WHERE user.username == '';\nDROP TABLE user;\n--'\n```\n\nпоследняя строчка — комментарий (они начинаются в SQL с `--`), первая запрашивает пользователя с пустым именем, а вторая — удаляет базу данных пользователей.\n\nОй.\n\nПоэтому формировать SQL-запросы с помощью подстановки каких-то переменных вручную нельзя — есть риск нарваться на такую SQL injection.\n\nНадо делать так:\n\n```\nc.execute(\"SELECT * FROM user WHERE user.username == ?\", (user, ))\n```\n\nВ этом случае `sqlite` сам подставит значение переменной вместо знака вопроса. Если знаков вопроса несколько, будут подставлены соответствующие элементы кортежа.\n\nДругой способ избежать sql-injections — использовать ORM типа sqlalchemy, как показано выше. В этом случае вы совсем не пишете SQL-код и не рискуете написать его неправильно."
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Регулярные выражения"
},
{
"metadata": {
"trusted": true
},
"id": "gorgeous-continuity",
"cell_type": "code",
"source": "text = \"\"\"\nHello! My name is Alice. I'm 29. My phone is +7-919-99-912321.\n\nHello, Alice. My name is Bob Smith. I'm 25 years old. \nMy tel. is +7-991-199992836. How are you?\n\nWhat is 45+49?\n\"\"\"",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "finite-study",
"cell_type": "code",
"source": "import re",
"execution_count": 21,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "developing-croatia",
"cell_type": "code",
"source": "re.findall(r\"[+\\d-]{5,}\", text)",
"execution_count": 22,
"outputs": [
{
"data": {
"text/plain": "['+7-919-99-912321', '+7-991-199992836', '45+49']"
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "minute-orange",
"cell_type": "code",
"source": "print(r\"\\n\")",
"execution_count": 24,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\\n\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "impressed-offer",
"cell_type": "code",
"source": "print(\"\\n\")",
"execution_count": 25,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\n\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "sustained-avenue",
"cell_type": "code",
"source": "data = \"\"\"\nFirst name: Alice\nLast name: Smith\nTelephone: +7 916 900998876\n\"\"\"",
"execution_count": 26,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "activated-consolidation",
"cell_type": "code",
"source": "m = re.search(\"First name: (.+)\", data)",
"execution_count": 33,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "alive-auction",
"cell_type": "code",
"source": "m.group(1)",
"execution_count": 36,
"outputs": [
{
"data": {
"text/plain": "'Alice'"
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "automotive-taiwan",
"cell_type": "code",
"source": "text",
"execution_count": 37,
"outputs": [
{
"data": {
"text/plain": "\"\\nHello! My name is Alice. I'm 29. My phone is +7-919-99-912321.\\n\\nHello, Alice. My name is Bob Smith. I'm 25 years old. \\nMy tel. is +7-991-199992836. How are you?\\n\\nWhat is 45+49?\\n\""
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "powerful-stations",
"cell_type": "code",
"source": "re.split(r\"[\\n! \\.,']+\", text)",
"execution_count": 40,
"outputs": [
{
"data": {
"text/plain": "['',\n 'Hello',\n 'My',\n 'name',\n 'is',\n 'Alice',\n 'I',\n 'm',\n '29',\n 'My',\n 'phone',\n 'is',\n '+7-919-99-912321',\n 'Hello',\n 'Alice',\n 'My',\n 'name',\n 'is',\n 'Bob',\n 'Smith',\n 'I',\n 'm',\n '25',\n 'years',\n 'old',\n 'My',\n 'tel',\n 'is',\n '+7-991-199992836',\n 'How',\n 'are',\n 'you?',\n 'What',\n 'is',\n '45+49?',\n '']"
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "another-parcel",
"cell_type": "code",
"source": "re.sub(\"\\d\", \"*\", text)",
"execution_count": 42,
"outputs": [
{
"data": {
"text/plain": "\"\\nHello! My name is Alice. I'm **. My phone is +*-***-**-******.\\n\\nHello, Alice. My name is Bob Smith. I'm ** years old. \\nMy tel. is +*-***-*********. How are you?\\n\\nWhat is **+**?\\n\""
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "continuous-columbus",
"cell_type": "code",
"source": "re.sub(\"(\\d|\\+)\", \"*\", text)",
"execution_count": 43,
"outputs": [
{
"data": {
"text/plain": "\"\\nHello! My name is Alice. I'm **. My phone is **-***-**-******.\\n\\nHello, Alice. My name is Bob Smith. I'm ** years old. \\nMy tel. is **-***-*********. How are you?\\n\\nWhat is *****?\\n\""
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "mounted-meditation",
"cell_type": "code",
"source": "m",
"execution_count": 50,
"outputs": [
{
"data": {
"text/plain": "<_sre.SRE_Match object; span=(1, 18), match='First name: Alice'>"
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "fundamental-avatar",
"cell_type": "code",
"source": "def digit_to_word(d):\n return {0: '[ноль]', \n 1: '[один]',\n 2: '[два]', \n 3: '[три]'}.get(int(d.group(0)), d.group(0))",
"execution_count": 51,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "champion-prime",
"cell_type": "code",
"source": "re.sub(\"\\d\", digit_to_word, text)",
"execution_count": 52,
"outputs": [
{
"data": {
"text/plain": "\"\\nHello! My name is Alice. I'm [два]9. My phone is +7-9[один]9-99-9[один][два][три][два][один].\\n\\nHello, Alice. My name is Bob Smith. I'm [два]5 years old. \\nMy tel. is +7-99[один]-[один]9999[два]8[три]6. How are you?\\n\\nWhat is 45+49?\\n\""
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "alpine-maximum",
"cell_type": "code",
"source": "text = \"\"\"\nAlpha Beta\nGamma Delta\n\"\"\"",
"execution_count": 53,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "polyphonic-trance",
"cell_type": "code",
"source": "print(re.sub(r\"(\\w+)\\W(\\w+)$\", r\"\\2, \\1\", text, flags=re.MULTILINE))",
"execution_count": 60,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\nBeta, Alpha\nDelta, Gamma\n\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "allied-vanilla",
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 61,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "manufactured-saudi",
"cell_type": "code",
"source": "dat = pd.DataFrame({'s': ['alpha', 'beta', 'gamma']})",
"execution_count": 62,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "through-combination",
"cell_type": "code",
"source": "dat['s'].str.split('[ap]', expand=True)",
"execution_count": 65,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td></td>\n <td>l</td>\n <td>h</td>\n <td></td>\n </tr>\n <tr>\n <th>1</th>\n <td>bet</td>\n <td></td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>2</th>\n <td>g</td>\n <td>mm</td>\n <td></td>\n <td>None</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " 0 1 2 3\n0 l h \n1 bet None None\n2 g mm None"
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "moved-clinic",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.10",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "lesson24.ipynb",
"public": false
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment