Last active
September 9, 2022 07:00
-
-
Save kenji4569/b0523114f4b0b4a30a911c0e864c70a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "b028215a-2c31-41b6-9b19-72d09edeac06", | |
"metadata": {}, | |
"source": [ | |
"# An interactive tutorial to understand how MySQL database works especially for indexes." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6b21907d-ac8c-4bf5-9f9f-92c8e568487e", | |
"metadata": {}, | |
"source": [ | |
"# Preparation" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ca58cda4-13d2-451b-a853-e8e12bf62248", | |
"metadata": {}, | |
"source": [ | |
"## Launch JupyterLab:\n", | |
"\n", | |
"```\n", | |
"pip3 install --upgrade pip\n", | |
"pip3 install jupyterlab\n", | |
"\n", | |
"cd your-workspace\n", | |
"jupyter-lab\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "aade2d34-1e2d-4d82-aee4-45818ba079b8", | |
"metadata": {}, | |
"source": [ | |
"## Install Docker Engine\n", | |
"\n", | |
"https://docs.docker.com/engine/install/" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "81ccabff-c714-4fd7-a59f-7be3700a5a71", | |
"metadata": {}, | |
"source": [ | |
"## Run MySQL container" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "16ef6d00-4452-4680-a182-4d2ecf4b962e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Run a mysql container\n", | |
"!docker run --name demo-mysql -e MYSQL_ROOT_PASSWORD=pass -p 8306:3306 --platform linux/amd64 -d mysql:5.7" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1f999729-5c69-40d2-8fee-6720d76cc1f8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Check the docker container running\n", | |
"!docker ps -a | grep demo-mysql" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0d6b875a-5070-4f00-82f5-a2bd3c760071", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Wait until mysql is fully up\n", | |
"!sleep 60" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "7f572f44-0c9d-4403-800f-1f455c044e16", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create a \"database\" in mysql\n", | |
"!docker exec demo-mysql mysqladmin -u root -ppass create demo-db" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8b3a27cf-9d6b-4962-8a87-352f64c77e96", | |
"metadata": {}, | |
"source": [ | |
"# Connect to MySQL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f78a7b57-7d3d-4d7e-93db-305e980ccf20", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Install python package\n", | |
"from IPython.display import clear_output\n", | |
"%pip install sqlalchemy pandas pymysql\n", | |
"clear_output()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6eb1877d-438c-479a-8457-cac8511fab2e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Connect to the \"database\"\n", | |
"import sqlalchemy as sa\n", | |
"engine = sa.create_engine('mysql+pymysql://root:pass@localhost:8306/demo-db')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3b2ec752-90d4-436e-bd77-5cd3cef4921f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create a table\n", | |
"engine.execute('CREATE TABLE demo_users (id INT NOT NULL PRIMARY KEY, name VARCHAR(64)) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "458e1a1d-bd4e-489c-8a0e-a56c7859c66e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# insert records to the table\n", | |
"engine.execute('INSERT demo_users VALUES (1, \"一郎\")')\n", | |
"engine.execute('INSERT demo_users VALUES (2, \"二郎\")')\n", | |
"engine.execute('INSERT demo_users VALUES (3, \"三郎\")')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "7609e91c-dde5-4a1d-ad94-f36832ae16b9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select the records\n", | |
"engine.execute('SELECT * FROM demo_users').fetchall()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "787c1502-517f-4723-bff2-5383800d984c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1227e749-fe10-4f19-9029-d34edbd27420", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select the records with a useful tool\n", | |
"pd.read_sql_query('SELECT * FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b1152e45-e5a6-4fa5-8c4b-655196fd0472", | |
"metadata": {}, | |
"source": [ | |
"# Inspect" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3d15ff76-271a-49f5-b1de-e520988f3647", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Inspect the table definition in detail\n", | |
"print(pd.read_sql_query('SHOW CREATE TABLE demo_users', engine)['Create Table'][0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "ec3f5e5d-8883-4f1e-8d05-ba54feacf242", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Inspect how the select execution is planned\n", | |
"pd.read_sql_query('EXPLAIN SELECT * FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d94db5b1-9779-40df-ae0a-3435e86bf1c3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Primary key index is used to select a record by id\n", | |
"pd.read_sql_query('EXPLAIN SELECT * FROM demo_users WHERE id=2', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "84c23966-b3b1-422c-af48-8f751689e103", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Primary key index is still used to select a record by ids\n", | |
"pd.read_sql_query('EXPLAIN SELECT * FROM demo_users WHERE id in (2, 3)', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ba680202-5675-45d2-8e30-db85bec73c32", | |
"metadata": {}, | |
"source": [ | |
"# Index" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f29b6bbd-aec9-4146-a1c3-717ff8d9ad3d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Delete all current records\n", | |
"engine.execute(f'TRUNCATE TABLE demo_users')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5c382c62-c641-4757-b8e2-e186e0158430", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Insert record for each up to 100\n", | |
"for i in range(100):\n", | |
" print(i, end=',')\n", | |
" engine.execute(f'INSERT demo_users VALUES ({i}, \"{i}郎\")')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "55aca50c-cc60-4078-9e3f-6281efb2b6fc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Bulk insert records up to 100000\n", | |
"values = ','.join([f'({i}, \"{i}郎\")' for i in range(100, 100000)])\n", | |
"engine.execute(f'INSERT demo_users VALUES {values}')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "05677494-26d1-4931-ad4a-94cda20bf359", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from time import perf_counter\n", | |
"\n", | |
"class catchtime:\n", | |
" def __enter__(self):\n", | |
" self.time = perf_counter()\n", | |
" return self\n", | |
"\n", | |
" def __exit__(self, type, value, traceback):\n", | |
" self.time = perf_counter() - self.time\n", | |
" self.readout = f'Time: {self.time:.3f} seconds'\n", | |
" print(self.readout)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b0229731-d8a8-4f57-bda8-f569f549ab9c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by name\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_users WHERE name = \"100郎\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "de2a2842-db44-4048-acd4-0c771093e054", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Bulk insert records up to 200000\n", | |
"values = ','.join([f'({i}, \"{i}郎\")' for i in range(100000, 200000)])\n", | |
"engine.execute(f'INSERT demo_users VALUES {values}')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bd6b6aa6-829e-47bb-a1aa-1e47afcf7d60", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by name\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_users WHERE name = \"100郎\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e9cf172e-28ae-4e99-b565-93f73beb9e4f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Bulk insert records up to 300000\n", | |
"values = ','.join([f'({i}, \"{i}郎\")' for i in range(200000, 300000)])\n", | |
"engine.execute(f'INSERT demo_users VALUES {values}')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_users', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "088ab1eb-7818-47fd-a10f-08a5e9dea8b0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by name\n", | |
"# --> bad\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_users WHERE name = \"100郎\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "29555fee-ce54-4650-97d8-5a5f51eaca18", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by id\n", | |
"# --> good\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_users WHERE id = 100', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "32a1f70b-7b79-4f72-b857-c6fb52fbfb2f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# No index is used\n", | |
"pd.read_sql_query('EXPLAIN SELECT * FROM demo_users WHERE name = \"100郎\"', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "fffe4168-2dae-4cdf-b5ae-cfdc14d9b8a9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's apply an index to name\n", | |
"engine.execute('CREATE INDEX ix_name ON demo_users(name)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8c0ca94e-8157-4891-adfb-1cc60cc0c170", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Check the index is created\n", | |
"print(pd.read_sql_query('SHOW CREATE TABLE demo_users', engine)['Create Table'][0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "48778c5e-0e0d-43fc-9d6a-993d1ca04e72", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by name\n", | |
"# --> good\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_users WHERE name = \"100郎\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b5136f26-5dbb-4ff0-8382-0d230de1f0ba", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The index is used\n", | |
"pd.read_sql_query('EXPLAIN SELECT * FROM demo_users WHERE name = \"100郎\"', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2aeae268-5e12-46e2-906d-38df82983e38", | |
"metadata": {}, | |
"source": [ | |
"# Multiple-Column Indexes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "4b440542-832d-428d-8fc7-627e6311b365", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create another table\n", | |
"engine.execute('CREATE TABLE demo_members (id INT NOT NULL PRIMARY KEY, first_name VARCHAR(64), last_name VARCHAR(64)) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e8941972-159e-4c34-bf4e-231492fbdc41", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Insert records with 30 last_names x 10000 first_names\n", | |
"for j in range(30):\n", | |
" print(10000*j, '-', 10000*(j+1), end=', ')\n", | |
" values = ','.join([f'({10000*j + i}, \"{i}郎\", \"{j}山\")' for i in range(10000)])\n", | |
" engine.execute(f'INSERT demo_members VALUES {values}')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_members', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "65bdf9c0-f847-44a4-a6d3-5deddfa6371b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name and first_name\n", | |
"# --> bad\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_members WHERE last_name=\"10山\" AND first_name = \"100郎\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8ac79c47-307d-497a-814c-33358954288d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by id\n", | |
"# --> good\n", | |
"with catchtime():\n", | |
" print(pd.read_sql_query('SELECT * FROM demo_members WHERE id=\"100100\"', engine))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1481cf3e-be40-4ad7-bb21-2fcb87ee990a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's apply an index to last_name\n", | |
"engine.execute('CREATE INDEX ix_name ON demo_members(last_name)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3c3846c1-26a3-4c16-99c7-912ed1f7213c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def query_with_explain(sql, engine):\n", | |
" with catchtime():\n", | |
" print(pd.read_sql_query(sql, engine))\n", | |
" return pd.read_sql_query(f'EXPLAIN {sql}', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c76e5532-74f6-42f6-b790-9ae1414a92bf", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name and first_name\n", | |
"# The index is used but with scanning over 10000 records\n", | |
"# --> poor\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" AND first_name = \"100郎\"', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2078911b-af6b-440b-8693-16b3dcb49688", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's apply an index to first_name with dropping the the existing index\n", | |
"engine.execute('DROP INDEX ix_name ON demo_members')\n", | |
"engine.execute('CREATE INDEX ix_name ON demo_members(first_name)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d6fb54ef-bcdb-457f-872b-4d7bae8872ba", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name and first_name\n", | |
"# The index is used with scanning 30 records\n", | |
"# --> average\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" AND first_name = \"100郎\"', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b0eb518b-8984-4bc7-b9a9-1c1a033fa5fa", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's apply an index to last_name x first_name with dropping the existing index\n", | |
"engine.execute('DROP INDEX ix_name ON demo_members')\n", | |
"engine.execute('CREATE INDEX ix_name ON demo_members(last_name, first_name)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c914bdf8-c8b3-4dea-aba6-7a1c955511df", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name and first_name\n", | |
"# The index is used with scanning only 1 record\n", | |
"# * Note that key_len is now longer which would affect performance and memory usage\n", | |
"# --> good\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" AND first_name = \"100郎\"', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "da7d547d-11a7-4c0f-8922-9f158e51a2ab", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name under the last_name x first_name index\n", | |
"# The index is still used but with \"possibly\" scanning over 10000 records\n", | |
"# --> poor\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" LIMIT 1', engine)\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" LIMIT 1 OFFSET 5000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "33fc0bd5-ceac-4cd8-bb3b-d18c1e32e0cf", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by first_name under the last_name x first_name index\n", | |
"# The index is still used but with \"possibly\" scanning almost all records\n", | |
"# --> bad\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE first_name = \"100郎\" LIMIT 1', engine)\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE first_name = \"100郎\" LIMIT 1 OFFSET 20', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d0a3c5cd-2473-4cb5-aa5e-b7788448eae3", | |
"metadata": {}, | |
"source": [ | |
"# Order by" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "133a7cc0-c41d-479b-a1fc-db05824737e4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record with order by id\n", | |
"# Primary key index is used\n", | |
"# --> good\n", | |
"query_with_explain('SELECT * FROM demo_members ORDER BY id DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1b5ffc52-ac6e-41c3-81b9-2ba4d4604346", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record with order by last_name\n", | |
"# The last_name x first_name index is used with scanning only 1 record\n", | |
"# --> good\n", | |
"query_with_explain('SELECT * FROM demo_members ORDER BY last_name DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0bf6fe2a-e4c2-4e7b-8dd6-bfce48597003", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record with order by first_name\n", | |
"# The last_name x first_name index is used \"possibly\" scanning almost all records and with \"filesort\"\n", | |
"# --> too bad\n", | |
"query_with_explain('SELECT * FROM demo_members ORDER BY first_name DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8c9afa93-6720-4a08-bdee-1a53911586ad", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name with order by id\n", | |
"# --> poor\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" ORDER BY id DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d061415f-6f2d-4438-b472-c5cb6d52f3bb", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name with order by first_name\n", | |
"# --> good\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" ORDER BY first_name DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "99a07b03-3770-4c87-926c-2363c4bcb223", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's apply an index to last_name with dropping the the existing index\n", | |
"engine.execute('DROP INDEX ix_name ON demo_members')\n", | |
"engine.execute('CREATE INDEX ix_name ON demo_members(last_name)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f839f259-8d3d-489a-ab2d-675306c6abcf", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name with order by id with specifying last_name index\n", | |
"# --> average\n", | |
"# (Since primary key index is clustered index, no filesort is necessary)\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" ORDER BY id DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "be9fec0e-1602-4a35-a3e5-bbd5e2cfaf04", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a record by last_name with order by first_name\n", | |
"# --> poor\n", | |
"query_with_explain('SELECT * FROM demo_members WHERE last_name=\"10山\" ORDER BY first_name DESC LIMIT 1 OFFSET 1000', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "fafcc869-5225-43e6-b427-977e4e9a1987", | |
"metadata": {}, | |
"source": [ | |
"# JOIN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f83463f9-8d17-42c9-9df9-28eb82aacbc8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create a table\n", | |
"engine.execute('CREATE TABLE demo_favorites (id INT NOT NULL PRIMARY KEY, user_id INT NOT NULL, name VARCHAR(64)) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0ff7af67-6acd-4149-92b2-bde597fd79b8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Inspect the table definition in detail\n", | |
"print(pd.read_sql_query('SHOW CREATE TABLE demo_favorites', engine)['Create Table'][0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d65bdd7e-97df-4fb9-8364-d137b51c8b53", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Insert records up to 1000 users where each user have 30 items\n", | |
"_id = 0\n", | |
"values = []\n", | |
"for user_id in range(1000):\n", | |
" for item_id in range(30):\n", | |
" values.append(f'({_id}, {user_id}, \"商品{item_id}\")')\n", | |
" _id += 1\n", | |
"values = ','.join(values)\n", | |
"\n", | |
"engine.execute(f'INSERT demo_favorites VALUES {values}')\n", | |
"pd.read_sql_query('SELECT COUNT(*) FROM demo_favorites', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0465f290-f6e3-4ffb-bc7d-8674d3d0feb0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a user having favorites by inner-join\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"INNER JOIN demo_favorites AS f ON u.id = f.user_id\n", | |
"WHERE\n", | |
"u.name = \"100郎\"\n", | |
"LIMIT 2\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "833de19e-c78d-4d59-9f40-e9af689ec554", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a user having favorites by inner-join\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"INNER JOIN demo_favorites AS f ON u.id = f.user_id\n", | |
"WHERE\n", | |
"u.name = \"10000郎\"\n", | |
"LIMIT 2\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e57e2265-5a49-4afd-96ba-fa57780f5e5e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select a user having favorites by left-join\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"LEFT JOIN demo_favorites AS f ON u.id = f.user_id\n", | |
"WHERE\n", | |
"u.name = \"10000郎\"\n", | |
"LIMIT 2\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a276b431-88a7-41d3-bd52-de7e2ec2bedd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select many users with favorites\n", | |
"# Primary key index is used\n", | |
"# --> good\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"INNER JOIN demo_favorites AS f ON u.id = f.user_id\n", | |
"LIMIT 2 OFFSET 10000\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e430e930-c821-4944-8dc2-b0a1b072ef16", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select many users with favorites\n", | |
"# The ix_name index is used\n", | |
"# --> average\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"INNER JOIN demo_favorites AS f ON u.name = CONCAT(f.user_id, '郎')\n", | |
"LIMIT 2 OFFSET 10000\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f3880ff0-384d-4113-b3ae-4cd70df1b9a8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Let's drop the name index of users\n", | |
"engine.execute('DROP INDEX ix_name ON demo_users')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1ab3fd6f-d057-4ee7-a715-cd04b9633dc9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Select many users with favorites\n", | |
"# --> too bad\n", | |
"query_with_explain('''\n", | |
"SELECT u.*, f.* FROM demo_users AS u\n", | |
"INNER JOIN demo_favorites AS f ON u.name = concat(f.user_id, '郎')\n", | |
"LIMIT 2 OFFSET 10000\n", | |
"''', engine)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b440133e-3557-4cbd-ab49-bc0817c3899d", | |
"metadata": {}, | |
"source": [ | |
"# Clean up" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "fac7edb8-a9fa-4a5b-b4c9-9719f1174ab8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Drop tables\n", | |
"engine.execute('DROP TABLE demo_users')\n", | |
"engine.execute('DROP TABLE demo_members')\n", | |
"engine.execute('DROP TABLE demo_favorites')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5cc8b22b-4548-4ad4-9f1e-724c20f35ae2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Remove the mysql container\n", | |
"!docker rm -f demo-mysql" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8ef8e595-12b0-4881-9757-123ad2c085f1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Uninstall Docker Engine if necessary\n", | |
"# Uninstall JupyterLab if necessary" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment