Created
March 1, 2023 07:54
-
-
Save strayge/68df810813a13d2c96e265c3347e06bb to your computer and use it in GitHub Desktop.
python sqlite performance test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import sqlite3\n", | |
"import os\n", | |
"import json\n", | |
"from time import time\n", | |
"from typing import Iterable\n", | |
"from sqlalchemy import create_engine, Column, Integer, String, Identity\n", | |
"from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass, Mapped, mapped_column, Session, declarative_base\n", | |
"\n", | |
"\n", | |
"def read_json_by_line(filename: str) -> Iterable[dict]:\n", | |
" with open(filename, 'r') as f:\n", | |
" for line in f:\n", | |
" if not line:\n", | |
" continue\n", | |
" yield json.loads(line) # type: ignore[no-any-return]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total time: 7.99 seconds, 516151 rows\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"Raw sqlite3\"\"\"\n", | |
"\n", | |
"if os.path.exists('data.db'):\n", | |
" os.remove('data.db')\n", | |
"\n", | |
"conn = sqlite3.connect('data.db')\n", | |
"c = conn.cursor()\n", | |
"c.execute('''\n", | |
" CREATE TABLE packages (\n", | |
" name text PRIMARY KEY, \n", | |
" name_lower text, \n", | |
" version text, \n", | |
" upload_time integer, \n", | |
" home_page text, \n", | |
" summary text, \n", | |
" summary_lower text\n", | |
" )\n", | |
"'''\n", | |
")\n", | |
"\n", | |
"t2 = time()\n", | |
"for i, data in enumerate(read_json_by_line(os.path.join('data', 'metadata_lines.json'))):\n", | |
" name = data['name']\n", | |
" summary = data.get('summary') or ''\n", | |
" c.execute(\n", | |
" '''\n", | |
" INSERT INTO packages (\n", | |
" name, name_lower, version, upload_time, home_page, summary, summary_lower\n", | |
" )\n", | |
" VALUES (\n", | |
" ?, ?, ?, ?, ?, ?, ?\n", | |
" )\n", | |
" ''', \n", | |
" (\n", | |
" name, \n", | |
" name.lower(), \n", | |
" data['version'], \n", | |
" data['upload_time'], \n", | |
" data['home_page'], \n", | |
" summary, \n", | |
" summary.lower(), \n", | |
" )\n", | |
" )\n", | |
"conn.commit()\n", | |
"t3 = time()\n", | |
"\n", | |
"print(f'Total time: {t3 - t2:.2f} seconds, {i} rows')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Inserted packages in 42.72 seconds, 516151 rows\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"SQLAlchemy, dataclass, no batch\"\"\"\n", | |
"\n", | |
"if os.path.exists('data.db'):\n", | |
" os.remove('data.db')\n", | |
"\n", | |
"engine = create_engine('sqlite:///data.db')\n", | |
"\n", | |
"class BaseModel(MappedAsDataclass, DeclarativeBase):\n", | |
" pass\n", | |
"\n", | |
"class Package(BaseModel):\n", | |
" __tablename__ = 'packages'\n", | |
"\n", | |
" name: Mapped[str] = mapped_column(primary_key=True)\n", | |
" name_lower: Mapped[str]\n", | |
" version: Mapped[str]\n", | |
" upload_time: Mapped[str]\n", | |
" home_page: Mapped[str]\n", | |
" summary: Mapped[str]\n", | |
" summary_lower: Mapped[str]\n", | |
"\n", | |
"BaseModel.metadata.create_all(engine)\n", | |
"\n", | |
"t1 = time()\n", | |
"with Session(engine) as session:\n", | |
" for i, data in enumerate(read_json_by_line(os.path.join('data', 'metadata_lines.json'))):\n", | |
" name = data['name']\n", | |
" summary = data.get('summary') or ''\n", | |
" session.add(Package(\n", | |
" name=name, \n", | |
" name_lower=name.lower(), \n", | |
" version=data['version'], \n", | |
" upload_time=data['upload_time'], \n", | |
" home_page=data['home_page'] or '', \n", | |
" summary=summary, \n", | |
" summary_lower=summary.lower(), \n", | |
" ))\n", | |
" session.commit()\n", | |
"t2 = time()\n", | |
"print(f'Inserted packages in {t2 - t1:.2f} seconds, {i} rows')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Inserted packages in 31.73 seconds, 516151 rows\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"SQLAlchemy, dataclass, batch\"\"\"\n", | |
"\n", | |
"if os.path.exists('data.db'):\n", | |
" os.remove('data.db')\n", | |
"\n", | |
"engine = create_engine('sqlite:///data.db')\n", | |
"\n", | |
"class BaseModel(MappedAsDataclass, DeclarativeBase):\n", | |
" pass\n", | |
"\n", | |
"class Package(BaseModel):\n", | |
" __tablename__ = 'packages'\n", | |
"\n", | |
" name: Mapped[str] = mapped_column(primary_key=True)\n", | |
" name_lower: Mapped[str]\n", | |
" version: Mapped[str]\n", | |
" upload_time: Mapped[str]\n", | |
" home_page: Mapped[str]\n", | |
" summary: Mapped[str]\n", | |
" summary_lower: Mapped[str]\n", | |
"\n", | |
"BaseModel.metadata.create_all(engine)\n", | |
"\n", | |
"with open(os.path.join('data', 'metadata.json'), 'r') as f:\n", | |
" metadata = json.load(f)\n", | |
"\n", | |
"t1 = time()\n", | |
"with Session(engine) as session:\n", | |
" packages = []\n", | |
" i = 0\n", | |
" for i, data in enumerate(metadata.values()):\n", | |
" name = data['name']\n", | |
" summary = data.get('summary') or ''\n", | |
" p = Package(\n", | |
" name=name, \n", | |
" name_lower=name.lower(), \n", | |
" version=data['version'], \n", | |
" upload_time=data['upload_time'], \n", | |
" home_page=data['home_page'] or '', \n", | |
" summary=summary, \n", | |
" summary_lower=summary.lower(), \n", | |
" )\n", | |
" packages.append(p)\n", | |
" if i % 1000 == 0:\n", | |
" session.add_all(packages)\n", | |
" session.flush()\n", | |
" packages = []\n", | |
"\n", | |
" session.commit()\n", | |
"t2 = time()\n", | |
"print(f'Inserted packages in {t2 - t1:.2f} seconds, {i} rows')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Read packages in 3.20 seconds\n", | |
"Inserted packages in 38.20 seconds\n", | |
"Total time: 41.39 seconds, 516151 rows\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"SQLAlchemy, no dataclass, batch\"\"\"\n", | |
"\n", | |
"if os.path.exists('data.db'):\n", | |
" os.remove('data.db')\n", | |
"\n", | |
"engine = create_engine('sqlite:///data.db')\n", | |
"\n", | |
"Base = declarative_base()\n", | |
"\n", | |
"class Package(Base):\n", | |
" __tablename__ = 'packages'\n", | |
" name = Column(String, primary_key=True)\n", | |
" name_lower = Column(String)\n", | |
" version = Column(String)\n", | |
" upload_time = Column(String)\n", | |
" home_page = Column(String)\n", | |
" summary = Column(String)\n", | |
" summary_lower = Column(String)\n", | |
"\n", | |
"BaseModel.metadata.create_all(engine)\n", | |
"\n", | |
"t1 = time()\n", | |
"with open(os.path.join('data', 'metadata.json'), 'r') as f:\n", | |
" metadata = json.load(f)\n", | |
"\n", | |
"t2 = time()\n", | |
"with Session(engine) as session:\n", | |
" packages = []\n", | |
" for i, data in enumerate(metadata.values()):\n", | |
" name = data['name']\n", | |
" summary = data.get('summary') or ''\n", | |
" packages.append(Package(\n", | |
" name=name, \n", | |
" name_lower=name.lower(), \n", | |
" version=data['version'], \n", | |
" upload_time=data['upload_time'], \n", | |
" home_page=data['home_page'] or '', \n", | |
" summary=summary, \n", | |
" summary_lower=summary.lower(), \n", | |
" ))\n", | |
" if i % 10000 == 0:\n", | |
" session.add_all(packages)\n", | |
" session.flush()\n", | |
" packages = []\n", | |
" if packages:\n", | |
" session.add_all(packages)\n", | |
" session.flush()\n", | |
" packages = []\n", | |
" session.commit()\n", | |
"t3 = time()\n", | |
"\n", | |
"print(f'Read packages in {t2 - t1:.2f} seconds')\n", | |
"print(f'Inserted packages in {t3 - t2:.2f} seconds')\n", | |
"print(f'Total time: {t3 - t1:.2f} seconds, {i} rows')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total time: 23.40 seconds, 500000 rows\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"SQLAlchemy, example from their performance test\"\"\"\n", | |
"\n", | |
"Base = declarative_base()\n", | |
"\n", | |
"\n", | |
"class Customer(Base):\n", | |
" __tablename__ = \"customer\"\n", | |
" id = Column(Integer, Identity(), primary_key=True)\n", | |
" name = Column(String(255))\n", | |
" description = Column(String(255))\n", | |
"\n", | |
"engine = create_engine(\"sqlite:///:memory:\")\n", | |
"Base.metadata.drop_all(engine)\n", | |
"Base.metadata.create_all(engine)\n", | |
"\n", | |
"t1 = time()\n", | |
"session = Session(bind=engine)\n", | |
"n = 500000\n", | |
"for chunk in range(0, n, 1000):\n", | |
" session.add_all(\n", | |
" [\n", | |
" Customer(\n", | |
" id=i + 1,\n", | |
" name=\"customer name %d\" % i,\n", | |
" description=\"customer description %d\" % i,\n", | |
" )\n", | |
" for i in range(chunk, chunk + 1000)\n", | |
" ]\n", | |
" )\n", | |
" session.flush()\n", | |
"session.commit()\n", | |
"\n", | |
"t2 = time()\n", | |
"print(f'Total time: {t2 - t1:.2f} seconds, {n} rows')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.11.1 ('venv': venv)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.1" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "4f283fdae1340cad2e43ca9876253c01895e38babc9160fc03a0f6de2150e0d8" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment