Skip to content

Instantly share code, notes, and snippets.

@evjeny
Created August 17, 2022 04:37
Show Gist options
  • Save evjeny/3954b3c7438300b194a2750c5b05b822 to your computer and use it in GitHub Desktop.
Save evjeny/3954b3c7438300b194a2750c5b05b822 to your computer and use it in GitHub Desktop.
Different ways to check if entry is missing in paired table
import sqlite3
import typing
def create_docs_table(
connection: sqlite3.Connection,
ids: list[int]
):
cursor = connection.cursor()
cursor.execute("DROP TABLE IF EXISTS docs")
cursor.execute(
"""
CREATE TABLE docs (
doc_id TEXT,
stupid_num INTEGER
);
"""
)
cursor.executemany(
"INSERT INTO docs (doc_id, stupid_num) VALUES (?, ?)",
[
(f"id{i}", i+10)
for i in ids
]
)
connection.commit()
def create_doc_items_table(
connection: sqlite3.Connection,
ids: list[int]
):
cursor = connection.cursor()
cursor.execute("DROP TABLE IF EXISTS doc_items")
cursor.execute(
"""
CREATE TABLE doc_items (
doc_id TEXT,
value REAL
);
"""
)
cursor.executemany(
"INSERT INTO doc_items (doc_id, value) VALUES (?, ?)",
[
(f"id{i}", i * 3.14)
for i in ids
]
)
connection.commit()
def get_docs_no_pair(connection: sqlite3.Connection) -> int:
cursor = connection.cursor()
return cursor.execute(
"""
SELECT COUNT(*) FROM docs
WHERE doc_id NOT IN (SELECT DISTINCT doc_id FROM doc_items)
"""
).fetchone()[0]
def get_doc_items_no_pair(connection: sqlite3.Connection) -> int:
cursor = connection.cursor()
return cursor.execute(
"""
SELECT COUNT(*) FROM doc_items
WHERE doc_id NOT IN (SELECT DISTINCT doc_id FROM docs)
"""
).fetchone()[0]
def get_docs_no_pair_join(connection: sqlite3.Connection) -> int:
cursor = connection.cursor()
return cursor.execute(
"""
SELECT COUNT(*) FROM docs
LEFT JOIN doc_items ON docs.doc_id = doc_items.doc_id
WHERE doc_items.doc_id IS NULL
"""
).fetchone()[0]
def get_doc_items_no_pair_join(connection: sqlite3.Connection) -> int:
cursor = connection.cursor()
return cursor.execute(
"""
SELECT COUNT(*) FROM doc_items
LEFT JOIN docs ON docs.doc_id = doc_items.doc_id
WHERE docs.doc_id IS NULL
"""
).fetchone()[0]
def test_ids_intersection(
connection: sqlite3.Connection,
ids_docs: list[int],
ids_doc_items: list[int],
docs_no_pair_counter: typing.Callable[[sqlite3.Connection], int],
doc_items_no_pair_counter: typing.Callable[[sqlite3.Connection], int]
):
docs_no_pair = len([
i for i in ids_docs
if i not in ids_doc_items
])
doc_items_no_pair = len([
i for i in ids_doc_items
if i not in ids_docs
])
create_docs_table(connection, ids=ids_docs)
create_doc_items_table(connection, ids=ids_doc_items)
if docs_no_pair_counter(connection) != docs_no_pair or \
doc_items_no_pair_counter(connection) != doc_items_no_pair:
raise Exception(f"Not matched for doc_ids={ids_docs}, doc_item_ids={ids_doc_items}")
else:
print("Passed test")
def main():
connection = sqlite3.connect("test_check_queries.db")
for ids_docs, ids_doc_items in [
(
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[7, 8, 9, 10, 11]
),
([1], [2])
]:
print("vanilla test")
test_ids_intersection(
connection,
ids_docs=ids_docs,
ids_doc_items=ids_doc_items,
docs_no_pair_counter=get_docs_no_pair,
doc_items_no_pair_counter=get_doc_items_no_pair
)
print("join test")
test_ids_intersection(
connection,
ids_docs=ids_docs,
ids_doc_items=ids_doc_items,
docs_no_pair_counter=get_docs_no_pair_join,
doc_items_no_pair_counter=get_doc_items_no_pair_join
)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment