leontrolski/sqlalchemy-joins.sql

## sqlalchemy-joins.sql
-- given tables a, b
-- equivalent SQLAlchemy models A, B
-- a has many b, joined on a_id
-- a has l non-id fields, b has m

a_id | x1 | x2 | x... | xl

b_id | a_id | y1 | y2 | y... | ym

-- let's do a joined load:

session.query(A).options(joinedload(B))

-- does:

SELECT a.a_id, a.x1, a.x2 ..., b.b_id, b.a_id, b.y1, ...
FROM a
JOIN b on a.a_id = b.b_id

-- number of queries: 1

-- query returns data:

a_id | x1 | x2 | x... | xl | b_id | a_id | y1 | y2 | y... | ym
_some data_

-- if `l` is large, let's say 1kb per row
-- and we find 10 `a`s matching our query
-- and there are 100 `b`s per `a`
-- we end up unnecessarily sending
-- 1kb * (100 - 1) * 10 = 990kb of data over the wire
-- (that we discard when SQLAlchemy nests our data Python-side)


-- to prevent this, we do:

session.query(A).options(selectin(B))

-- does

SELECT a.a_id, a.x1, a.x2 ...,
FROM a

SELECT b.b_id, b.a_id, b.y1, ...
WHERE b.a_id in (the a_ids from the previous query)

-- number of queries: 1 + 1 = 2
-- or just number of queries = number of joins

-- queries return

a_id | x1 | x2 | x... | xl
_some data_

b_id | a_id | y1 | y2 | y... | ym
_some data_
	-- given tables a, b
	-- equivalent SQLAlchemy models A, B
	-- a has many b, joined on a_id
	-- a has l non-id fields, b has m

	a_id \| x1 \| x2 \| x... \| xl

	b_id \| a_id \| y1 \| y2 \| y... \| ym

	-- let's do a joined load:

	session.query(A).options(joinedload(B))

	-- does:

	SELECT a.a_id, a.x1, a.x2 ..., b.b_id, b.a_id, b.y1, ...
	FROM a
	JOIN b on a.a_id = b.b_id

	-- number of queries: 1

	-- query returns data:

	a_id \| x1 \| x2 \| x... \| xl \| b_id \| a_id \| y1 \| y2 \| y... \| ym
	_some data_

	-- if `l` is large, let's say 1kb per row
	-- and we find 10 `a`s matching our query
	-- and there are 100 `b`s per `a`
	-- we end up unnecessarily sending
	-- 1kb * (100 - 1) * 10 = 990kb of data over the wire
	-- (that we discard when SQLAlchemy nests our data Python-side)


	-- to prevent this, we do:

	session.query(A).options(selectin(B))

	-- does

	SELECT a.a_id, a.x1, a.x2 ...,
	FROM a

	SELECT b.b_id, b.a_id, b.y1, ...
	WHERE b.a_id in (the a_ids from the previous query)

	-- number of queries: 1 + 1 = 2
	-- or just number of queries = number of joins

	-- queries return

	a_id \| x1 \| x2 \| x... \| xl
	_some data_

	b_id \| a_id \| y1 \| y2 \| y... \| ym
	_some data_