Skip to content

Instantly share code, notes, and snippets.

@bitner
Last active February 7, 2022 21:19
Show Gist options
  • Save bitner/bc6bc22b0334796ff8b6d7ea9808bc24 to your computer and use it in GitHub Desktop.
Save bitner/bc6bc22b0334796ff8b6d7ea9808bc24 to your computer and use it in GitHub Desktop.
import psycopg
import os
import time
# Get a an iterator to use for sample of half million ids cached to a file
def sample_ids():
file='sampleids.csv'
if not os.path.exists(file):
with open(file, 'wb') as f:
with psycopg.connect(os.environ.get('PGURL')) as conn:
with conn.cursor() as cur:
with cur.copy("""
COPY (
SELECT id
FROM images TABLESAMPLE SYSTEM (5)
LIMIT 500000
) TO STDOUT;
""") as copy:
for data in copy:
f.write(data)
yield data
else:
for line in open(file, 'r'):
yield line
def count_from_ids(showexplain=False):
start = time.time()
print(f"Started {time.time() - start}")
with psycopg.connect(os.environ.get('PGURL')) as conn:
with conn.cursor() as cur:
print(f"Connection Started {time.time() - start}")
cur.execute("""
SET WORK_MEM TO '100MB';
SET TEMP_BUFFERS TO '100MB';
CREATE TEMP TABLE lookup_ids (id BIGINT PRIMARY KEY) ON COMMIT DROP;
""")
print(f"Created Temp Table {time.time() - start}")
with cur.copy("""COPY lookup_ids FROM STDIN WITH CSV""") as copy:
for id in sample_ids():
copy.write_row((int(id),))
cur.execute("""
ANALYZE lookup_ids;
""")
print(f"Copied data to temp table {time.time() - start}")
q="""
SELECT
substring(quadkey FOR 4),
count(*),
min(image_dt),
max(image_dt)
FROM images JOIN lookup_ids USING (id)
GROUP BY 1 ORDER BY 2 DESC
;
"""
if showexplain:
print('-----------------------EXPLAIN------------------------------')
cur.execute(f"EXPLAIN (ANALYZE,BUFFERS) {q}")
for record in cur:
print(record[0])
print(f"Ran Explain {time.time() - start}")
print('-------------------END EXPLAIN------------------------------')
cur.execute(q)
print(f"Ran Aggregate {time.time() - start}")
print("quadkey\tcount\tmindate\tmaxdate")
for record in cur:
print(f"{record[0]}\t{record[1]}\t{record[2].isoformat()}\t{record[3].isoformat()}")
conn.commit()
print(f"Done {time.time() - start}")
count_from_ids(True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment