Skip to content

Instantly share code, notes, and snippets.

@dexity
Last active October 18, 2021 21:07
Show Gist options
  • Save dexity/23232c394e707132b139fa70ccf9d801 to your computer and use it in GitHub Desktop.
Save dexity/23232c394e707132b139fa70ccf9d801 to your computer and use it in GitHub Desktop.
Memory profiling for top key4Hashes
"""Profiles BigQuery results for Top N key4Hashes."""
# pip install memory_profiler
from memory_profiler import profile
from google.cloud import bigquery as bq
# SQL template: selects every column of `key4Grouped1` for partId = 1, limited
# to the key4Hash values that the `key4_uniqHashes_1` subquery returns for
# nModelsRank <= {top_n}. The `{top_n}` placeholder is filled in with
# str.format() before the query is submitted.
QUERY_TMPL = """SELECT *
FROM `phyndexer-production.phyndexer_production.key4Grouped1`
where partId = 1
and key4Hash in (
SELECT key4Hash
FROM `phyndexer-production.phyndexer_production.key4_uniqHashes_1`
where nModelsRank <= {top_n}
)"""
def format(items):
    """Convert row objects into a list of plain dictionaries.

    NOTE: this name shadows the builtin ``format`` within the module; it is
    kept unchanged because existing callers refer to it by this name.

    Args:
        items: Iterable of objects exposing ``key4Hash``, ``key4Count`` and
            ``md5s`` attributes.

    Returns:
        A list with one dict per item, keyed by ``'key4Hash'``,
        ``'key4Count'`` and ``'md5s'``, in iteration order.
    """
    return [
        {'key4Hash': item.key4Hash, 'key4Count': item.key4Count, 'md5s': item.md5s}
        for item in items
    ]
@profile
def mem_usage(n):
    """Run the top-N key4Hash query and convert its rows, under the profiler.

    Each step is kept on its own line so that it appears as a separate entry
    in the profiler's line-by-line report.

    Args:
        n: Value substituted for ``{top_n}`` in ``QUERY_TMPL``, i.e. the upper
            bound applied to ``nModelsRank`` in the subquery.
    """
    client = bq.Client()
    q_job = client.query(QUERY_TMPL.format(top_n=n))
    # result() blocks until the query job completes and yields its rows.
    result = q_job.result()
    # NOTE(review): `output` is never read afterwards; presumably it is bound
    # to a local only so the materialized list stays alive while this line is
    # measured. Confirm before removing it.
    output = format(result)
# Script entry point: profile the query with top_n = 1000.
if __name__ == '__main__':
    mem_usage(1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment