Skip to content

Instantly share code, notes, and snippets.

@ryantuck
Created November 27, 2018 23:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryantuck/14931c852d399d7d41f1dc09787a6dfc to your computer and use it in GitHub Desktop.
Save ryantuck/14931c852d399d7d41f1dc09787a6dfc to your computer and use it in GitHub Desktop.
Configurable python code to generate random SQL tables and indexes using pure SQL.
import json
import random
def _gen_ids(b,e):
return f'generate_series({b}, {e})::bigint'
def _gen_float():
return 'random()'
def _gen_int(b=0,e=100000):
return f'(random()*{e})::int + {b}'
def _gen_str():
return 'substring(md5(random()::text), (random()*4)::int+1, (random()*20)::int+5)'
def _gen_date():
return "('2000-01-01'::timestamp + random() * '20 years'::interval)::date"
def _gen_timestamp():
return "'2000-01-01'::timestamp + random() * '20 years'::interval"
def _gen_bool():
return '(random() < 0.5)'
def _gen_select_as(field_name, fn):
return f'{fn()} as {field_name}'
def _gen_set_f(field_name, fn):
return f'{field_name} = {fn()}'
def _gen_create_index(table_name, field_name):
return f'create index on junk.{table_name} ({field_name});'
def _gen_on_conflict_do_update(ff_map):
updates = [
_gen_set_f(field_name, fn)
for field_name, fn in ff_map.items()
]
updates_str = ',\n\t'.join(updates)
on_conflict_str = f'on conflict (id) do update\nset\n\t{updates_str}'
return on_conflict_str
def _gen_selects(ff_map, b, e):
id_select = f'\t{_gen_ids(b, e)} as id'
selects = [id_select] + [
_gen_select_as(field_name, fn)
for field_name, fn in ff_map.items()
]
selects_str = ',\n\t'.join(selects)
return f'select \n{selects_str}'
def _pick_fn():
fns = [
_gen_float,
_gen_int,
_gen_str,
_gen_date,
_gen_timestamp,
_gen_bool,
]
return random.choice(fns)
def gen_random_field_fn_map():
max_fields = 10
return {f'f_{i}': _pick_fn() for i in range(random.randint(1,max_fields))}
def pick_random_fields_from_field_fn_map(ff_map):
fields = list(ff_map.keys())
random.shuffle(fields)
return fields[:random.randint(0, len(fields)-1)]
def gen_create_table_as_stmt(table_name, ff_map, n_rows):
selects = _gen_selects(ff_map, 1, n_rows)
create_stmt = f'create table junk.{table_name} as\n{selects}\n;'
pkey_stmt = f'alter table junk.{table_name} add primary key (id);'
owner_stmt = f'alter table junk.{table_name} owner to reporting;'
stmts = [create_stmt, pkey_stmt, owner_stmt]
return '\n'.join(stmts)
def gen_create_index_stmts(table_name, index_fields):
index_stmts = [_gen_create_index(table_name, f) for f in index_fields]
return '\n'.join(index_stmts)
def gen_insert_into_stmt(table_name, ff_map, b, e):
selects = _gen_selects(ff_map, b, e)
on_conflict_str = _gen_on_conflict_do_update(ff_map)
return f'insert into junk.{table_name}\n{selects}\n{on_conflict_str}\n;'
def gen_recreate_schema_stmt():
stmts = [
'drop schema if exists junk cascade;',
'create schema junk;',
'alter schema junk owner to reporting;',
]
return '\n'.join(stmts)
def generate_config(n_tables, min_rows, max_rows):
table_configs = []
for i in range(n_tables):
ff_map = gen_random_field_fn_map()
idx_fields = pick_random_fields_from_field_fn_map(ff_map)
table_name = f't_{i}'
n_rows = random.randint(min_rows, max_rows)
table_configs.append({
'field_fn_map': ff_map,
'index_fields': idx_fields,
'table_name': table_name,
'n_rows': n_rows,
})
return table_configs
def write_config_to_file(config, filepath):
with open(filepath, 'w') as f:
json.dump(config, f, indent=4, sort_keys=True, default=lambda x:str(x))
def read_config_from_file(filepath):
with open(filepath) as f:
return json.load(f)
def mini_test():
ff_map = {
'd': _gen_date,
'e': _gen_bool,
'f': _gen_timestamp,
'g': _gen_str,
}
ss = gen_recreate_schema_stmt()
crs = gen_create_table_as_stmt('my_table', ff_map, 1000000)
print(ss)
print(crs)
def generate_create_stmts_from_config(config):
stmts = []
for cfg in config:
stmt = gen_create_table_as_stmt(
table_name=cfg['table_name'],
ff_map=cfg['field_fn_map'],
n_rows=cfg['n_rows'],
)
stmts.append(stmt)
return '\n\n'.join(stmts)
def generate_create_index_stmts_from_config(config):
stmts = []
for cfg in config:
stmt = gen_create_index_stmts(
table_name=cfg['table_name'],
index_fields=cfg['index_fields'],
)
stmts.append(stmt)
return '\n\n'.join(stmts)
def generate_insert_stmts_from_config(config):
stmts = []
for cfg in config:
stmt = gen_insert_into_stmt(
table_name=cfg['table_name'],
ff_map=cfg['field_fn_map'],
b=1,
e=random.randint(1, cfg['n_rows']),
)
stmts.append(stmt)
return '\n\n'.join(stmts)
def main():
config = generate_config(50, 100, 1000000)
write_config_to_file(config, 'config.json')
cr = generate_create_stmts_from_config(config)
idx = generate_create_index_stmts_from_config(config)
ins = generate_insert_stmts_from_config(config)
print(cr)
print(idx)
print(ins)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment