Skip to content

Instantly share code, notes, and snippets.

@lotas
Created June 14, 2013 10:57
Show Gist options
  • Save lotas/5781019 to your computer and use it in GitHub Desktop.
Save lotas/5781019 to your computer and use it in GitHub Desktop.
>python fill_table.py database table_name 20 [ Really dump script to fill the void of mysql tables. Will create 20 records with in *database.table_name* table with random data (depending on column type) ]
import re
import sys
import random
import string
import numpy as np
import MySQLdb
import MySQLdb.cursors
DB_HOST = "localhost"
DB_USER = "root"
DB_PASSWORD = "root"
if len(sys.argv) < 3:
print "Not enough arguments."
print sys.argv[0], " db-name table-name [number-of-records]"
print ""
print "\t Generating random data for given table"
print "\t Please specify table-name parameter and an optional number-of-records (default 20)"
print ""
exit(1)
db_name = sys.argv[1]
table_name = sys.argv[2]
records = int(sys.argv[3]) if len(sys.argv) == 4 else 20
# connect to db
db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASSWORD, db=db_name,
cursorclass=MySQLdb.cursors.DictCursor)
cur = db.cursor()
# find out table structure
cur.execute('SELECT * FROM information_schema.columns WHERE table_name=%s', table_name)
columns = cur.fetchall()
cur.close()
if len(columns) == 0:
raise Exception, "Zero columns found for table %s" % table_name
def generate_string(maxChars):
return ''.join(random.choice(string.ascii_letters+' ') for x in xrange(random.randrange(maxChars//2, maxChars)))
def generate_data_by_def(col):
value = ''
if col['EXTRA'].startswith('auto_increment'): return None # auto incr key
if col['COLUMN_DEFAULT'] is not None: return col['COLUMN_DEFAULT']
if col['DATA_TYPE'] == 'int': return random.randrange(0, 2**col['NUMERIC_PRECISION']-1)
if col['DATA_TYPE'].startswith('varchar'): return generate_string(col['CHARACTER_OCTET_LENGTH'])
if col['DATA_TYPE'].startswith('enum'): return random.choice(col['COLUMN_TYPE'][5:-1].split(',')).strip('"\'')
if col['DATA_TYPE'] == 'float': return random.random()
return value
def generate_record(columns):
"""
Generate single row values - for each column
"""
row = dict()
for x in columns:
value = generate_data_by_def(x)
row[x['COLUMN_NAME']] = value
return row
col_cnt = len(columns)
vals = ['%s'] * col_cnt
vals_str = ','.join(vals)
for i in range(records):
cur = db.cursor()
row = generate_record(columns)
sql = "INSERT INTO %s (%s) VALUES (%s)" % (table_name, ','.join(row.keys()), vals_str)
cur.execute(sql, row.values())
cur.close()
print "Inserted %s" % (i+1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment