Skip to content

Instantly share code, notes, and snippets.

Last active January 24, 2016 01:08
What would you like to do?
Generate a Bulk of Data into Postgre
require 'pg'
conn = 'mydb',
host: 'localhost',
user: 'testuser')
# create the first table (cup_matches)
conn.exec("CREATE TABLE cup_matches (
mid serial PRIMARY KEY,
round varchar(20),
year integer,
num_ratings integer,
rating real);")
# create the second table (played_in)
conn.exec("create table played_in(
mid integer references cup_matches(mid),
name varchar(80),
year integer,
position integer,
PRIMARY KEY(mid,name));")
# generate data for cup_matches
# array for match rounds
round_array = ['32th', '16th', '8th', 'quarter_final', 'semi_final', 'final']
for i in (1..2680) do
round_i = rand(0..5)
year_value = rand(2000..2015)
num_ratings_value = rand(1..205)
rating_value = rand(1.0..5.0)
# insert this row
conn.prepare("ins#{i}" ,"INSERT INTO cup_matches(round, year, num_ratings,
rating) VALUES ($1, $2, $3, $4)")
conn.exec_prepared("ins#{i}", [round_array[round_i], year_value,
num_ratings_value, rating_value])
# generate data for played_in
# array of random names
name_array = ['Shinichi', 'Nitrogen', 'Yan', 'Itachi', 'Kurosan', 'Tamashii',
'Ben', 'Owen', 'Ror', 'Bo', 'Yama', 'Kaminari',
'Kintama', 'Sasuke', 'Aasare', 'Leila', 'Xou', 'Xubuntu',
'Gnomy', 'Lolly', 'Gintoki', 'Kogami', 'Kurotora', 'Matz',
'Yuki', 'Yumi', 'Kakashi', 'Naruto', 'K.O.', 'Gohan',
'Kagura', 'Kuroon', 'Ran', 'Aurora', 'Conan', 'Kudo',
'Lol', 'Yami', 'Sabasa', 'Namik', 'Goku', 'Tintin',
'Gara', 'Aiomi']
for i in (1..58960) do
while true
name_i = rand(0..43)
year_value = rand(2000..2015)
position_value = rand(1..11)
mid_value = rand(1..2680)
# SELECT query that searches for the pair (mid_value, name_array[name_i])
result = conn.exec( "SELECT * FROM played_in WHERE mid=#{mid_value} AND
name='#{name_array[name_i]}' " )
# loop until we have result array empty
# i.e., (mid_value, name_array[name_i]) is not used
break unless result.any?
conn.prepare("inse#{i}" ,"INSERT INTO played_in(mid, name, year, position)
VALUES ($1, $2, $3, $4)")
conn.exec_prepared("inse#{i}", [mid_value, name_array[name_i], year_value,
# this is where we make 118 rows have the name 'pele'
for i in (1..118) do
while true
mid_value = rand(1..2680)
# make sure that we don't have a (mid_value, 'pele') pair in the database
result = conn.exec( "SELECT * FROM played_in WHERE mid='#{mid_value}' AND
name='pele' " )
# break if mid_value is valid to use
break unless result.any?
# retrieve all rows that have mid_value
result = conn.exec( "SELECT * FROM played_in WHERE mid='#{mid_value}'" )
# result is an array of rows
# retrieve the name (row[1]) from the first row of the result (result[0])
target_name = result.getvalue(0, 1)
# update this row to have name='pele'
conn.exec_params("UPDATE played_in SET name = $1 WHERE mid=#{mid_value} AND
name='#{target_name}'", ['pele'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment