Skip to content

Instantly share code, notes, and snippets.

@neil90
Created July 12, 2016 01:07
Show Gist options
  • Save neil90/d6c11e21b540a2d667b2ac76704856d2 to your computer and use it in GitHub Desktop.
Save neil90/d6c11e21b540a2d667b2ac76704856d2 to your computer and use it in GitHub Desktop.
from faker import Faker
import datetime
import random
import sys
import csv
# Generate a large CSV of fake purchase records.
#
# A small pool of fake people is created with Faker, then rows combining a
# random person with a random product, amount, quantity and purchase date are
# appended to `outfile` until the file position reaches `outsize`.
# Progress is echoed to stdout in 5% steps on a single, overwritten line.

# Start timestamp; only needed by the (disabled) elapsed-time report at the
# bottom of the script.
startTime = datetime.datetime.now()
fake = Faker()

outfile = 'data_test.csv'
outsize = 1024 * 1024 * 1024  # target size (1 GiB), compared to csvfile.tell()

# Progress labels for 0%, 5%, ..., 100%. Entry k is the label shown once the
# file has reached k * progress_step percent of `outsize`.
progress_step = 5
percentage_print = ['{:.3%}'.format(pct / 100)
                    for pct in range(0, 100 + progress_step, progress_step)]

# Create people: every output row reuses one of these 15 profiles.
data = []
for _ in range(15):
    name = fake.name()
    age = fake.random_int(min=18, max=99)
    # Addresses come back multi-line; flatten them so each stays in one cell
    # without embedded newlines.
    address = fake.address().replace('\n', ' ')
    salary = fake.random_int(min=50000, max=250000)
    data.append((name, age, address, salary))

# newline='' lets the csv module control line endings itself.
with open(outfile, 'w', newline='') as csvfile:
    # Value pools that the random rows are assembled from.
    amount_list = [random.randint(1, 50) for _ in range(30)]
    qty_list = list(range(15))
    purchase_date_list = [
        datetime.date(
            random.randint(2005, 2015),
            random.randint(1, 12),
            random.randint(1, 28),  # 28 is a valid day in every month
        ).strftime('%Y-%m-%d')
        for _ in range(10000)
    ]
    product_list = [
        'water', 'beer', 'chips', 'chocolate',
        'pretzels', 'wine', 'fish', 'steak',
    ]

    csvwriter = csv.writer(csvfile)
    save_amount = 0     # file position after the most recent row
    next_milestone = 0  # index into percentage_print of the next label to show

    while save_amount < outsize:
        profile = random.choice(data)
        product = random.choice(product_list)
        amount = random.choice(amount_list)
        qty = random.choice(qty_list)
        purchase_date = random.choice(purchase_date_list)
        csvwriter.writerow([
            profile[0], profile[1],
            profile[2], profile[3],
            product, amount,
            qty, purchase_date,
        ])

        # Query the position once per row and reuse it for both the loop
        # condition and the progress report; the value is treated as the
        # amount written so far.
        save_amount = csvfile.tell()

        # Advance past every milestone this row reached. Comparing integers
        # (instead of matching formatted strings) means each label is printed
        # exactly once and none is skipped when a single row crosses several
        # milestones.
        reached = None
        while (next_milestone < len(percentage_print)
               and save_amount * 100 >= next_milestone * progress_step * outsize):
            reached = next_milestone
            next_milestone += 1
        if reached is not None:
            # '\r' returns to the start of the line so the label is overwritten.
            sys.stdout.write('\r' + percentage_print[reached])
            sys.stdout.flush()

# Elapsed-time report, disabled in the original script:
#print(datetime.datetime.now() - startTime)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment