Skip to content

Instantly share code, notes, and snippets.

@VolkmarR
Created November 1, 2020 19:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VolkmarR/c4dba35037e2a4e438189ec90269bcbc to your computer and use it in GitHub Desktop.
Save VolkmarR/c4dba35037e2a4e438189ec90269bcbc to your computer and use it in GitHub Desktop.
Script for generating test data for the DuckDB article (https://dev.to/volkmarr/duckdb-an-embedded-db-for-data-wrangling-4hfm)
import csv
from pathlib import Path

from faker import Faker
# Number of rows to generate for each CSV file. The order items reference
# persons and books by id, drawing from 1..countPersons and 1..countBooks.
countPersons = 10_000
countBooks = 100_000
countOrderItems = 1_000_000

# One Faker instance, reused for every generated value.
fake = Faker()
# create CSV files
# Write persons.csv: one header row, then countPersons rows of fake person
# data with sequential ids starting at 1.
# The path is built with pathlib so the file lands inside testData/ on every
# platform (a backslash is only a path separator on Windows), and the
# directory is created first so open() does not fail on a fresh run.
persons_path = Path('testData') / 'persons.csv'
persons_path.parent.mkdir(parents=True, exist_ok=True)
# newline='' leaves line endings to the csv module; UTF-8 is explicit so the
# output does not depend on the platform's default encoding.
with open(persons_path, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["id", "name", "street", "city", "email", "category"])
    # Named person_id rather than id to avoid shadowing the builtin id().
    for person_id in range(1, countPersons + 1):
        writer.writerow([
            person_id,
            fake.name(),
            fake.street_address(),
            fake.city(),
            fake.email(),
            fake.random_int(1, 10),  # category: integer from 1 to 10
        ])
# Write books.csv: one header row, then countBooks rows of fake book data
# with sequential ids starting at 1.
# The path is built with pathlib so the file lands inside testData/ on every
# platform (a backslash is only a path separator on Windows), and the
# directory is created first so open() does not fail on a fresh run.
books_path = Path('testData') / 'books.csv'
books_path.parent.mkdir(parents=True, exist_ok=True)
# newline='' leaves line endings to the csv module; UTF-8 is explicit so the
# output does not depend on the platform's default encoding.
with open(books_path, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["id", "isbn", "name", "price"])
    # Named book_id rather than id to avoid shadowing the builtin id().
    for book_id in range(1, countBooks + 1):
        writer.writerow([
            book_id,
            fake.isbn13(),
            fake.catch_phrase(),
            # Integer in 1000..7500 divided by 100 gives a price of 10.00-75.00.
            fake.random_int(1000, 7500) / 100,
        ])
# Write orderItems.csv: one header row, then countOrderItems rows, each
# linking a random person id to a random book id with a quantity and a date.
# The path is built with pathlib so the file lands inside testData/ on every
# platform (a backslash is only a path separator on Windows), and the
# directory is created first so open() does not fail on a fresh run.
order_items_path = Path('testData') / 'orderItems.csv'
order_items_path.parent.mkdir(parents=True, exist_ok=True)
# newline='' leaves line endings to the csv module; UTF-8 is explicit so the
# output does not depend on the platform's default encoding.
with open(order_items_path, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["id", "person_id", "book_id", "quantity", "date"])
    # Named order_item_id rather than id to avoid shadowing the builtin id().
    for order_item_id in range(1, countOrderItems + 1):
        writer.writerow([
            order_item_id,
            # Foreign keys stay inside the id ranges written to persons.csv
            # and books.csv (both number their rows from 1 to the count).
            fake.random_int(1, countPersons),
            fake.random_int(1, countBooks),
            fake.random_int(1, 5),  # quantity
            fake.date_this_year(),
        ])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment