Skip to content

Instantly share code, notes, and snippets.

@alejandro-du
Last active January 15, 2024 23:20
Show Gist options
  • Save alejandro-du/6be74d3136d17a9ec5458a4d13d523b6 to your computer and use it in GitHub Desktop.
Save alejandro-du/6be74d3136d17a9ec5458a4d13d523b6 to your computer and use it in GitHub Desktop.
Insert demo data in batches into MariaDB databases using Python
import random
import os
import subprocess
from datetime import datetime, timedelta
# Function to generate a random date within a given range
def random_date(start, end):
return start + timedelta(days=random.randint(0, int((end - start).days)))
# Function to execute a given SQL command using MariaDB
def execute_sql(sql):
# Write the SQL command to a temporary file
with open("temp.sql", "w") as file:
file.write(sql)
# Execute the SQL command using the MariaDB client
subprocess.run(["mariadb", "-h", "127.0.0.1", "-P", "3307", "-u", "admin", "-pC0lumnStore!", "-e", "source temp.sql"])
# Remove the temporary file
os.remove("temp.sql")
print("Generating and inserting data...")
# Total number of rows to be inserted
total_rows = 4000000
# Number of rows to insert in each batch
batch_size = 10000
# Possible values for the 'reason' column and their associated weights for random selection
reasons = ["Consultation", "Follow-up", "Preventive", "Chronic"]
reason_weights = [0.5, 0.15, 0.25, 0.1]
# Possible values for the 'status' column and their associated weights for random selection
statuses = ["Scheduled", "Canceled", "Completed", "No Show"]
status_weights = [0.1, 0.15, 0.7, 0.05]
# Possible values for the 'doctor_id' column and their associated weights for random selection
doctors = [1, 2, 3]
doctors_weights = [0.4, 0.35, 0.25]
# List of patient names
names = [f"Patient_{i}" for i in range(total_rows)]
# Insert data in batches
for batch_start in range(0, total_rows, batch_size):
batch_values = []
# Generate data for each row in the batch
for i in range(batch_start, min(batch_start + batch_size, total_rows)):
name = names[i]
phone_number = f"{random.randint(100, 999)}-{random.randint(100, 999)}-{random.randint(1000, 9999)}"
email = f"patient_{i}@example.com"
time = random_date(datetime(2023, 1, 1), datetime(2024, 1, 1)).strftime("%Y-%m-%d %H:%M:%S")
reason = random.choices(reasons, reason_weights)[0]
status = random.choices(statuses, status_weights)[0]
doctor_id = random.choices(doctors, doctors_weights)[0]
# Append the generated row to the batch
batch_values.append(f"('{name}', '{phone_number}', '{email}', '{time}', '{reason}', '{status}', {doctor_id})")
# SQL command to insert the batch of data into the 'appointments' table
sql = "USE operations;\nINSERT INTO appointments (name, phone_number, email, time, reason, status, doctor_id) VALUES " + ", ".join(batch_values) + ";"
# Execute the SQL command
execute_sql(sql)
# Print progress
print(f"Inserted up to row {min(batch_start + batch_size, total_rows)}")
print("Data insertion complete.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment