Last active
January 15, 2024 23:20
-
-
Save alejandro-du/6be74d3136d17a9ec5458a4d13d523b6 to your computer and use it in GitHub Desktop.
Insert demo data in batches into MariaDB databases using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import os | |
import subprocess | |
from datetime import datetime, timedelta | |
# Function to generate a random date within a given range | |
def random_date(start, end): | |
return start + timedelta(days=random.randint(0, int((end - start).days))) | |
# Function to execute a given SQL command using MariaDB | |
def execute_sql(sql): | |
# Write the SQL command to a temporary file | |
with open("temp.sql", "w") as file: | |
file.write(sql) | |
# Execute the SQL command using the MariaDB client | |
subprocess.run(["mariadb", "-h", "127.0.0.1", "-P", "3307", "-u", "admin", "-pC0lumnStore!", "-e", "source temp.sql"]) | |
# Remove the temporary file | |
os.remove("temp.sql") | |
print("Generating and inserting data...") | |
# Total number of rows to be inserted | |
total_rows = 4000000 | |
# Number of rows to insert in each batch | |
batch_size = 10000 | |
# Possible values for the 'reason' column and their associated weights for random selection | |
reasons = ["Consultation", "Follow-up", "Preventive", "Chronic"] | |
reason_weights = [0.5, 0.15, 0.25, 0.1] | |
# Possible values for the 'status' column and their associated weights for random selection | |
statuses = ["Scheduled", "Canceled", "Completed", "No Show"] | |
status_weights = [0.1, 0.15, 0.7, 0.05] | |
# Possible values for the 'doctor_id' column and their associated weights for random selection | |
doctors = [1, 2, 3] | |
doctors_weights = [0.4, 0.35, 0.25] | |
# List of patient names | |
names = [f"Patient_{i}" for i in range(total_rows)] | |
# Insert data in batches | |
for batch_start in range(0, total_rows, batch_size): | |
batch_values = [] | |
# Generate data for each row in the batch | |
for i in range(batch_start, min(batch_start + batch_size, total_rows)): | |
name = names[i] | |
phone_number = f"{random.randint(100, 999)}-{random.randint(100, 999)}-{random.randint(1000, 9999)}" | |
email = f"patient_{i}@example.com" | |
time = random_date(datetime(2023, 1, 1), datetime(2024, 1, 1)).strftime("%Y-%m-%d %H:%M:%S") | |
reason = random.choices(reasons, reason_weights)[0] | |
status = random.choices(statuses, status_weights)[0] | |
doctor_id = random.choices(doctors, doctors_weights)[0] | |
# Append the generated row to the batch | |
batch_values.append(f"('{name}', '{phone_number}', '{email}', '{time}', '{reason}', '{status}', {doctor_id})") | |
# SQL command to insert the batch of data into the 'appointments' table | |
sql = "USE operations;\nINSERT INTO appointments (name, phone_number, email, time, reason, status, doctor_id) VALUES " + ", ".join(batch_values) + ";" | |
# Execute the SQL command | |
execute_sql(sql) | |
# Print progress | |
print(f"Inserted up to row {min(batch_start + batch_size, total_rows)}") | |
print("Data insertion complete.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment