Skip to content

Instantly share code, notes, and snippets.

@Krailon
Created July 15, 2015 02:25
Show Gist options
  • Save Krailon/cf34f555da81b535e039 to your computer and use it in GitHub Desktop.
Save Krailon/cf34f555da81b535e039 to your computer and use it in GitHub Desktop.
Python2 CSV Test Data Generator
#!/usr/bin/env python2
#
# CSV Random Test Data Generator
# "Allow me to algorithmically test your algorithm with algorithmically-generated test data..."
# Written by Kerberos/Krailon
#
import sys, argparse
from random import SystemRandom
from datetime import datetime, timedelta
def build_parser():
    """Build the command-line parser for the generator.

    The option names, types and defaults are the script's public interface.
    The defaults for --date and --start are taken from the clock at the
    moment the parser is built.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--rowcount", help="Row count", type=int, required=True)
    parser.add_argument("-c", "--columns", help="Columns (format: comma-separated)", required=True)
    parser.add_argument("-d", "--date", help="Date of first sample (format: YYYY-MM-DD; default: current date)", default=datetime.now().strftime("%Y-%m-%d"))
    parser.add_argument("-s", "--start", help="Timestamp of first sample (format: H:M:S; default: current time)", default=datetime.now().strftime("%H:%M:%S"))
    parser.add_argument("-i", "--interval", help="Time interval between rows (format: seconds; default: 1)", type=int, default=1)
    parser.add_argument("-a", "--range", help="Data range (format: low-high: default: 0-255)", default="0-255")
    parser.add_argument("-o", "--output", help="Output path (default: dustdata.csv)", default="dustdata.csv")
    return parser


def parse_range(text):
    """Parse a ``low-high`` string into a ``(low, high)`` tuple of ints.

    Raises:
        ValueError: if *text* is not exactly two dash-separated integers,
            or if ``low`` is greater than ``high``.
    """
    parts = text.split("-")
    # Exactly two parts are required; anything else would either crash on
    # indexing or silently ignore the surplus values.
    if len(parts) != 2:
        raise ValueError("Invalid data range: %s" % (text))
    low, high = int(parts[0]), int(parts[1])
    if low > high:
        raise ValueError("Invalid data range: %s" % (text))
    return low, high


def generate_rows(cols, row_count, start, interval, low, high, rng):
    """Return *row_count* comma-joined data rows (no header).

    Args:
        cols: column names; "date" and "time" are filled from the row's
            timestamp, every other column gets ``rng.randint(low, high)``.
        row_count: number of rows to produce (zero or negative yields none).
        start: ``datetime`` of the first row.
        interval: ``timedelta`` added to the timestamp after each row.
        low, high: inclusive bounds for the random values.
        rng: object providing ``randint(a, b)``.
    """
    rows = []
    stamp = start
    for _ in range(row_count):
        fields = []
        for col in cols:
            if col == "date":
                fields.append(stamp.strftime("%Y-%m-%d"))
            elif col == "time":
                fields.append(stamp.strftime("%H:%M:%S"))
            else:
                fields.append(str(rng.randint(low, high)))
        rows.append(",".join(fields))
        # Date and time come from the same timestamp, so crossing midnight
        # advances the date column as well as wrapping the time column.
        stamp += interval
    return rows


def main(argv=None):
    """Parse arguments, generate the CSV and write it to the output path.

    Args:
        argv: argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when an argument value cannot be parsed or the data
        range is invalid (an "- Error: ..." line is printed in that case).
    """
    # print() is always given one pre-formatted string so the same source
    # produces identical output under Python 2 and Python 3.
    args = build_parser().parse_args(argv)
    try:
        cols = args.columns.split(",")
        interval = timedelta(seconds=args.interval)
        sample_date = datetime.strptime(args.date, "%Y-%m-%d")
        sample_time = datetime.strptime(args.start, "%H:%M:%S")
        low, high = parse_range(args.range)
    except (ValueError, OverflowError) as ex:
        print("- Error: %s" % (ex))
        return 1

    print("+ Generating %s rows of data..." % (args.rowcount))

    # Ensure 'date' and 'time' columns are present
    if "date" not in cols:
        cols.insert(0, "date")
    if "time" not in cols:
        cols.insert(1, "time")

    # Merge the separately parsed date and time-of-day into one timestamp.
    start = datetime.combine(sample_date.date(), sample_time.time())

    lines = [",".join(cols)]
    lines.extend(generate_rows(cols, args.rowcount, start, interval, low, high, SystemRandom()))

    print("+ Saving to %s..." % (args.output))
    with open(args.output, 'w') as out_csv:
        out_csv.write("\n".join(lines))
    print("+ Done!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment