Last active
December 15, 2015 22:49
-
-
Save vanzaj/5335515 to your computer and use it in GitHub Desktop.
helper functions to generate random data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import string | |
import datetime | |
def resample(lst, n):
    """Return ``n`` elements drawn at random, with replacement, from ``lst``.

    Args:
        lst: Any iterable; it is copied into a list before sampling.
        n: Number of elements to draw.

    Returns:
        A list of ``n`` randomly chosen elements.  When ``n <= 0`` a single
        randomly chosen element (not a list) is returned instead.

    Raises:
        IndexError: If ``lst`` is empty (raised by ``random.choice``).
    """
    pool = list(lst)
    if n <= 0:
        # Existing contract: a non-positive n yields one bare element.
        return random.choice(pool)
    # range() rather than the Python-2-only xrange() so the helper runs
    # unchanged on both Python 2 and Python 3.
    return [random.choice(pool) for _ in range(n)]
def rnd_string(n=8, uppercase=False, mixcase=False, digits=False):
    """Return a string of ``n`` random ASCII letters and, optionally, digits.

    Args:
        n: Length of the returned string.
        uppercase: Draw from upper-case letters instead of lower-case ones.
        mixcase: Draw from both cases; takes precedence over ``uppercase``.
        digits: Also allow the decimal digits 0-9.

    Returns:
        A string of length ``n`` (empty when ``n <= 0``).  Characters are
        drawn independently, so the same character may appear more than once.
    """
    alphabet = string.ascii_lowercase
    if uppercase:
        alphabet = string.ascii_uppercase
    if mixcase:
        # string.letters only exists on Python 2 and is locale-dependent;
        # ascii_letters is available everywhere and is always A-Z + a-z.
        alphabet = string.ascii_letters
    if digits:
        alphabet += string.digits
    # Draw with replacement: random.sample() would raise ValueError as soon
    # as n exceeds the alphabet size and could never repeat a character.
    return ''.join(random.choice(alphabet) for _ in range(n))
def rnd_numbers(n=10, range=(0, 10), integer=False):
    """Return a list of ``n`` random numbers within a range.

    Args:
        n: How many numbers to generate; a non-positive value gives ``[]``.
        range: Sequence whose first two items are the lower and upper bound
            handed to ``random.uniform``.
        integer: When true, each drawn float is truncated with ``int()``.

    Returns:
        A list of ``n`` floats, or of ``n`` ints when ``integer`` is true.
    """
    low = range[0]
    high = range[1]
    # The ``range`` parameter shadows the builtin inside this function, so
    # iterate over a throwaway list instead of range()/xrange().
    values = [random.uniform(low, high) for _ in [None] * n]
    if integer:
        # Build a real list: on Python 3 map() is lazy and callers that
        # index into the result would fail.
        return [int(v) for v in values]
    return values
def rnd_times(n=10, seconds=False):
    """Return a list of ``n`` random time-stamps formatted as ``hh:mm[:ss]``.

    Args:
        n: Number of time-stamps to generate.
        seconds: When true, append a random ``:ss`` field to every entry.

    Returns:
        A list of ``n`` strings such as ``'07:42'`` or ``'07:42:13'``.
    """
    hours = rnd_numbers(n=n, range=(0, 24), integer=True)
    minutes = rnd_numbers(n=n, range=(0, 60), integer=True)
    # Pair the values with zip() instead of indexing through xrange(n):
    # xrange does not exist on Python 3, and zip() does not need the
    # results of rnd_numbers to be indexable.
    stamps = ["%02d:%02d" % (h, m) for h, m in zip(hours, minutes)]
    if seconds:
        # Seconds are drawn after all hours and minutes, one per stamp.
        stamps = [s + ':%02d' % (int(random.uniform(0, 60)))
                  for s in stamps]
    return stamps
def rnd_dates(n=10, startdate='2000-01-01', enddate='2010-12-31',
              sort=False):
    """Return ``n`` distinct random dates formatted as ``yyyy-mm-dd``.

    Args:
        n: Number of dates to generate.
        startdate: One bound of the interval, as ``'yyyy-mm-dd'``.
        enddate: The other bound of the interval, as ``'yyyy-mm-dd'``.
            Both bounds are inclusive and may be given in either order.
        sort: When true, the dates are returned in ascending order.

    Returns:
        A list of ``n`` date strings, each a different day of the interval.

    Raises:
        ValueError: If a date string is malformed, or if ``n`` is larger
            than the number of days in the interval (the dates are sampled
            without replacement).
    """
    ys, ms, ds = (int(part) for part in startdate.split('-'))
    ye, me, de = (int(part) for part in enddate.split('-'))
    first = datetime.date(ys, ms, ds)
    last = datetime.date(ye, me, de)
    if last < first:
        # Offsets are added to the earlier bound, so normalise the order
        # to keep every generated date inside the interval.
        first, last = last, first
    # +1 so that the last day of the interval can be drawn as well.
    span = (last - first).days + 1
    offsets = random.sample(range(span), n)
    if sort:
        offsets.sort()
    # The original added the offsets to an undefined name (``d1``), which
    # raised NameError on every call; the earlier bound is the base date.
    return [(first + datetime.timedelta(days=k)).isoformat()
            for k in offsets]
def rnd_names(n=10, male=True, female=True):
    """Return a list of ``n`` male and/or female names.

    The names are read from ``dat/names_male.txt`` and
    ``dat/names_female.txt``, resolved relative to the current working
    directory.  Lines starting with ``#`` and blank lines are ignored.

    Args:
        n: Number of names wanted.  It is capped at the number of names
            available.
        male: Include the names from the male list.
        female: Include the names from the female list.

    Returns:
        A list of ``n`` names sampled without replacement.  When ``n <= 0``
        a single randomly chosen name (a string, not a list) is returned.

    Raises:
        IOError/OSError: If a required data file cannot be opened.
        IndexError: If ``n <= 0`` and no names were loaded.
    """
    # data from http://listofrandomnames.com/
    files = ['dat/names_male.txt', 'dat/names_female.txt']

    def read_file(f):
        # Context manager so the handle is closed even if reading fails.
        with open(f, 'r') as fd:
            stripped = (line.strip() for line in fd
                        if not line.startswith('#'))
            # Skip blank lines so '' is never handed out as a name.
            return [name for name in stripped if name]

    names = []
    if male:
        names.extend(read_file(files[0]))
    if female:
        names.extend(read_file(files[1]))
    if n <= 0:
        return random.choice(names)
    # random.sample() raises if asked for more items than exist.
    return random.sample(names, min(n, len(names)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment