Skip to content

Instantly share code, notes, and snippets.

@fluffy-critter
Created April 12, 2016 03:16
Show Gist options
  • Save fluffy-critter/aa9983161b2b56f8f2750d661279cb9e to your computer and use it in GitHub Desktop.
Save fluffy-critter/aa9983161b2b56f8f2750d661279cb9e to your computer and use it in GitHub Desktop.
Dissociator to generate names by example
#!/usr/bin/env python
#
# Silly thing to generate random names from examples. Uses the CSV files obtained from
# http://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data
#
# Any corpus will work if it's formatted like:
# Name,[ignored],weight
import csv
import sys
import collections
import random
class Node:
    """One state of the name generator: weighted choices for the next symbol."""

    def __init__(self):
        # Running sum of every weight added to next_nodes; the corpus loader
        # bumps both together so this is the denominator for sampling.
        self.total = 0
        # Maps the next symbol (a character, or None meaning "the name ends
        # here") to its accumulated weight. Unseen symbols start at 0.
        self.next_nodes = collections.defaultdict(int)

    def __repr__(self):
        return "Node(total={!r}, next_nodes={!r})".format(
            self.total, dict(self.next_nodes))
# Transition table. nodes[None] is the start state; nodes[(c, pos)] is the
# state reached after emitting character c at index pos of a name.
nodes = collections.defaultdict(Node)

# Every command-line argument is a CSV corpus of rows: Name,[ignored],weight
for arg in sys.argv[1:]:
    with open(arg, 'r') as corpus:
        for row in csv.reader(corpus):
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to
                # learn from it and row[2] would raise IndexError.
                continue
            weight = int(row[2])
            node = nodes[None]
            for pos, c in enumerate(row[0]):
                # Record "c follows the current state" and advance.
                node.total += weight
                node.next_nodes[c] += weight
                node = nodes[(c, pos)]
            # The state after the last letter records that the name can end
            # here; None is the end-of-name marker.
            node.total += weight
            node.next_nodes[None] += weight
# Debugging aid (disabled): dump every node with its outgoing weights, then exit.
# for letter,weights in nodes.items():
#     print "{} = {}".format(letter, weights.total)
#     for nn,wt in weights.next_nodes.items():
#         print " -> {} = {}".format(nn, wt)
# sys.exit(0)
def pick_weighted(node):
    """Pick one key of ``node.next_nodes`` at random, proportionally to its weight.

    ``node`` must expose ``total`` (an int, the sum of the weights) and
    ``next_nodes`` (a mapping of key -> int weight).

    Returns the chosen key. Returns None when the node has nothing to choose
    from (``total`` is not positive), or when the weights add up to less than
    ``total``.
    """
    if node.total <= 0:
        return None
    # randint is inclusive at both ends, so draw from 1..total: that gives
    # exactly `total` equally likely outcomes and each key owns as many of
    # them as its weight. Starting at 0 would hand the first key one extra.
    rnd = random.randint(1, node.total)
    # print "{} {}".format(node.total, rnd)
    for k, v in node.next_nodes.items():
        rnd -= v
        if rnd <= 0:
            return k
    return None
# Print 200 generated names, one per line. print(...) with a single argument
# and range() behave the same on Python 2 and Python 3.
for _ in range(200):
    letters = []
    node = nodes[None]  # start state
    pos = 0
    while True:
        letter = pick_weighted(node)
        if letter is None:
            # None is the end-of-name marker.
            break
        letters.append(letter)
        # The next state is keyed by the letter and the index it was emitted at.
        node = nodes[(letter, pos)]
        pos += 1
    print(''.join(letters))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment