# fluffy-critter/name-androgyny.py

## name-androgyny.py
# Generate a list of popular names, sortable by their relative androgyny. Data is obtained
# from the SSA website at http://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data
#
# The expected input is a .csv file with columns of:
#
#     name,assigned gender,count
#
# Some example uses of this program:
#
# Find the most androgynous names from 1978 beginning with Q:
#     python name-androgyny.py yob1978.txt | grep ' Q' | sort -nr | head
#
# Find the least androgynous names overall:
#     python name-androgyny.py yob*.txt | sort -n | head
#
# One particular analysis: http://tumblr.beesbuzz.biz/post/141748658234/

import csv
import sys
import collections
import math

# Nested tally: names[name][gender] -> accumulated count. Both levels are
# defaultdicts, so an unseen name or gender reads as 0 on first access.
names = collections.defaultdict(lambda: collections.defaultdict(int))


def getname(name):
    """Return the per-gender count table for *name*.

    The table is the inner defaultdict stored in ``names``; it is created
    on first access, so repeated calls with the same name return the same
    object and updates made through it are visible in ``names``.
    """
    # The original called names.setdefault() with no arguments, which always
    # raises TypeError; indexing the defaultdict is the get-or-create lookup.
    return names[name]

# Tally the counts from every CSV file named on the command line. Each row is
# expected to hold: name, assigned gender, count.
for arg in sys.argv[1:]:
    with open(arg, 'r') as source:
        for row in csv.reader(source):
            if not row:
                # csv.reader yields [] for a blank line; nothing to tally.
                continue
            names[row[0]][row[1].lower()] += float(row[2])

# Print one line per name that was recorded under both 'm' and 'f':
#     <score> <name> <m count> <f count>
# The score is (m + f) / sqrt(m^2 + f^2): it approaches 1 when one gender
# dominates and reaches sqrt(2) when the two counts are equal, so sorting the
# output numerically orders names by androgyny.
for name, counts in names.items():
    if 'm' in counts and 'f' in counts:
        r1 = counts['m']
        r2 = counts['f']
        d = (r1 + r2) / math.sqrt(r1 * r1 + r2 * r2)
        # A single parenthesised argument prints the same text whether print
        # is the Python 2 statement or the Python 3 function.
        print('{} {} {} {}'.format(d, name, counts['m'], counts['f']))
	# Generate a list of popular names, sortable by their relative androgyny. Data is obtained
	# from the SSA website at http://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data
	#
	# The expected input is a .csv file with columns of:
	#
	#     name,assigned gender,count
	#
	# Some example uses of this program:
	#
	# Find the most androgynous names from 1978 beginning with Q:
	#     python name-androgyny.py yob1978.txt | grep ' Q' | sort -nr | head
	#
	# Find the least androgynous names overall:
	#     python name-androgyny.py yob*.txt | sort -n | head
	#
	# One particular analysis: http://tumblr.beesbuzz.biz/post/141748658234/

	import csv
	import sys
	import collections
	import math

	# NOTE(review): this repeats the `names` / `getname` definitions that appear
	# earlier in the file -- confirm whether the second copy is intentional.
	#
	# Nested tally: names[name][gender] -> accumulated count. Both levels are
	# defaultdicts, so an unseen name or gender reads as 0 on first access.
	names = collections.defaultdict(lambda: collections.defaultdict(int))


	def getname(name):
		"""Return the per-gender count table for *name*.

		The table is the inner defaultdict stored in ``names``; it is created
		on first access, so repeated calls with the same name return the same
		object and updates made through it are visible in ``names``.
		"""
		# The original called names.setdefault() with no arguments, which
		# always raises TypeError; indexing the defaultdict is the
		# get-or-create lookup.
		return names[name]

	# NOTE(review): this repeats the read-and-report script that appears earlier
	# in the file -- confirm whether the second copy is intentional.
	#
	# Tally the counts from every CSV file named on the command line. Each row
	# is expected to hold: name, assigned gender, count.
	for arg in sys.argv[1:]:
		with open(arg, 'r') as source:
			for row in csv.reader(source):
				if not row:
					# csv.reader yields [] for a blank line; nothing to tally.
					continue
				names[row[0]][row[1].lower()] += float(row[2])

	# Print one line per name that was recorded under both 'm' and 'f':
	#     <score> <name> <m count> <f count>
	# The score is (m + f) / sqrt(m^2 + f^2): it approaches 1 when one gender
	# dominates and reaches sqrt(2) when the two counts are equal.
	for name, counts in names.items():
		if 'm' in counts and 'f' in counts:
			r1 = counts['m']
			r2 = counts['f']
			# The squares were written as the undefined names `r1r1` / `r2r2`;
			# the multiplication operators are restored here.
			d = (r1 + r2) / math.sqrt(r1 * r1 + r2 * r2)
			# A single parenthesised argument prints the same text whether
			# print is the Python 2 statement or the Python 3 function.
			print('{} {} {} {}'.format(d, name, counts['m'], counts['f']))