Skip to content

Instantly share code, notes, and snippets.

@dylanjf
Last active August 29, 2015 14:06
Show Gist options
  • Save dylanjf/389f7b56ef73ad6ae75b to your computer and use it in GitHub Desktop.
Save dylanjf/389f7b56ef73ad6ae75b to your computer and use it in GitHub Desktop.
rare count by column
from csv import DictReader
import numpy as np
import pandas as pd
class NestedDictCreator(dict):
    """
    Implementation of perl's autovivification feature.

    Used to generate nested dictionaries on the fly: subscripting with a key
    that is not present creates, stores and returns a new empty instance of
    the same class, so chained assignments such as ``d["a"]["b"] = 1`` work
    without building the intermediate levels by hand.

    Only subscript access (``d[key]``) creates entries; ``key in d`` and
    ``d.get(key)`` keep their plain ``dict`` behaviour and never insert.
    """

    def __missing__(self, item):
        """Create, store and return a nested instance for an absent key.

        ``dict`` subclasses get this hook called by subscript lookup when
        ``item`` is not present, so existing keys never reach this method.
        """
        # type(self)() rather than NestedDictCreator() so that subclasses
        # vivify instances of themselves.
        value = self[item] = type(self)()
        return value
# Header names of the columns to scan: "C1" through "C26".
categoric = ["C" + str(x) for x in range(1, 27)]

# Maps each column name to a dict of {value: 1} holding only the values that
# occur exactly once in that column of train.csv.
category_counts = NestedDictCreator()

for column in categoric:
    # Read one column per pass, selected by its header name, so the result
    # does not depend on where the column sits in the file.
    # NOTE(review): re-reading the file per column is presumably a memory
    # trade-off for a large train.csv -- confirm before batching the reads.
    train = pd.read_csv('train.csv', header=0, usecols=[column])

    # value_counts() gives occurrences per distinct value; keep singletons.
    occurrences = dict(train[column].value_counts())
    category_counts[column] = {
        key: value for (key, value) in occurrences.items() if value == 1
    }

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("col %s complete" % column)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment