Skip to content

Instantly share code, notes, and snippets.

@onurvarol
Created April 6, 2017 22:06
Show Gist options
  • Save onurvarol/ba101ebed3c9cd966d0e58f15932e999 to your computer and use it in GitHub Desktop.
Save onurvarol/ba101ebed3c9cd966d0e58f15932e999 to your computer and use it in GitHub Desktop.
Parse feature names in Fragile Families Challenge
def parse_codebook_data(fname):
    """Map variable names to their descriptions from one codebook text file.

    The file is scanned line by line.  A line beginning with ``-----`` is
    treated as a separator; the first non-separator line that follows it is
    read as an entry whose first whitespace-delimited token is the variable
    name and whose remaining tokens, joined by single spaces, form the
    description.  Every other line is ignored.

    Args:
        fname: Path of the codebook file to read.

    Returns:
        A dict mapping each name to its description.  The description is an
        empty string when the entry line holds only a name.  If a name
        occurs more than once, the last occurrence wins.
    """
    fNames = dict()
    with open(fname, 'r') as fl:
        # True only while the previous line was a separator.
        after_separator = False
        for line in fl:
            if line.startswith('-----'):
                after_separator = True
                continue
            if after_separator:
                # split() with no argument trims the line and collapses any
                # run of whitespace (spaces or tabs) in a single pass, so no
                # manual "replace until stable" loop is needed.
                tokens = line.split()
                # A blank line right after a separator carries no entry;
                # skip it instead of storing an empty-string key.
                if tokens:
                    fNames[tokens[0]] = ' '.join(tokens[1:])
            # Only the line immediately after a separator is an entry.
            after_separator = False
    return fNames
# glob is used below; no import of it is visible elsewhere in this file, so
# it is imported here to keep the script runnable on its own.
import glob

featureNames = dict()
# Looking for a path that has all feature description files
for fname in glob.glob('data/codebooks/ff*.txt'):
    featureNames.update(parse_codebook_data(fname))

# List every feature whose description contains 'education'
# (case-sensitive substring match).
for f, description in featureNames.items():
    if 'education' in description:
        # Single-argument parenthesized print produces the same output on
        # Python 2 and Python 3; the bare print statement is Python-2-only.
        print('[{}]: {}'.format(f, description))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment