Skip to content

Instantly share code, notes, and snippets.

@joncle
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joncle/8c580b781bdbd93d60ca to your computer and use it in GitHub Desktop.
Save joncle/8c580b781bdbd93d60ca to your computer and use it in GitHub Desktop.
import csv
# Column names of interest in the cities CSV; audit_file compares the
# file's header against this list.
FIELDS = [
    "name",
    "timeZone_label",
    "utcOffset",
    "homepage",
    "governmentType_label",
    "isPartOf_label",
    "areaCode",
    "populationTotal",
    "elevation",
    "maximumElevation",
    "minimumElevation",
    "populationDensity",
    "wgs84_pos#lat",
    "wgs84_pos#long",
    "areaLand",
    "areaMetro",
    "areaUrban",
]
def audit_file(filename):
    """Audit the value types found in every column of a CSV file.

    Args:
        filename: Path of a CSV file whose first row is the header.

    Returns:
        A ``(col_types, missing)`` tuple:

        * ``col_types`` maps each header column to the set of results
          that ``get_the_type`` produced for that column's cells.
        * ``missing`` is the set of header columns that do NOT appear in
          ``FIELDS``. Despite the name, it is not the set of ``FIELDS``
          entries absent from the file.

        An empty file (no header row) yields ``({}, set())``.
    """
    import sys  # local import so this block does not rely on file-level imports

    # The csv module needs a binary handle on Python 2, but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        fin = open(filename, 'rb')
    else:
        fin = open(filename, newline='')

    with fin:
        csvin = csv.DictReader(fin)
        # fieldnames is None when the file has no header row at all.
        header = csvin.fieldnames or []
        col_types = {column: set() for column in header}
        # Plain set difference: same result as the Python 2 key-view
        # difference, but works on every interpreter version.
        missing = set(col_types) - set(FIELDS)
        for row in csvin:
            for name, value in row.items():
                # Rows longer than the header store their surplus cells as
                # a list under the key None; that is not a real column.
                if name not in col_types:
                    continue
                col_types[name].add(get_the_type(value))
    return col_types, missing
def get_the_type(value):
    """Classify a raw CSV cell by the Python type its text parses as.

    Args:
        value: The cell text, or ``None`` for a cell that is absent from
            its row (csv.DictReader pads short rows with ``None``).

    Returns:
        ``None`` for an absent, empty or ``'NULL'`` cell; ``list`` for a
        cell starting with ``'{'``; otherwise the type of the first
        successful conversion among ``int``, ``float`` and ``str``.
    """
    # Guard against None first: it has no startswith() and would otherwise
    # raise AttributeError below.
    if value is None or value in ('', 'NULL'):
        return None
    if value.startswith('{'):
        return list
    for convert in (int, float, str):
        try:
            # Report the type of the converted value, not the converter,
            # so whatever type the conversion actually yields is recorded.
            return type(convert(value))
        except ValueError:
            # Not parseable as this type; try the next, more permissive one.
            pass
# Audit the cities file and print both results on one line, separated by a
# single space.
col_types, missing = audit_file('/Users/stephan/Desktop/cities.csv')
print('%s %s' % (col_types, missing))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment