Skip to content

Instantly share code, notes, and snippets.

@aglove2189
Last active February 16, 2017 23:15
Show Gist options
  • Save aglove2189/0155046423de99982c18ed01182f9df1 to your computer and use it in GitHub Desktop.
Save aglove2189/0155046423de99982c18ed01182f9df1 to your computer and use it in GitHub Desktop.
"""
Command-line tool that takes a CSV file as input and exports
a statistical summary of its data points as an HTML report.
"""
import pandas as pd
import pandas_profiling
import argparse
import os
# Command-line interface: one required CSV path plus an optional encoding.
parser = argparse.ArgumentParser(
    description='data profiler and statistical summary of CSV files',
)
# Positional argument: path of the CSV file to profile.
parser.add_argument('file')
# Optional override; when omitted, args.encoding is None and the script
# chooses an encoding on its own.
parser.add_argument('-e', '--encoding')
# Parsed once at module level; the rest of the script reads args.file and
# args.encoding.
args = parser.parse_args()
def encode_dataframe(encode_string, path=None):
    """Read a CSV file into a DataFrame using the given text encoding.

    Args:
        encode_string: Encoding name forwarded to ``pandas.read_csv``
            (for example ``'UTF-8'`` or ``'ISO-8859-1'``).
        path: CSV file to read. When omitted, the ``file`` command-line
            argument (``args.file``) is used, which is the original behavior.

    Returns:
        The ``pandas.DataFrame`` parsed from the file.

    Raises:
        UnicodeDecodeError: Propagated from pandas when the file's bytes are
            not valid in ``encode_string``; the script's encoding fallback
            relies on this.
    """
    if path is None:
        # Fall back to the path given on the command line.
        path = args.file
    return pd.read_csv(path, encoding=encode_string)
def load_dataframe(encoding=None):
    """Load the CSV named on the command line, choosing an encoding if needed.

    Args:
        encoding: Explicit encoding requested by the user. When falsy, UTF-8
            is tried first and ISO-8859-1 second.

    Returns:
        The DataFrame produced by ``encode_dataframe``.

    Raises:
        SystemExit: When none of the fallback encodings can decode the file.
            The original code only printed a message here and then crashed
            with a NameError on the unbound ``df``.
        UnicodeDecodeError: When an explicitly requested encoding fails; this
            is deliberately not masked so the user sees the real error.
    """
    if encoding:
        return encode_dataframe(encoding)
    for candidate in ('UTF-8', 'ISO-8859-1'):
        try:
            return encode_dataframe(candidate)
        except UnicodeDecodeError:
            # Try the next candidate encoding.
            continue
    # Stop here instead of continuing without a DataFrame.
    raise SystemExit('The encoding type threw an error, please try again.')


def html_report_path(csv_path):
    """Return the report path: ``csv_path`` with its extension set to ``.html``.

    Only the final extension is stripped, so directory components and extra
    dots in the name are preserved (``./data/my.file.csv`` becomes
    ``./data/my.file.html``). Splitting on the first ``'.'`` would have
    truncated such paths.
    """
    return os.path.splitext(csv_path)[0] + '.html'


if __name__ == '__main__':
    df = load_dataframe(args.encoding)
    # Quick numeric summary on the console before the full report is built.
    print(df.describe())
    print('-------------------------------')
    print('generating data profile file...')
    pfr = pandas_profiling.ProfileReport(df)
    report_path = html_report_path(args.file)
    pfr.to_file(report_path)
    print("'" + report_path + "'" + ' has been created!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment