Created
April 24, 2016 17:53
-
-
Save dengjonathan/5be63bff43fc65ee575d5ec02b50f184 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy, sklearn | |
import pandas as pd | |
import os | |
# os.chdir('./Ranger Data/data') | |
def csv2dataframe(fileobj):
    """Return the contents of a CSV file as a pandas DataFrame.

    The first row of the CSV is used as the column header.

    Args:
        fileobj: Either a path to a CSV file, or an already-open file-like
            object (anything exposing a ``read`` method).

    Returns:
        A ``pandas.DataFrame`` holding the parsed CSV data.
    """
    # An already-open file-like object is handed to pandas as-is; the caller
    # stays responsible for closing it.
    if hasattr(fileobj, 'read'):
        return pd.read_csv(fileobj, header=0)
    # Otherwise treat the argument as a path and make sure the handle is
    # closed once parsing finishes.
    with open(fileobj, 'r') as f:
        return pd.read_csv(f, header=0)
# Load every CSV in the current working directory. The listing is sorted
# because os.listdir() returns entries in arbitrary order, which would
# otherwise make the row order of the combined frame vary between runs.
csv_names = sorted(
    name for name in os.listdir(os.getcwd()) if name.endswith('.csv')
)
if not csv_names:
    # pd.concat([]) would raise an opaque "No objects to concatenate"
    # ValueError; raise the same exception type but name the directory.
    raise ValueError('no .csv files found in {}'.format(os.getcwd()))
df_list = [csv2dataframe(name) for name in csv_names]
super_df = pd.concat(df_list, axis=0)

# Due to trash values in the original Excel files, drop all the empty columns,
# whose names contain 'Unnamed'.
# Per the original note, this still leaves 35 features to look at.
cols_to_keep = [col for col in super_df.columns if 'Unnamed' not in col]
super_df = super_df[cols_to_keep]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment