Skip to content

Instantly share code, notes, and snippets.

@abehmiel
Created April 18, 2017 23:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abehmiel/c62d3064ccc3704bb01cea850079110d to your computer and use it in GitHub Desktop.
Save abehmiel/c62d3064ccc3704bb01cea850079110d to your computer and use it in GitHub Desktop.
Useful Pandas csv import functions. Original by Chris Albon
# Thanks to Chris Albon. Shamelessly lifted from: https://chrisalbon.com/python/pandas_dataframe_importing_csv.html
from pathlib import Path

import numpy as np
import pandas as pd

# Location of the example file, relative to the current working directory.
CSV_PATH = Path('../data/example.csv')

# Replacement column labels used by the examples below. 'UID' labels the
# unnamed first column, which holds the dataframe index written by to_csv().
COLUMN_NAMES = ['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score']

# Create dataframe (that we will be importing)
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'last_name': ['Miller', 'Jacobson', ".", 'Milner', 'Cooze'],
    'age': [42, 52, 36, 24, 73],
    'preTestScore': [4, 24, 31, ".", "."],
    'postTestScore': ["25,000", "94,000", 57, 62, 70],
}
df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'])

# Save dataframe as csv. to_csv() does not create missing directories, so make
# sure the target directory exists before writing.
CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(CSV_PATH)

# Load a csv
df = pd.read_csv(CSV_PATH)

# Load a csv with no headers. example.csv does have a header line, so with
# header=None that line shows up as the first row of data.
df = pd.read_csv(CSV_PATH, header=None)

# Load a csv while specifying column names. header=0 tells pandas that the
# file's first line is a header to be replaced, not a row of data.
df = pd.read_csv(CSV_PATH, names=COLUMN_NAMES, header=0)

# Load a csv with setting the index column to UID
df = pd.read_csv(CSV_PATH, index_col='UID', names=COLUMN_NAMES, header=0)

# Load a csv while setting the index columns to First Name and Last Name
df = pd.read_csv(CSV_PATH, index_col=['First Name', 'Last Name'], names=COLUMN_NAMES, header=0)

# Load a csv while specifying "." as missing values
df = pd.read_csv(CSV_PATH, na_values=['.'])

# Load a csv while specifying "." and "NA" as missing values in the Last Name
# column and "." as missing values in Pre-Test Score column. The dict keys
# must match the column labels of the resulting frame, so the columns are
# renamed with COLUMN_NAMES in the same call.
sentinels = {'Last Name': ['.', 'NA'], 'Pre-Test Score': ['.']}
df = pd.read_csv(CSV_PATH, names=COLUMN_NAMES, header=0, na_values=sentinels)

# Load a csv while skipping the top 3 rows of data. Line 0 (the header) is
# kept so that the remaining rows still line up with the column labels.
df = pd.read_csv(CSV_PATH, names=COLUMN_NAMES, header=0, na_values=sentinels, skiprows=[1, 2, 3])

# Load a csv while interpreting "," in strings around numbers as thousands separators
df = pd.read_csv(CSV_PATH, thousands=',')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment