Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created September 5, 2020 03:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/2bb9a7d01666c9b30b2062d72ed2cb04 to your computer and use it in GitHub Desktop.
Save codecademydev/2bb9a7d01666c9b30b2062d72ed2cb04 to your computer and use it in GitHub Desktop.
Codecademy export
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import codecademylib3_seaborn
import glob
# Columns that are stripped of '%' and ',' and then converted to numbers.
PERCENT_COLUMNS = ('Hispanic', 'White', 'Black', 'Native', 'Asian', 'Pacific')


def load_census(paths):
    """Read every CSV in *paths* and stack the rows into one DataFrame.

    The combined frame gets a fresh 0..n-1 index so row labels stay unique
    across files; the label-aligned ``fillna`` in ``clean_census`` relies
    on that.

    Raises:
        ValueError: if *paths* is empty (raised by ``pd.concat``).
    """
    frames = [pd.read_csv(path) for path in paths]
    return pd.concat(frames, ignore_index=True)


def clean_census(census):
    """Return a cleaned copy of a raw census table; the input is not modified.

    Steps, in order:
      * ``Income``: remove ``$`` and ``,`` and convert to numeric.
      * ``GenderPop``: split on ``_``; the first run of digits in each half
        becomes the numeric ``Men`` / ``Women`` column.
      * ``Women``: missing values are filled with ``TotalPop - Men`` and the
        column is cast to int.
      * Fully duplicated rows are dropped.
      * Each column in ``PERCENT_COLUMNS``: remove ``%`` and ``,`` and
        convert to numeric.

    Raises:
        KeyError: if an expected column is absent.
        ValueError: if ``Women`` is still missing after the fill (the int
            cast cannot represent NaN) or a cleaned value is not numeric.
    """
    census = census.copy()

    census['Income'] = pd.to_numeric(
        census['Income'].replace(r'[\$,]', '', regex=True)
    )

    halves = census['GenderPop'].str.split('_')
    for column, position in (('Men', 0), ('Women', 1)):
        # extract() yields NaN when a half holds no digits at all.
        digits = halves.str.get(position).str.extract(r'(\d+)', expand=False)
        census[column] = pd.to_numeric(digits)

    estimated_women = census['TotalPop'] - census['Men']
    census['Women'] = census['Women'].fillna(estimated_women).astype(int)

    # copy() detaches the de-duplicated frame from its parent so the column
    # assignments below do not raise SettingWithCopyWarning.
    census = census.drop_duplicates().copy()

    for column in PERCENT_COLUMNS:
        census[column] = pd.to_numeric(
            census[column].replace(r'[\%,]', '', regex=True)
        )

    return census


# Sorted so the row order does not depend on the order glob returns paths in.
files = sorted(glob.glob('states*.csv'))
if files:
    us_census = clean_census(load_census(files))
    print(us_census.head())
    print(us_census.columns)
else:
    # pd.concat raises "No objects to concatenate" on an empty list;
    # report the actual cause instead.
    print("No files matching 'states*.csv' were found; nothing to clean.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment