Skip to content

Instantly share code, notes, and snippets.

@cab938
Last active April 21, 2018 17:20
Show Gist options
  • Save cab938/53a85c8d0112425ff714d8ce2d2f8f59 to your computer and use it in GitHub Desktop.
Save cab938/53a85c8d0112425ff714d8ce2d2f8f59 to your computer and use it in GitHub Desktop.
#!pip install html5lib #install html5lib, only needs to be run once
#You might need to restart kernel after running with the menu Kernel>Restart
import pandas as pd
import numpy as np
# Per-state power generation figures (India), fetched through the course proxy.
df_power = pd.read_csv(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2Ffb463f56781fae4dd1fc171def0f1e94%2Fraw%2Fa6a7e255dadb09a29cf05de692fc16b4c09e941c%2Findia_power.csv'
)
# Per-state / union-territory figures (including population), same proxy.
df_states = pd.read_csv(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2Ff8862f40901442ae61b458327d13ef9f%2Fraw%2F13dff6567589592828ee15778d0d5897cf09f335%2Findia_states.csv'
)
# Inner join on the state name so that only places present in BOTH tables
# (population data and power data) are kept.
joined_df = df_states.merge(
    df_power,
    how="inner",
    left_on="State or union territory",
    right_on="State",
)
def pct_renw_by_pop(row):
    """Return renewable generation per head of population for one row.

    Despite the ``pct`` in the name, the result is a plain ratio,
    ``(hydro + other_renew) / Population``, not a percentage.

    Args:
        row: A mapping-like record (for example the Series handed over by
            ``DataFrame.apply(..., axis=1)``) providing the ``"hydro"``,
            ``"other_renew"`` and ``"Population"`` entries.

    Returns:
        The ratio of renewables to population, or ``0`` when the
        population is zero.
    """
    renewables = row["hydro"] + row["other_renew"]
    population = row['Population']
    # Test for zero explicitly rather than catching ZeroDivisionError:
    # NumPy scalars do not raise on division by zero, they return inf/nan
    # with a RuntimeWarning, so an except clause would never fire for them.
    if population == 0:
        return 0
    return renewables / population
# Derive the renewables-per-population ratio row by row using the helper above.
joined_df['renw_by_pop'] = joined_df.apply(pct_renw_by_pop, axis="columns")
# Rank the states on that ratio; the largest ratio receives rank 1.
joined_df['renw_by_pop_rank'] = joined_df['renw_by_pop'].rank(ascending=False)
# Show the five best-ranked states (head() defaults to five rows).
joined_df.sort_values('renw_by_pop_rank').head()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment