Skip to content

Instantly share code, notes, and snippets.

View cab938's full-sized avatar

Christopher Brooks cab938

View GitHub Profile
#!pip install html5lib #install html5lib, only needs to be run once
import pandas as pd
import numpy as np
# Download the natural-satellites page through the proxy; read_html returns a
# list with one DataFrame per HTML table found on the page.
df = pd.read_html(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_natural_satellites',
    header=0,
)

# Use the table at index 4, skip its first row (the prehistoric moon sighting),
# and keep only the discovery-year column.
moons = df[4].iloc[1:]['Discovery year']

# Keep the text before the first '/' in each entry and cast the result to int.
moons = moons.map(lambda entry: entry.partition('/')[0]).astype(int)

# Count the moons discovered before 2000 and those discovered in 2000 or later.
discovered_before_2000 = moons < 2000
discovered_from_2000 = moons >= 2000
pre_2000 = len(moons[discovered_before_2000])
post_2000 = len(moons[discovered_from_2000])
#!pip install html5lib #install html5lib, only needs to be run once
import pandas as pd
import numpy as np
# Proxy URL for Wikipedia's list of earthquakes in the United States.
earthquake_data='https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FList_of_earthquakes_in_the_United_States'

# read_html returns one DataFrame per HTML table; only the first table is used.
df = pd.read_html(earthquake_data, header=0)[0]

# Get rid of all rows where there is no known magnitude. The explicit .copy()
# makes the filtered frame independent of the original, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df['Magnitude'] != 'Unknown'].copy()

# Where two values are reported separated by a comma, keep only the first one.
df['Magnitude'] = df['Magnitude'].apply(lambda x: x.split(", ")[0])

# Where a range is reported with an en dash, replace it by the mean of its
# endpoints; a single value is simply converted to float.
df['Magnitude'] = df['Magnitude'].apply(lambda x: np.mean(np.array(x.split('–')).astype(float)))

# Print how many earthquakes on this list had a magnitude greater than 7.
print(len(df[df['Magnitude'] > 7]))
#!pip install html5lib #install html5lib, only needs to be run once
import pandas as pd
import numpy as np
def _gross_to_float(text):
    """Return the number that follows the first '₹' in *text* as a float.

    The number is taken up to the next space (or to the end of the string when
    there is no space) and thousands separators (',') are removed.

    Raises IndexError if *text* contains no '₹', and ValueError if the
    extracted text is not a valid number.
    """
    return float(text.split('₹')[1].split(' ')[0].replace(',', ''))


# Download the highest-grossing Indian films page through the proxy; read_html
# returns a list with one DataFrame per HTML table found on the page.
df = pd.read_html(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_highest-grossing_Indian_films',
    header=0,
)

# First ten rows of the 'Worldwide gross' column from the tables at index 5 and 13.
bollywood = df[5].head(10)['Worldwide gross']
tollywood = df[13].head(10)['Worldwide gross']

# Both totals use the same parser. The previous tollywood expression sliced
# with x.find(' '), which returns -1 when there is no space and therefore
# silently dropped the last character of the value.
bolly_top = np.sum(bollywood.apply(_gross_to_float).astype(float))
tolly_top = np.sum(tollywood.apply(_gross_to_float).astype(float))
state renewables in GWh total in GWh
Vermont 1898 1901
Idaho 12584 16011
Washington 86902 112784
Oregon 42122 59425
South Dakota 7280 10363
Maine 7408 11650
Montana 12334 28153
California 80208 199038
Iowa 21261 54793
state population
California 39536653
Texas 28304596
Florida 20984400
New York 19849399
Pennsylvania 12805537
Illinois 12802023
Ohio 11658609
Georgia 10429379
North Carolina 10273419
We can make this file beautiful and searchable if this error is corrected: Unclosed quoted field in line 6.
Rank,State or union territory,Population,Decadal growth (2001–2011),Rural pop.[16] (%),Urban pop.[16] (%),Area[17],Density,Sex ratio
1,Uttar Pradesh,207281477,20.1%,"7008155111022000000♠155,111,022 (77.72%)","7007444704550000000♠44,470,455 (22.28%)","7011240928000000000♠240,928 km2 (93,023 sq mi)","6996828000000000000♠828/km2 (2,140/sq mi)",908
2,Maharashtra,112372972,16.0%,"7007615454410000000♠61,545,441 (54.77%)","7007508275310000000♠50,827,531 (45.23%)","7011307713000000000♠307,713 km2 (118,809 sq mi)",6996365000000000000♠365/km2 (950/sq mi),946
3,Bihar,103804637,25.1%,"7007920750280000000♠92,075,028 (88.70%)","7007117296090000000♠11,729,609 (11.30%)","7010941630000000000♠94,163 km2 (36,357 sq mi)","6997110199999999999♠1,102/km2 (2,850/sq mi)",916
4,West Bengal,91347736,13.9%,"7007622136760000000♠62,213,676 (68.11%)","7007291340600000000♠29,134,060 (31.89%)","7010887520000000000♠88,752 km2 (34,267 sq mi)","6997102900000000000♠1,029/km2 (2,670/sq mi)",947
5,Madhya Pradesh,72597565,20.3%,"7007525378990000000
State coal gas diesel nuclear hydro other_renew
Maharashtra 24669.27 3475.93 0.0 690.14 3331.84 6205.65
Gujarat 16353.72 6806.09 0.0 559.32 772.00 4940.00
Madhya Pradesh 11126.39 257.18 0.0 273.24 3223.66 1670.34
Chhattisgarh 13193.49 0.0 0.0 47.52 120.00 327.18
Goa 326.17 48.00 0.0 25.80 0.0 0.05
Dadra and Nagar Haveli 44.37 27.10 0.0 8.46 0.0 0.0
Daman and Diu 36.71 4.20 0.0 7.38 0.0 0.0
Rajasthan 9400.72 825.03 0.0 573.00 1719.30 4710.50
Uttar Pradesh 11677.95 549.97 0.0 335.72 2168.30 989.86
#!pip install html5lib #install html5lib, only needs to be run once
#You might need to restart the kernel after installing (menu: Kernel > Restart)
import pandas as pd
import numpy as np
# Proxy URLs for the US power-generation and US state-population CSV files.
US_POWER_CSV_URL = 'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2F71e8371ebc621a105afa2181efd78e75%2Fraw%2Ffafe9712373ab5a1d3b2fdb6ac09a28cbcfe8f82%2Fus_power.csv'
US_STATES_CSV_URL = 'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2Ffaedc9046a01b2170c0b252fbc4fc416%2Fraw%2Ff5fa5974e7fef6b996e8ff8583f8d5b47ce391c5%2Fus_states.csv'

df_power = pd.read_csv(US_POWER_CSV_URL)
df_states = pd.read_csv(US_STATES_CSV_URL)

# Inner join on the shared "state" column: only states present in both the
# population frame and the power frame are kept.
joined_df = df_states.merge(
    df_power,
    left_on="state",
    right_on="state",
    how="inner",
)
#!pip install html5lib #install html5lib, only needs to be run once
#You might need to restart the kernel after installing (menu: Kernel > Restart)
import pandas as pd
import numpy as np
# Proxy URLs for the India power-generation and India state-population CSV files.
INDIA_POWER_CSV_URL = 'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2Ffb463f56781fae4dd1fc171def0f1e94%2Fraw%2Fa6a7e255dadb09a29cf05de692fc16b4c09e941c%2Findia_power.csv'
INDIA_STATES_CSV_URL = 'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https%3A%2F%2Fgist.github.com%2Fcab938%2Ff8862f40901442ae61b458327d13ef9f%2Fraw%2F13dff6567589592828ee15778d0d5897cf09f335%2Findia_states.csv'

df_power = pd.read_csv(INDIA_POWER_CSV_URL)
df_states = pd.read_csv(INDIA_STATES_CSV_URL)

# Inner join matching the population frame's "State or union territory" column
# against the power frame's "State" column: only places present in both frames
# are kept.
joined_df = df_states.merge(
    df_power,
    left_on="State or union territory",
    right_on="State",
    how="inner",
)
performance processor
4355.0 Intel Xeon Platinum 8180M28x 2.50 GHz (3.80 GHz) HT
4355.0 Intel Xeon Platinum 818028x 2.50 GHz (3.80 GHz) HT
4068.0 AMD Epyc 760132x 2.20 GHz (3.20 GHz) HT
4002.0 Intel Xeon Platinum 816824x 2.70 GHz (3.70 GHz) HT
3912.0 AMD Epyc 750132x 2.00 GHz (3.00 GHz) HT
3873.0 Intel Xeon Platinum 817628x 2.10 GHz (3.80 GHz) HT
3873.0 Intel Xeon Platinum 8176M28x 2.10 GHz (3.80 GHz) HT
3873.0 Intel Xeon Platinum 8176F28x 2.10 GHz (3.80 GHz) HT
3838.0 AMD Epyc 755132x 2.00 GHz (3.00 GHz) HT