Last active
July 20, 2018 00:29
-
-
Save akrisanov/a4c2c413cbc876192f19f9fe4ca14460 to your computer and use it in GitHub Desktop.
DataCamp: Python Data Science Toolbox (Part 2) https://www.datacamp.com/courses/python-data-science-toolbox-part-2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_pop(filename, country_code):
    """Plot total urban population per year for one country or region.

    Reads ``filename`` in chunks of 1000 rows, keeps the rows whose
    'CountryCode' equals ``country_code``, derives a
    'Total Urban Population' column from 'Total Population' and
    'Urban population (% of total)', and shows a scatter plot of that
    column against 'Year'.

    Args:
        filename: Path of a CSV file containing the columns 'CountryCode',
            'Year', 'Total Population' and 'Urban population (% of total)'.
        country_code: Value to match in the 'CountryCode' column,
            e.g. 'CEB'.

    Raises:
        ValueError: If no row in the file matches ``country_code``.
    """
    matching_chunks = []

    # Stream the file chunk by chunk instead of loading it all at once.
    for df_urb_pop in pd.read_csv(filename, chunksize=1000):
        # .copy() yields an independent frame, so adding a column below does
        # not write into a slice of the chunk (SettingWithCopyWarning).
        df_pop = df_urb_pop[df_urb_pop['CountryCode'] == country_code].copy()
        if df_pop.empty:
            continue

        # The urban share is labelled as a percentage of the total, so it has
        # to be divided by 100 to obtain a head count.
        urban = (
            df_pop['Total Population']
            * df_pop['Urban population (% of total)']
            / 100
        )
        # Explicit int64 keeps large populations from overflowing on
        # platforms where the default integer is 32-bit.
        df_pop['Total Urban Population'] = urban.astype('int64')
        matching_chunks.append(df_pop)

    if not matching_chunks:
        raise ValueError(
            "no rows with CountryCode {!r} found in {!r}".format(
                country_code, filename
            )
        )

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # recopying the accumulated data on every chunk.
    data = pd.concat(matching_chunks)

    # Plot urban population data
    data.plot(kind='scatter', x='Year', y='Total Urban Population')
    plt.show()
# CSV file with the population indicators; defined once and reused below.
fn = 'ind_pop_data.csv'

# Plot the urban population for country code 'CEB'
plot_pop(fn, 'CEB')

# Plot the urban population for country code 'ARB'
plot_pop(fn, 'ARB')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment