Skip to content

Instantly share code, notes, and snippets.

@witmin
Last active November 25, 2018 09:12
Show Gist options
  • Save witmin/d2c8deffa0a8a0187876dc4f17e4a3b4 to your computer and use it in GitHub Desktop.
Save witmin/d2c8deffa0a8a0187876dc4f17e4a3b4 to your computer and use it in GitHub Desktop.

My code from working through the DataQuest guided project in a Jupyter notebook: https://www.dataquest.io/m/218/guided-project%3A-exploring-gun-deaths-in-the-us/9/next-steps

import csv
# Load every row of guns.csv into memory as a list of lists of strings.
# The `with` block closes the file handle as soon as the rows are read
# (the handle was previously left open for the life of the session), and
# newline="" lets the csv module handle line endings itself, as its
# documentation recommends.
with open("guns.csv", "r", newline="") as f:
    data = list(csv.reader(f))

# Peek at the first four rows (the header row plus three records).
print(data[0:4])
[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4']]
# Separate the header row from the records: the first row holds the
# column names, everything after it is data.
headers, data = data[0], data[1:]

print(headers)
print(data[:4])
['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]
# Count how many records fall in each year.
# Index 1 of every row is the "year" column of the CSV.
years = [record[1] for record in data]

year_counts = {}
for year in years:
    # First sighting of a year starts at 0, then every sighting adds one.
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)
{'2014': 33599, '2013': 33636, '2012': 33563}
import datetime

# Build one datetime per record from its year (index 1) and month (index 2).
# The day is fixed to 1, so every record in the same month maps to the same
# datetime value and can be used as a dictionary key for grouping.
dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in data
]

print(dates[:4])

# Count how many records fall in each (year, month) bucket.
date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

print(date_counts)
    
    
[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]
{datetime.datetime(2014, 6, 1, 0, 0): 2931, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2014, 5, 1, 0, 0): 2864, datetime.datetime(2014, 3, 1, 0, 0): 2684, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2014, 9, 1, 0, 0): 2914, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2013, 7, 1, 0, 0): 3079, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2014, 1, 1, 0, 0): 2651, datetime.datetime(2014, 11, 1, 0, 0): 2756, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2014, 2, 1, 0, 0): 2361, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2014, 7, 1, 0, 0): 2884, datetime.datetime(2013, 12, 1, 0, 0): 2765, datetime.datetime(2012, 4, 1, 0, 0): 2795, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2013, 1, 1, 0, 0): 2864, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2014, 8, 1, 0, 0): 2970, datetime.datetime(2013, 3, 1, 0, 0): 2862, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2013, 9, 1, 0, 0): 2742, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2014, 4, 1, 0, 0): 2862, datetime.datetime(2012, 1, 1, 0, 0): 2758, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2013, 8, 1, 0, 0): 2859, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2014, 12, 1, 0, 0): 2857}
# Tally records by sex (index 5) and by race (index 7) in a single pass.
sex_counts = {}
race_counts = {}
for record in data:
    sex, race = record[5], record[7]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1
    race_counts[race] = race_counts.get(race, 0) + 1

print(sex_counts)
print(race_counts)

        
    
{'M': 86349, 'F': 14449}
{'Native American/Native Alaskan': 917, 'White': 66237, 'Asian/Pacific Islander': 1326, 'Hispanic': 9022, 'Black': 23296}

What I have learned

I've learned how to use Python to read data from a CSV file and clean it up using various methods to extract the data I need.

Need further examination

The syntax for datetime is a bit complex and needs more practice and examination.

import csv
# Load census.csv into memory as a list of lists of strings.
# The `with` block closes the file handle once the rows are read (it was
# previously never closed), and newline="" lets the csv module handle line
# endings itself, as its documentation recommends.
with open("census.csv", "r", newline="") as census_file:
    census = list(csv.reader(census_file))

print(census)
[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]
# Population totals keyed by the race labels used in the gun-deaths data.
census_race = {
    'Asian/Pacific Islander': 15834141,
    'Hispanic': 44618105,
    'Black': 40250635,
    'Native American/Native Alaskan': 3739506,
    'White': 197318956,
}

# Convert each raw count into a rate per 100,000 people of that race.
race_per_hundredk = {
    label: total / census_race[label] * 100000
    for label, total in race_counts.items()
}

print(race_per_hundredk)
{'Native American/Native Alaskan': 24.521955573811088, 'White': 33.56849303419181, 'Asian/Pacific Islander': 8.374309664161762, 'Hispanic': 20.220491210910907, 'Black': 57.8773477735196}
# Pull the intent (index 3) and race (index 7) columns out of every record.
intends = [record[3] for record in data]
races = [record[7] for record in data]

# Count homicides per race by walking the two columns in lockstep.
homicide_race_counts = {}
for intent, victim_race in zip(intends, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[victim_race] = homicide_race_counts.get(victim_race, 0) + 1

# Replace each raw count with a rate per 100,000 people of that race.
# Only values are reassigned, so updating the dict while iterating is safe.
for victim_race, total in homicide_race_counts.items():
    homicide_race_counts[victim_race] = total / census_race[victim_race] * 100000

print(homicide_race_counts)
{'Black': 48.471284987180944, 'White': 4.6356417981453335, 'Asian/Pacific Islander': 3.530346230970155, 'Hispanic': 12.627161104219914, 'Native American/Native Alaskan': 8.717729026240365}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment