# jackgolding/pollutantmean.py

## pollutantmean.py
'''
R Programming Programming Assignment 1 in Python 3.4 using only Standard Libraries

Example Usage:

pollutantmean('/Users/jackgolding/documents/specdata', 'sulfate', range(1,11))
4.064

'''

#imports the two libraries used
import os, csv

def pollutantmean(directory, pollutant, id=range(1, 333)):
    """Return the mean of one pollutant column across monitor CSV files.

    Each monitor number ``n`` in ``id`` is read from ``<directory>/NNN.csv``,
    where ``NNN`` is ``n`` zero-padded to three digits. Rows whose
    ``pollutant`` field is the literal string ``'NA'`` are ignored.

    Args:
        directory: Folder that contains the monitor CSV files.
        pollutant: Name of the CSV column to average (e.g. ``'sulfate'``).
        id: Iterable of integer monitor numbers. Defaults to 1 through 332.
            (The name shadows the builtin but is kept so keyword callers
            keep working.)

    Returns:
        The mean of all non-NA values rounded to 3 decimal places, or NaN
        when no non-NA value was read (the same result R gives for the
        mean of an empty numeric vector).

    Raises:
        OSError: If a monitor file cannot be opened.
        KeyError: If ``pollutant`` is not a column of a monitor file.
        ValueError: If a non-NA field cannot be parsed as a float.
    """
    # Running total and count of the non-NA records seen so far.
    pollutant_sum = 0.0
    number_of_records = 0
    for location in id:
        # Monitor 1 lives in '<directory>/001.csv'.
        location_path = os.path.join(directory, '%03d.csv' % location)
        # Open the computed path, not the bare monitor number (an int would
        # be treated as a file descriptor). newline='' is what the csv
        # module asks for when it is handed a file object.
        with open(location_path, 'r', newline='') as location_file:
            for row in csv.DictReader(location_file):
                value = row[pollutant]
                if value != 'NA':
                    pollutant_sum += float(value)
                    number_of_records += 1
    # Nothing to average: report NaN instead of dividing by zero.
    if number_of_records == 0:
        return float('nan')
    # Round to 3 places so the output is formatted the same as in R.
    return round(pollutant_sum / number_of_records, 3)
	'''
	R Programming Programming Assignment 1 in Python 3.4 using only Standard Libraries

	Example Usage:

	pollutantmean('/Users/jackgolding/documents/specdata', 'sulfate', range(1,11))
	4.064

	'''

	#imports the two libraries used
	import os, csv

	def pollutantmean(directory, pollutant, id=range(1, 333)):
	    """Return the mean of one pollutant column across monitor CSV files.

	    Each monitor number ``n`` in ``id`` is read from ``<directory>/NNN.csv``,
	    where ``NNN`` is ``n`` zero-padded to three digits. Rows whose
	    ``pollutant`` field is the literal string ``'NA'`` are ignored.

	    Args:
	        directory: Folder that contains the monitor CSV files.
	        pollutant: Name of the CSV column to average (e.g. ``'sulfate'``).
	        id: Iterable of integer monitor numbers. Defaults to 1 through 332.
	            (The name shadows the builtin but is kept so keyword callers
	            keep working.)

	    Returns:
	        The mean of all non-NA values rounded to 3 decimal places, or NaN
	        when no non-NA value was read (the same result R gives for the
	        mean of an empty numeric vector).

	    Raises:
	        OSError: If a monitor file cannot be opened.
	        KeyError: If ``pollutant`` is not a column of a monitor file.
	        ValueError: If a non-NA field cannot be parsed as a float.
	    """
	    # Running total and count of the non-NA records seen so far.
	    pollutant_sum = 0.0
	    number_of_records = 0
	    for location in id:
	        # Monitor 1 lives in '<directory>/001.csv'.
	        location_path = os.path.join(directory, '%03d.csv' % location)
	        # Open the computed path, not the bare monitor number (an int would
	        # be treated as a file descriptor). newline='' is what the csv
	        # module asks for when it is handed a file object.
	        with open(location_path, 'r', newline='') as location_file:
	            for row in csv.DictReader(location_file):
	                value = row[pollutant]
	                if value != 'NA':
	                    pollutant_sum += float(value)
	                    number_of_records += 1
	    # Nothing to average: report NaN instead of dividing by zero.
	    if number_of_records == 0:
	        return float('nan')
	    # Round to 3 places so the output is formatted the same as in R.
	    return round(pollutant_sum / number_of_records, 3)