davidcesarino/compile_aesa.py

## compile_aesa.py
# Copyright 2018 David Cesarino de Sousa
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import csv
import statistics
import locale

# Directory holding the AESA monthly CSV files (named YYYYMM.csv); the
# compiled output file is written here as well.
path = '../dados/'
# Zero-padded month numbers, used to build the monthly file names.
months = ['%02d' % number for number in range(1, 13)]


def write_csv_values(contents, headers, file_name):
    """Write a header row followed by data rows to a semicolon-delimited CSV.

    Args:
        contents: Iterable of rows; each row is an iterable of cell values.
        headers: Iterable of column titles, written as the first row.
        file_name: Path of the file to create; an existing file is overwritten.
    """
    # newline='' lets the csv module control line endings itself. Without it,
    # platforms that translate '\n' on write end up with '\r\r\n' and the rows
    # appear separated by blank lines.
    with open(file_name, 'w', newline='') as file:
        writer = csv.writer(file, delimiter=';', quotechar='"')
        writer.writerow(headers)
        writer.writerows(contents)


def get_cities(years=range(2013, 2017), data_dir=None, month_codes=None):
    """Collect the distinct city names found in the monthly CSV files.

    One file named ``<year><month>.csv`` is read for every combination of
    ``years`` and ``month_codes``. The first row of each file is treated as a
    header and skipped; the city name is taken from the first column.

    Args:
        years: Iterable of years to scan. Defaults to 2013 through 2016.
        data_dir: Prefix prepended to each file name (it is concatenated as
            is, so it must end with a path separator). Defaults to the
            module-level ``path``.
        month_codes: Iterable of month strings used in the file names.
            Defaults to the module-level ``months``.

    Returns:
        A list of unique city names sorted with ``locale.strxfrm``, i.e.
        according to the current ``LC_COLLATE`` locale.

    Raises:
        OSError: If one of the expected monthly files cannot be opened.
    """
    if data_dir is None:
        data_dir = path
    if month_codes is None:
        month_codes = months

    cities_list = []
    # Companion set for O(1) "already seen?" checks; the list keeps the
    # first-seen order so the stable sort below stays deterministic.
    seen = set()
    for year in years:
        for month_code in month_codes:
            in_file_name = data_dir + str(year) + month_code + '.csv'
            with open(in_file_name, 'r', newline='') as file:
                reader = csv.reader(file, delimiter=',', quotechar='"')
                next(reader, None)  # Skip the header.
                for row in reader:
                    if not row:  # Blank lines come back as empty rows.
                        continue
                    city = row[0]
                    if city not in seen:
                        seen.add(city)
                        cities_list.append(city)
    cities_list.sort(key=locale.strxfrm)
    return cities_list


def get_averages_in_month(month_file_name):
    """Average the rain values of each city in one monthly CSV file.

    The first row is treated as a header and skipped. In every other row the
    first column is the city name and the third column is parsed as a float
    rain value. A city that appears on several rows gets the arithmetic mean
    of all its values.

    Args:
        month_file_name: Path of the monthly CSV file to read.

    Returns:
        A list of ``[city, average]`` pairs sorted by city name with
        ``locale.strxfrm`` (current ``LC_COLLATE`` locale).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a value in the third column is not a valid float.
        IndexError: If a non-empty row has fewer than three columns.
    """
    # Group the values by city so repeated cities collapse into one entry.
    rain_values = {}
    with open(month_file_name, 'r', newline='') as file:
        reader = csv.reader(file, delimiter=',', quotechar='"')
        next(reader, None)  # Skip the header.
        for row in reader:
            if not row:  # Blank lines come back as empty rows.
                continue
            rain_values.setdefault(row[0], []).append(float(row[2]))

    final_data = [[city, statistics.mean(values)]
                  for city, values in rain_values.items()]
    # Sort once, after every average is known, instead of on each insertion.
    final_data.sort(key=lambda pair: locale.strxfrm(pair[0]))
    return final_data


def get_averages(cities_list):
    """Build the per-city table of monthly rain averages for 2013-2016.

    Every monthly file ``<year><month>.csv`` under ``path`` is averaged with
    ``get_averages_in_month`` and the results are placed in chronological
    slots.

    Args:
        cities_list: List of city names; entry ``i`` of the result belongs to
            ``cities_list[i]``.

    Returns:
        A list with one ``[city, values]`` entry per element of
        ``cities_list``. ``values`` holds ``len(year_range) * len(months)``
        slots in chronological order; a slot stays ``None`` when the city has
        no data for that month.

    Raises:
        ValueError: If a monthly file mentions a city that is missing from
            ``cities_list``.
    """
    year_range = range(2013, 2017)
    slots_per_city = len(year_range) * len(months)
    full_precipitation_list = [[None, [None] * slots_per_city]
                               for _ in cities_list]

    # Map each name to its first position once, instead of scanning the list
    # with list.index() for every entry of every month.
    index_by_city = {}
    for position, name in enumerate(cities_list):
        index_by_city.setdefault(name, position)
    for name, position in index_by_city.items():
        full_precipitation_list[position][0] = name

    for year_offset, y in enumerate(year_range):
        for month_offset, m in enumerate(months):
            # Chronological slot: 0 is the first month of the first year.
            month = year_offset * len(months) + month_offset
            in_file_name = str(y) + m + '.csv'
            list_of_averages = get_averages_in_month(path + in_file_name)
            for city_name, city_average in list_of_averages:
                try:
                    index = index_by_city[city_name]
                except KeyError:
                    # Same exception type list.index() raised before.
                    raise ValueError(
                        '%r is not in cities_list' % (city_name,)) from None
                full_precipitation_list[index][1][month] = city_average
    return full_precipitation_list


def main():
    """Compile all monthly averages into ``finished.csv`` under ``path``.

    The output has one row per city: the city name followed by one average
    per month. Months without data for a city are written as -99.
    ``LC_COLLATE`` is switched to ``pt_BR.UTF-8`` while the data is processed
    and restored afterwards, even if processing fails.

    Raises:
        locale.Error: If the ``pt_BR.UTF-8`` locale is not available.
    """
    # setlocale() without a new value returns the current setting as a string
    # that can be handed straight back to setlocale() later.
    default_locale = locale.setlocale(locale.LC_COLLATE)
    locale.setlocale(locale.LC_COLLATE, "pt_BR.UTF-8")
    try:
        cities = get_cities()
        full_list = get_averages(cities)

        # Flatten [city, [v1, v2, ...]] into [city, v1, v2, ...] and replace
        # every missing cell (None) with the -99 marker.
        csv_list = []
        for city_name, averages in full_list:
            flat_row = [city_name, *averages]
            csv_list.append([-99 if cell is None else cell
                             for cell in flat_row])

        # Take the column count from the data so the header always matches
        # the rows; 48 (4 years x 12 months) is kept for an empty table.
        month_count = len(full_list[0][1]) if full_list else 48
        headers = ['city'] + ['month' + str(i + 1) for i in range(month_count)]

        write_csv_values(csv_list, headers, path + 'finished.csv')
    finally:
        locale.setlocale(locale.LC_COLLATE, default_locale)


# Run the compilation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
	# Copyright 2018 David Cesarino de Sousa
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import csv
	import statistics
	import locale

	# Directory holding the AESA monthly CSV files (named YYYYMM.csv); the
	# compiled output file is written here as well.
	path = '../dados/'
	# Zero-padded month numbers, used to build the monthly file names.
	months = ['%02d' % number for number in range(1, 13)]


	def write_csv_values(contents, headers, file_name):
	    """Write a header row followed by data rows to a semicolon-delimited CSV.

	    Args:
	        contents: Iterable of rows; each row is an iterable of cell values.
	        headers: Iterable of column titles, written as the first row.
	        file_name: Path of the file to create; an existing file is
	            overwritten.
	    """
	    # newline='' lets the csv module control line endings itself. Without
	    # it, platforms that translate '\n' on write end up with '\r\r\n' and
	    # the rows appear separated by blank lines.
	    with open(file_name, 'w', newline='') as file:
	        writer = csv.writer(file, delimiter=';', quotechar='"')
	        writer.writerow(headers)
	        writer.writerows(contents)


	def get_cities(years=range(2013, 2017), data_dir=None, month_codes=None):
	    """Collect the distinct city names found in the monthly CSV files.

	    One file named ``<year><month>.csv`` is read for every combination of
	    ``years`` and ``month_codes``. The first row of each file is treated
	    as a header and skipped; the city name is taken from the first column.

	    Args:
	        years: Iterable of years to scan. Defaults to 2013 through 2016.
	        data_dir: Prefix prepended to each file name (it is concatenated
	            as is, so it must end with a path separator). Defaults to the
	            module-level ``path``.
	        month_codes: Iterable of month strings used in the file names.
	            Defaults to the module-level ``months``.

	    Returns:
	        A list of unique city names sorted with ``locale.strxfrm``, i.e.
	        according to the current ``LC_COLLATE`` locale.

	    Raises:
	        OSError: If one of the expected monthly files cannot be opened.
	    """
	    if data_dir is None:
	        data_dir = path
	    if month_codes is None:
	        month_codes = months

	    cities_list = []
	    # Companion set for O(1) "already seen?" checks; the list keeps the
	    # first-seen order so the stable sort below stays deterministic.
	    seen = set()
	    for year in years:
	        for month_code in month_codes:
	            in_file_name = data_dir + str(year) + month_code + '.csv'
	            with open(in_file_name, 'r', newline='') as file:
	                reader = csv.reader(file, delimiter=',', quotechar='"')
	                next(reader, None)  # Skip the header.
	                for row in reader:
	                    if not row:  # Blank lines come back as empty rows.
	                        continue
	                    city = row[0]
	                    if city not in seen:
	                        seen.add(city)
	                        cities_list.append(city)
	    cities_list.sort(key=locale.strxfrm)
	    return cities_list


	def get_averages_in_month(month_file_name):
	    """Average the rain values of each city in one monthly CSV file.

	    The first row is treated as a header and skipped. In every other row
	    the first column is the city name and the third column is parsed as a
	    float rain value. A city that appears on several rows gets the
	    arithmetic mean of all its values.

	    Args:
	        month_file_name: Path of the monthly CSV file to read.

	    Returns:
	        A list of ``[city, average]`` pairs sorted by city name with
	        ``locale.strxfrm`` (current ``LC_COLLATE`` locale).

	    Raises:
	        OSError: If the file cannot be opened.
	        ValueError: If a value in the third column is not a valid float.
	        IndexError: If a non-empty row has fewer than three columns.
	    """
	    # Group the values by city so repeated cities collapse into one entry.
	    rain_values = {}
	    with open(month_file_name, 'r', newline='') as file:
	        reader = csv.reader(file, delimiter=',', quotechar='"')
	        next(reader, None)  # Skip the header.
	        for row in reader:
	            if not row:  # Blank lines come back as empty rows.
	                continue
	            rain_values.setdefault(row[0], []).append(float(row[2]))

	    final_data = [[city, statistics.mean(values)]
	                  for city, values in rain_values.items()]
	    # Sort once, after every average is known, instead of on each insertion.
	    final_data.sort(key=lambda pair: locale.strxfrm(pair[0]))
	    return final_data


	def get_averages(cities_list):
	    """Build the per-city table of monthly rain averages for 2013-2016.

	    Every monthly file ``<year><month>.csv`` under ``path`` is averaged
	    with ``get_averages_in_month`` and the results are placed in
	    chronological slots.

	    Args:
	        cities_list: List of city names; entry ``i`` of the result belongs
	            to ``cities_list[i]``.

	    Returns:
	        A list with one ``[city, values]`` entry per element of
	        ``cities_list``. ``values`` holds ``len(year_range) * len(months)``
	        slots in chronological order; a slot stays ``None`` when the city
	        has no data for that month.

	    Raises:
	        ValueError: If a monthly file mentions a city that is missing from
	            ``cities_list``.
	    """
	    year_range = range(2013, 2017)
	    slots_per_city = len(year_range) * len(months)
	    full_precipitation_list = [[None, [None] * slots_per_city]
	                               for _ in cities_list]

	    # Map each name to its first position once, instead of scanning the
	    # list with list.index() for every entry of every month.
	    index_by_city = {}
	    for position, name in enumerate(cities_list):
	        index_by_city.setdefault(name, position)
	    for name, position in index_by_city.items():
	        full_precipitation_list[position][0] = name

	    for year_offset, y in enumerate(year_range):
	        for month_offset, m in enumerate(months):
	            # Chronological slot: 0 is the first month of the first year.
	            month = year_offset * len(months) + month_offset
	            in_file_name = str(y) + m + '.csv'
	            list_of_averages = get_averages_in_month(path + in_file_name)
	            for city_name, city_average in list_of_averages:
	                try:
	                    index = index_by_city[city_name]
	                except KeyError:
	                    # Same exception type list.index() raised before.
	                    raise ValueError(
	                        '%r is not in cities_list' % (city_name,)) from None
	                full_precipitation_list[index][1][month] = city_average
	    return full_precipitation_list


	def main():
	    """Compile all monthly averages into ``finished.csv`` under ``path``.

	    The output has one row per city: the city name followed by one average
	    per month. Months without data for a city are written as -99.
	    ``LC_COLLATE`` is switched to ``pt_BR.UTF-8`` while the data is
	    processed and restored afterwards, even if processing fails.

	    Raises:
	        locale.Error: If the ``pt_BR.UTF-8`` locale is not available.
	    """
	    # setlocale() without a new value returns the current setting as a
	    # string that can be handed straight back to setlocale() later.
	    default_locale = locale.setlocale(locale.LC_COLLATE)
	    locale.setlocale(locale.LC_COLLATE, "pt_BR.UTF-8")
	    try:
	        cities = get_cities()
	        full_list = get_averages(cities)

	        # Flatten [city, [v1, v2, ...]] into [city, v1, v2, ...] and
	        # replace every missing cell (None) with the -99 marker.
	        csv_list = []
	        for city_name, averages in full_list:
	            flat_row = [city_name, *averages]
	            csv_list.append([-99 if cell is None else cell
	                             for cell in flat_row])

	        # Take the column count from the data so the header always matches
	        # the rows; 48 (4 years x 12 months) is kept for an empty table.
	        month_count = len(full_list[0][1]) if full_list else 48
	        headers = ['city'] + ['month' + str(i + 1)
	                              for i in range(month_count)]

	        write_csv_values(csv_list, headers, path + 'finished.csv')
	    finally:
	        locale.setlocale(locale.LC_COLLATE, default_locale)


	# Run the compilation only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()