# anjesh/location-spliter.py

## location-spliter.py

"""
Breaks down each row whose location field holds several locations (separated by newlines) into multiple rows

e.g. if the location field contains the following text, the script writes 4 rows for it and copies the same id and title into all 4 rows
" - Banke (Nepalgunj)
 - Dhanusa (Janakpur)
 - Rupandehi (Bhairahawa)
 - Sunsari (Inaruwa)"

"""

import csv
import re

def organiseLocation(location):
	"""Split one location entry into its district and town.

	An entry looks like '- Sunsari (Inaruwa)': a "-" bullet, the district,
	then the name of the town in parentheses.

	Args:
		location: one line taken from a locations cell.

	Returns:
		A two-item list [district, city]; city is '' when the entry has no
		parenthesised town.
	"""
	# partition() cuts at the first "(" only, so an entry with more than one
	# "(" cannot break a two-name unpacking the way split("(") does.
	district, _, city = location.partition('(')
	# Every "-" is removed from the district text, which drops the bullet.
	district = district.replace('-', '').strip()
	city = city.replace(')', '').strip()
	# Always hand back a pair: indexing a bare string with [0] and [1] would
	# yield its first two characters rather than a district and a city.
	return [district, city]

# Python 3.11+ rejects the old 'rU' mode. Plain 'r' keeps what it asked for:
# universal-newline translation, so every line break inside a quoted
# locations cell reaches the code below as "\n".
# newline='' on the output leaves line endings to the csv writer.
# Both files are closed by the with-statement, even if a row fails.
with open('ampdata-location.csv', 'w', newline='') as outfile, \
		open('ampdata-unorganised.csv', 'r') as infile:
	locwriter = csv.writer(outfile, delimiter=',', quotechar='"')
	ampreader = csv.reader(infile, delimiter=',')
	locwriter.writerow(['AMP ID', 'Project Title', 'District', 'City'])
	# The first input row is the header; skip it (the None default covers an
	# empty input file).
	next(ampreader, None)
	for row in ampreader:
		if not row:
			# csv.reader yields [] for a blank line; there is nothing to copy
			continue
		# columns: 0 = AMP ID, 1 = project title, 6 = locations cell
		ampId, projectTitle, districts = row[0], row[1], row[6]
		# Keep only the non-blank lines so that a trailing or stray blank
		# line in the cell does not add an extra id/title-only row.
		locations = [line for line in districts.split('\n') if line.strip()]
		if not locations:
			#location missing, so just write id and title
			locwriter.writerow([ampId, projectTitle])
			continue
		for location in locations:
			districtLoc = organiseLocation(location)
			if isinstance(districtLoc, str):
				# A plain string is not a [district, city] pair; indexing it
				# would write its first two characters. Use the cleaned text
				# as the district and leave the city empty.
				districtLoc = [districtLoc.replace('-', '').strip(), '']
			locwriter.writerow([ampId, projectTitle, districtLoc[0], districtLoc[1]])

	"""
	Breaks down each row whose location field holds several locations (separated by newlines) into multiple rows

	e.g. if the location field contains the following text, the script writes 4 rows for it and copies the same id and title into all 4 rows
	" - Banke (Nepalgunj)
	- Dhanusa (Janakpur)
	- Rupandehi (Bhairahawa)
	- Sunsari (Inaruwa)"

	"""

	import csv
	import re

	def organiseLocation(location):
		"""Split one location entry into its district and town.

		An entry looks like '- Sunsari (Inaruwa)': a "-" bullet, the district,
		then the name of the town in parentheses.

		Args:
			location: one line taken from a locations cell.

		Returns:
			A two-item list [district, city]; city is '' when the entry has
			no parenthesised town.
		"""
		# partition() cuts at the first "(" only, so an entry with more than
		# one "(" cannot break a two-name unpacking the way split("(") does.
		district, _, city = location.partition('(')
		# Every "-" is removed from the district text, which drops the bullet.
		district = district.replace('-', '').strip()
		city = city.replace(')', '').strip()
		# Always hand back a pair: indexing a bare string with [0] and [1]
		# would yield its first two characters rather than a district and a
		# city.
		return [district, city]

	# Python 3.11+ rejects the old 'rU' mode. Plain 'r' keeps what it asked
	# for: universal-newline translation, so every line break inside a quoted
	# locations cell reaches the code below as "\n".
	# newline='' on the output leaves line endings to the csv writer.
	# Both files are closed by the with-statement, even if a row fails.
	with open('ampdata-location.csv', 'w', newline='') as outfile, \
			open('ampdata-unorganised.csv', 'r') as infile:
		locwriter = csv.writer(outfile, delimiter=',', quotechar='"')
		ampreader = csv.reader(infile, delimiter=',')
		locwriter.writerow(['AMP ID', 'Project Title', 'District', 'City'])
		# The first input row is the header; skip it (the None default covers
		# an empty input file).
		next(ampreader, None)
		for row in ampreader:
			if not row:
				# csv.reader yields [] for a blank line; nothing to copy
				continue
			# columns: 0 = AMP ID, 1 = project title, 6 = locations cell
			ampId, projectTitle, districts = row[0], row[1], row[6]
			# Keep only the non-blank lines so that a trailing or stray blank
			# line in the cell does not add an extra id/title-only row.
			locations = [line for line in districts.split('\n') if line.strip()]
			if not locations:
				#location missing, so just write id and title
				locwriter.writerow([ampId, projectTitle])
				continue
			for location in locations:
				districtLoc = organiseLocation(location)
				if isinstance(districtLoc, str):
					# A plain string is not a [district, city] pair; indexing
					# it would write its first two characters. Use the cleaned
					# text as the district and leave the city empty.
					districtLoc = [districtLoc.replace('-', '').strip(), '']
				locwriter.writerow([ampId, projectTitle, districtLoc[0], districtLoc[1]])