Created
April 20, 2014 10:29
-
-
Save anjesh/11110737 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Break each row whose location field holds several locations (separated by newlines) into one row per location.
For example, if the location field contains the text below, the script writes 4 rows and copies the same id and title into each of them:
" - Banke (Nepalgunj)
- Dhanusa (Janakpur)
- Rupandehi (Bhairahawa)
- Sunsari (Inaruwa)"
""" | |
import csv | |
import re | |
""" | |
Example input: location = '- Sunsari (Inaruwa)'
The first part is the district; the part in parentheses is the name of the town.
""" | |
def organiseLocation(location):
    """Split one location entry into its district and town.

    An entry looks like ``'- Sunsari (Inaruwa)'``: a bullet dash, the
    district name, then the town in parentheses.

    Args:
        location: A single line taken from the location field.

    Returns:
        A two-item list ``[district, city]``. ``city`` is an empty string
        when the entry has no parenthesised town, so callers can always
        index positions 0 and 1.
    """
    # partition() splits on the first '(' only, so an entry containing more
    # than one parenthesis no longer fails to unpack, and an entry without
    # any parenthesis yields an empty city instead of a bare string.
    district, _, city = location.partition('(')
    # Drop only the leading bullet dash; hyphens inside a name are kept.
    district = district.strip().lstrip('-').strip()
    city = city.replace(')', '').strip()
    return [district, city]
# Read 'ampdata-unorganised.csv' and write 'ampdata-location.csv' with one
# output row per location listed in column 6 of each input row.
#
# Both files are managed by a single `with` so they are closed (and the output
# flushed) even if a row fails part-way through. newline='' is what the csv
# module expects for its file objects; it replaces the old 'rU' mode, which
# Python 3.11 removed.
with open('ampdata-location.csv', 'w', newline='') as outfile, \
        open('ampdata-unorganised.csv', newline='') as infile:
    locwriter = csv.writer(outfile, delimiter=',', quotechar='"')
    ampreader = csv.reader(infile, delimiter=',')
    locwriter.writerow(['AMP ID', 'Project Title', 'District', 'City'])

    # Skip the input header row; the default keeps an empty input file from
    # raising StopIteration.
    next(ampreader, None)

    for row in ampreader:
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to copy.
            continue
        ampId = row[0]
        projectTitle = row[1]
        # splitlines() also copes with '\r\n' inside the quoted field, and
        # blank lines are dropped so they do not produce spurious rows.
        locations = [loc for loc in row[6].splitlines() if loc.strip()]
        if not locations:
            # Location missing, so just write id and title.
            locwriter.writerow([ampId, projectTitle])
            continue
        for location in locations:
            districtLoc = organiseLocation(location)
            locwriter.writerow(
                [ampId, projectTitle, districtLoc[0], districtLoc[1]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment