Skip to content

Instantly share code, notes, and snippets.

@jvalen
Last active December 5, 2015 08:31
Show Gist options
  • Save jvalen/68d804ef6235f0a9b56f to your computer and use it in GitHub Desktop.
Save jvalen/68d804ef6235f0a9b56f to your computer and use it in GitHub Desktop.
Parse XLS file to JSON with python
import xlrd
import json
# Example data extracted from: http://esa.un.org/unmigration/TIMSO2013/data/subsheets/UN_MigrantStockByOriginAndDestination_2013T10.xls
# Open the workbook
wb = xlrd.open_workbook('UN_MigrantStockByOriginAndDestination_2013T10.xls')
# Create migrate data structure
migrate = []
# Select sheets that we want: 1, 4, 7, 10 index and include appropiate year
valid_sheets = [{'id': 1, 'year': 1900}, {'id': 4, 'year': 2000}, {'id': 7, 'year': 2010}, {'id': 10, 'year': 2013}]
# Loop over the sheets
for current_sheet in valid_sheets:
# Get the current sheet
sh = wb.sheet_by_index(current_sheet['id'])
# Init countries list
countries = []
second_column = sh.col_values(1)
starting_row = 15
starting_column = 9
for colnum in range(starting_column, sh.ncols):
# Create an aux dict to store the relation between the current country and the others
country_name = sh.col_values(colnum)[starting_row]
aux_dic = {'name': country_name, 'destinationList' : []}
for rownum in range(starting_row, sh.nrows - 1):
# Store every country destination in the current dict
aux_dic['destinationList'].append({'countryDest': second_column[rownum], 'number': sh.col_values(colnum)[rownum]})
# Append the current country to the collection
countries.append(aux_dic)
migrate.append({'year': current_sheet['year'], 'countries_relation': countries})
# Dump the data into a json file
with open('data.json', 'w') as outfile:
json.dump(migrate, outfile)
"""
JSON structure created:
[
{
"countries_relation": [{
"name": "Afghanistan",
"destinationList": [{
"countryDest": "Major area, region, country or area of destination",
"number": "Afghanistan"
}, {
"countryDest": "WORLD",
"number": 7295267.0
}, {
"countryDest": "More developed regions",
"number": 119772.0
}, {
"countryDest": "Sub-Saharan Africa",
"number": 33.0
}, {
"countryDest": "AFRICA",
"number": 964.0
{
...
"name": "Ghana",
"destinationList": [{
"countryDest": "Major area, region, country or area of destination",
"number": "Ghana"
}, {
"countryDest": "WORLD",
"number": 388872.0
}, {
"countryDest": "More developed regions",
"number": 130633.0
}, {
...
]
"year": 1900
},
...
{
"countries_relation": [{
"name": "Afghanistan",
"destinationList": [{
"countryDest": "Major area, region, country or area of destination",
"number": "Afghanistan"
...
]
"year": 2000
},
]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment