Created
June 3, 2015 17:44
-
-
Save psaia/26398d9a4dec2f8b2e5f to your computer and use it in GitHub Desktop.
parse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
var fs = require('fs'); | |
var path = require('path'); | |
var async = require('async'); | |
var glob = require('glob'); | |
var slugify = require('slugify'); | |
var parseCSV = require('csv-parse'); | |
var rimraf = require('rimraf'); | |
var d3 = require('d3'); | |
var iconv = require('iconv-lite'); | |
var gm = require('gm').subClass({ imageMagick: true }); | |
var _ = require('lodash'); | |
// Shorthand used throughout the file for building file-system paths.
var join = path.join;
// Pipeline stages; they are pushed at the bottom of the file and run in order
// by async.waterfall.
var steps = [];
// Directory that receives every generated JSON file.
var PROCESSED_FOLDER = join(__dirname, 'processed');
// Directory that holds the raw input files (CSV, grid data, images).
var LOCAL_RAW_FILES = join(__dirname, 'raw');
/**
 * Recreates PROCESSED_FOLDER as an empty directory.
 *
 * Any existing folder (and its contents) is removed first, then a fresh one is
 * created.
 *
 * @param {function(Error=)} done Called with no arguments on success, or with
 *     the error reported by rimraf / fs.mkdir.
 */
function createProcessedFolder(done) {
  async.waterfall([
    function removeStaleOutput(callback) {
      // rimraf reports success when the path does not exist, so a separate
      // (deprecated) fs.exists check is not needed.
      rimraf(PROCESSED_FOLDER, function(err) {
        callback(err);
      });
    },
    function createFreshFolder(callback) {
      fs.mkdir(PROCESSED_FOLDER, function(err) {
        callback(err);
      });
    }
  ], function(err) {
    // Forward only the error: `done` is a waterfall callback, so any extra
    // argument would be handed to the next pipeline stage as a parameter.
    done(err);
  });
}
/**
 * Builds `heatmap-data.json` from every CSV in `raw/heatmap`.
 *
 * The base name of each CSV selects the metric it carries (see `valMap`).
 * Every row is identified by its 'ISO 2 Code' column; every column whose
 * header is a year ("1900") or a year span ("2000-10") contributes a value.
 * The result is keyed by year, then by country code, and holds the raw values
 * (`u`, `f`, `l`) plus one fill colour per numeric value (`uC`, `fC`, `lC`).
 *
 * @param {function(Error=)} done Called with no arguments once the file has
 *     been written, or with the first error encountered.
 */
function heatmapdData(done) {
  var data = {};
  var DEST_FILE = join(PROCESSED_FOLDER, 'heatmap-data.json');
  // Map data keys to one letter values for smaller file size.
  var valMap = {
    'urbanization': 'u',
    'fertility-rates': 'f',
    'life-expectancy': 'l'
  };
  // Headers of value columns: a single year or a "from-to" span.
  var YEAR_COLUMN = /^[0-9]+(?:-[0-9]+)?$/;

  // True for finite numbers only (rejects null, strings, NaN, +/-Infinity).
  var isNumber = function(num) {
    return _.isNumber(num) && Math.abs(num) !== Infinity && !isNaN(num);
  };

  // Builds a linear colour scale spanning the min/max of `arr`, or returns
  // false when `arr` is empty (Math.min/Math.max then yield +/-Infinity).
  // NOTE(review): each range below lists three colours while the domain has
  // only two stops; d3 pairs domain and range positionally, so presumably the
  // third colour is never reached - confirm whether a mid-point stop was
  // intended.
  var getScale = function(arr, colors) {
    var min = Math.min.apply(Math, arr);
    var max = Math.max.apply(Math, arr);
    if (Math.abs(min) === Infinity || Math.abs(max) === Infinity) {
      return false;
    }
    return d3.scale.linear().domain([min, max]).range(colors);
  };

  // Prunes empty states and attaches colours. Runs once, after all CSVs have
  // been merged into `data`.
  var addTheHeat = function() {
    // Drop empty states: a state is removed only when all three metrics are
    // present for that year and none of them is numeric. States that lack a
    // metric entirely are kept.
    _.forEach(data, function(stateList, yearKey) { // Year by year.
      _.forEach(stateList, function(state, stateISOKey) {
        var allPresent = state.u !== undefined &&
          state.f !== undefined &&
          state.l !== undefined;
        var noneNumeric = !isNumber(state.u) &&
          !isNumber(state.f) &&
          !isNumber(state.l);
        if (allPresent && noneNumeric) {
          delete data[yearKey][stateISOKey];
        }
      });
    });

    _.forEach(data, function(stateList) { // Year by year.
      var urbanizationVals = [];
      var fertilityVals = [];
      var lifeVals = [];
      var urbanScale;
      var lifeScale;
      var fertilityScale;

      _.forEach(stateList, function(state) {
        if (isNumber(state.u)) {
          urbanizationVals.push(state.u);
        }
        if (isNumber(state.f)) {
          fertilityVals.push(state.f);
        }
        if (isNumber(state.l)) {
          lifeVals.push(state.l);
        }
      });

      // The lists are already restricted to finite numbers; they are passed
      // as-is so that a legitimate 0 stays inside the scale's domain.
      urbanScale = getScale(urbanizationVals, ['#edf8b1', '#7fcdbb', '#2c7fb8']);
      lifeScale = getScale(lifeVals, ['#ece2f0', '#a6bddb', '#1c9099']);
      fertilityScale = getScale(fertilityVals, ['#e5f5f9', '#99d8c9', '#2ca25f']);

      // Colour numeric values only: feeding null/undefined/strings to the
      // scale would produce an invalid colour string.
      _.forEach(stateList, function(state) {
        if (lifeScale && isNumber(state.l)) {
          state.lC = lifeScale(state.l);
        }
        if (fertilityScale && isNumber(state.f)) {
          state.fC = fertilityScale(state.f);
        }
        if (urbanScale && isNumber(state.u)) {
          state.uC = urbanScale(state.u);
        }
      });
    });
  };

  // Expands abbreviated years: values below 100 are treated as two-digit
  // years (00-54 -> 2000s, 55-99 -> 1900s). Comparing the parsed number rather
  // than its printed length also covers zero-padded input such as "05".
  var normalizeYear = function(year) {
    var y = parseInt(year, 10);
    if (y < 100) {
      y += y < 55 ? 2000 : 1900;
    }
    return y;
  };

  // Stores one value, creating the year / country buckets on demand.
  var setValue = function(year, countryISO, type, value) {
    if (!data[year]) {
      data[year] = {};
    }
    if (!data[year][countryISO]) {
      data[year][countryISO] = {};
    }
    data[year][countryISO][type] = value;
  };

  // Records `val` for a single year, or for every year of a span. The end
  // year of a span is exclusive.
  var appendValue = function(years, countryISO, dataType, val) {
    var type = valMap[dataType];
    var value = val === 'NULL' ? null : val;
    var from = years[0];
    var to = years[1];
    var cur;
    if (!type) {
      throw new Error('Data type does not exist: ' + dataType);
    }
    // If a year span, append value for each year.
    if (to && from !== to) {
      for (cur = from; cur < to; cur++) {
        setValue(cur, countryISO, type, value);
      }
    } else {
      setValue(from, countryISO, type, value);
    }
  };

  var populateDataForRow = function(dataType, row) {
    var countryISO = row['ISO 2 Code'];
    // Rows without a country code would otherwise be filed under an
    // "undefined" / empty key.
    if (!countryISO) {
      return;
    }
    // Normalize Antarctica.
    if (countryISO === 'ATA') {
      countryISO = 'AQ';
    }
    // Loop through keys to find the ones like 2000-10, 1900, 1900-60, etc.
    Object.keys(row).forEach(function(key) {
      if (YEAR_COLUMN.test(key)) {
        appendValue(
          key.split('-').map(normalizeYear),
          countryISO,
          dataType,
          row[key]
        );
      }
    });
  };

  // Reads and parses one CSV, merging its rows into `data`.
  var readFile = function(file, callback) {
    var dataType = path.basename(file).replace(/\..+$/, '');
    fs.readFile(file, { encoding: 'utf8' }, function(err, contents) {
      if (err) {
        return callback(err);
      }
      parseCSV(contents, {
        delimiter: ',',
        skip_empty_lines: 1,
        trim: 1,
        columns: true,
        auto_parse: 1
      }, function(err2, rows) {
        if (err2) {
          return callback(err2);
        }
        try {
          rows.forEach(function(row) {
            populateDataForRow(dataType, row);
          });
        } catch (rowErr) {
          // e.g. a CSV whose name is not listed in valMap.
          return callback(rowErr);
        }
        process.nextTick(callback);
      });
    });
  };

  async.waterfall([
    async.apply(glob, join(LOCAL_RAW_FILES, 'heatmap/*.csv')),
    function(files, callback) {
      async.each(files, readFile, callback);
    }
  ], function(err) {
    if (err) {
      return done(err);
    }
    addTheHeat();
    fs.writeFile(
      DEST_FILE,
      JSON.stringify(data),
      { encoding: 'utf8' },
      function(writeErr) {
        if (writeErr) {
          return done(writeErr);
        }
        console.log('Completed all heatmap data.');
        done();
      }
    );
  });
}
/**
 * Builds `fossil-fuel-circles.json` from the `raw/fossil-fuel/gridcar.*` files.
 *
 * Each input file holds one numeric value per line and is named with a
 * trailing run of digits that becomes its key in the output (the year). The
 * files are read twice: a first pass finds the largest value across all files
 * so that every year shares one colour / opacity scale, a second pass samples
 * each grid and emits `{ coord, fill, opacity }` entries.
 *
 * @param {function(Error=)} done Called with no arguments once the file has
 *     been written, or with the first error encountered.
 */
function co2Data(done) {
  var data = {};
  // Only every `precision`-th row and column of the grid is emitted.
  var precision = 4;
  // Setup projection that can be used to translate grid cells to coordinates.
  // NOTE(review): cells are mapped through an inverse Mercator projection; if
  // the source is a regular lat/lng grid this distorts latitudes - confirm
  // that this is intended.
  var width = 360;
  var height = 180;
  var prj = d3.geo.mercator()
    .scale((width + 1) / 2 / Math.PI)
    .translate([width / 2, height / 2]);
  // Largest value of each file, collected during the first pass.
  var allValues = [];
  // Shared scales, created once the first pass has finished.
  var opacityScale;
  var colorScale;

  // Rounds to two decimals.
  var shortenNum = function(num) {
    return parseFloat(Number(num).toFixed(2));
  };

  // Keeps only lines that hold a number.
  var filterValues = function(val) {
    return /[0-9]/.test(val) && val !== '' && !isNaN(parseFloat(val));
  };

  // Turns raw file contents into a flat list of rounded numbers.
  var parseGrid = function(filedata) {
    return filedata
      .split('\n')
      .filter(filterValues)
      .map(function(line) {
        return shortenNum(line);
      });
  };

  // Largest element of `values`, or -Infinity for an empty list. A plain loop
  // avoids spreading tens of thousands of arguments into Math.max.
  var maxOf = function(values) {
    return values.reduce(function(max, value) {
      return value > max ? value : max;
    }, -Infinity);
  };

  var accumulateValues = function(filedata) {
    allValues.push(maxOf(parseGrid(filedata)));
  };

  // Samples one grid and appends the visible cells to `year`.
  var processData = function(filedata, year) {
    // Split the flat list into rows of `width` cells: `x` is the column index
    // and `y` the row index.
    var rows = _.chunk(parseGrid(filedata), width);
    rows.forEach(function(row, y) {
      if (y % precision !== 0) {
        return;
      }
      row.forEach(function(col, x) {
        var opacity = shortenNum(opacityScale(col));
        var coord;
        if (opacity > 0 && x !== 0 && x % precision === 0) {
          coord = prj.invert([x, y]);
          coord[0] = shortenNum(coord[0]);
          coord[1] = shortenNum(coord[1]);
          year.push({
            coord: coord,
            fill: colorScale(col),
            opacity: opacity
          });
        }
      });
    });
  };

  // First pass: record the file's maximum value.
  var readFileAccumulate = function(file, callback) {
    fs.readFile(file, { encoding: 'utf8' }, function(err, contents) {
      if (err) {
        return callback(err);
      }
      accumulateValues(contents);
      process.nextTick(callback);
    });
  };

  // Second pass: convert the file into output entries keyed by its year.
  var readFileProcess = function(file, callback) {
    var match = file.match(/[\d]+$/);
    var year;
    if (!match) {
      return callback(
        new Error('Cannot determine year from file name: ' + file)
      );
    }
    year = match[0];
    data[year] = [];
    fs.readFile(file, { encoding: 'utf8' }, function(err, contents) {
      if (err) {
        return callback(err);
      }
      processData(contents, data[year]);
      callback(null);
    });
  };

  async.waterfall([
    async.apply(glob, join(LOCAL_RAW_FILES, 'fossil-fuel/gridcar.*')),
    function(files, callback) {
      async.each(files, readFileAccumulate, function(err) {
        callback(err, files);
      });
    },
    function(files, callback) {
      var total = maxOf(allValues);
      opacityScale = d3.scale.linear()
        .domain([0, total])
        .range([0, 1]);
      colorScale = d3.scale.linear()
        .domain([0, total])
        .range(['#B8DFFF', '#C70000']);
      async.each(files, readFileProcess, callback);
    }
  ], function(err) {
    if (err) {
      return done(err);
    }
    fs.writeFile(
      join(PROCESSED_FOLDER, 'fossil-fuel-circles.json'),
      JSON.stringify(data),
      { encoding: 'utf8' },
      function(writeErr) {
        if (writeErr) {
          return done(writeErr);
        }
        console.info('Completed fossil fuel data.');
        done();
      }
    );
  });
}
/**
 * Builds `population-points.json` from `raw/population-points/points.csv`.
 *
 * The CSV is decoded as ISO-8859-1 and read positionally: column 2 is the
 * name, 3 the year, 4 and 5 the coordinate pair, 6 the description and 7 the
 * location. Rows without a name or without two numeric coordinates are
 * skipped.
 *
 * @param {function(Error=)} done Called with no arguments once the file has
 *     been written, or with the first error encountered.
 */
function pointData(done) {
  var DEST_FILE = join(PROCESSED_FOLDER, 'population-points.json');
  var finished = false;

  // Reports the outcome exactly once, whichever of the stream / parser /
  // writer gets there first.
  var finish = function(err) {
    if (finished) {
      return;
    }
    finished = true;
    done(err);
  };

  var convert = function(parseErr, rows) {
    var data = [];
    if (parseErr) {
      return finish(parseErr);
    }
    rows.forEach(function(row) {
      var obj = {
        coord: [row[4], row[5]],
        name: row[2],
        location: row[7],
        description: row[6],
        year: row[3]
      };
      if (obj.name &&
          typeof obj.coord[0] === 'number' &&
          typeof obj.coord[1] === 'number') {
        data.push(obj);
      }
    });
    fs.writeFile(DEST_FILE, JSON.stringify(data), function(writeErr) {
      if (writeErr) {
        return finish(writeErr);
      }
      console.info('Completed population points.');
      finish();
    });
  };

  fs.createReadStream(join(LOCAL_RAW_FILES, 'population-points/points.csv'))
    // pipe() does not forward errors, so a missing or unreadable file has to
    // be caught on the source stream itself.
    .on('error', finish)
    .pipe(iconv.decodeStream('iso-8859-1'))
    .pipe(parseCSV({
      delimiter: ',',
      skip_empty_lines: 1,
      trim: 1,
      auto_parse: 1
    }, convert));
}
/**
 * Builds `timeline-milestones.json` from
 * `raw/timeline-milestone/timeline-milestones.csv`.
 *
 * Before the rows are processed, every image in the milestone image folder is
 * renamed to a slug and shrunk if it is wider or taller than 1000px. Each CSV
 * row is read positionally: column 0 / 1 are the begin / end year, 2 the
 * space-separated category codes, 3 the title, 4 the description and the last
 * column the location. Rows need two numeric years and a title to be kept.
 * A row's image is the first image path matching
 * `<year_begin>[-<year_end>]-<slugified title>`.
 *
 * @param {function(Error=)} done Called with no arguments once the file has
 *     been written, or with the first error encountered.
 */
function milestoneData(done) {
  var DEST_FILE = join(PROCESSED_FOLDER, 'timeline-milestones.json');
  var IMAGE_DIR = join(LOCAL_RAW_FILES, 'timeline-milestone', 'images');
  var data = [];
  var cachedImages = [];
  var finished = false;

  // Reports the outcome exactly once.
  var finish = function(err) {
    if (finished) {
      return;
    }
    finished = true;
    done(err);
  };

  // Lower-case slug of the last path segment of `str`, with underscores
  // turned into dashes and runs of dashes collapsed. `str` is coerced to a
  // string because auto-parsed CSV cells may be numbers.
  var superSlugify = function(str) {
    return slugify(path.basename(String(str)))
      .toLowerCase()
      .replace(/_/g, '-')
      .replace(/[-]+/g, '-');
  };

  // Lists every jpg / jpeg / png / gif in IMAGE_DIR (case-insensitive).
  var allImages = function(allImagesComplete) {
    glob(
      join(IMAGE_DIR, '/*(*.jpg|*.jpeg|*.png|*.gif)'),
      { nocase: true },
      allImagesComplete
    );
  };

  // Renames one image to its slug and resizes it when it is too large.
  var normalize = function(img, normalized) {
    var newPath = join(path.dirname(img), superSlugify(img));
    // Forward only the error so extra write() results never leak through.
    var afterWrite = function(writeErr) {
      normalized(writeErr);
    };
    fs.rename(img, newPath, function(renameErr) {
      if (renameErr) {
        return normalized(renameErr);
      }
      gm(newPath).size(function(sizeErr, size) {
        if (sizeErr) {
          return normalized(sizeErr);
        }
        if (size.width > 1000) {
          gm(newPath).resize(800, null).write(newPath, afterWrite);
        } else if (size.height > 1000) {
          gm(newPath).resize(null, 800).write(newPath, afterWrite);
        } else {
          normalized();
        }
      });
    });
  };

  var normalizeImages = function(imgNormalizationComplete) {
    allImages(function(globErr, images) {
      if (globErr) {
        return imgNormalizationComplete(globErr);
      }
      async.each(images, normalize, imgNormalizationComplete);
    });
  };

  // Expands a category code; unknown codes are returned unchanged.
  var categoryMap = function(cat) {
    switch (cat.toLowerCase().trim().replace(';', '')) {
      case 's&t': return 'Science & Technology';
      case 'f&a': return 'Food & Agriculture';
      case 'env': return 'Environment';
      case 'p&s': return 'People & Society';
      default: return cat;
    }
  };

  // Looks up the image belonging to `obj`; yields the path or false.
  var associatedImage = function(obj, imageComplete) {
    var escapedTitle = superSlugify(obj.title)
      .replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
    // Built once per row; the pattern does not depend on the image tested.
    var exp = new RegExp(
      obj.year_begin +
      '(?:-' +
      obj.year_end +
      ')?' +
      '-' +
      escapedTitle +
      '.+$'
    );
    process.nextTick(function search() {
      var i;
      for (i = 0; i < cachedImages.length; i++) {
        if (exp.test(cachedImages[i])) {
          return imageComplete(null, cachedImages[i]);
        }
      }
      imageComplete(null, false);
    });
  };

  var processRow = function(row, callback) {
    var obj;
    var isValid = typeof row[1] === 'number' &&
      typeof row[0] === 'number' &&
      row[3];
    if (!isValid) {
      return callback();
    }
    obj = {
      year_begin: row[0],
      year_end: row[1],
      // Coerced so a missing or numeric category cell cannot throw.
      category: String(row[2] == null ? '' : row[2])
        .split(' ')
        .map(function(cat) {
          return categoryMap(cat);
        }),
      title: row[3] || null,
      description: row[4] || null,
      image: null,
      location: row[row.length - 1] || null
    };
    associatedImage(obj, function(imgErr, img) {
      if (img) {
        obj.image = img;
      }
      data.push(obj);
      callback();
    });
  };

  var onComplete = function(rowErr) {
    if (rowErr) {
      return finish(rowErr);
    }
    fs.writeFile(DEST_FILE, JSON.stringify(data), function(writeErr) {
      if (writeErr) {
        return finish(writeErr);
      }
      console.info('Completed timeline milestones.');
      finish();
    });
  };

  var parser = parseCSV({
    delimiter: ',',
    skip_empty_lines: 1,
    trim: 1,
    auto_parse: 1
  }, function _parserCb(err, rows) {
    if (err) {
      return finish(err);
    }
    normalizeImages(function(imageErr) {
      if (imageErr) {
        return finish(imageErr);
      }
      // List again: normalisation may have renamed the files.
      allImages(function(globErr, images) {
        if (globErr) {
          return finish(globErr);
        }
        cachedImages = images;
        async.each(rows, processRow, onComplete);
      });
    });
  });

  fs.createReadStream(join(
    LOCAL_RAW_FILES,
    'timeline-milestone/timeline-milestones.csv'
  ))
    // pipe() does not forward errors, so a missing or unreadable file has to
    // be caught on the source stream itself.
    .on('error', finish)
    .pipe(iconv.decodeStream('iso-8859-1'))
    .pipe(parser);
}
// Register the pipeline stages. The output folder must be recreated first
// because every later stage writes into it.
steps.push(createProcessedFolder);
steps.push(milestoneData);
steps.push(heatmapdData);
steps.push(pointData);
steps.push(co2Data);

// Run the stages one after another; the first error stops the pipeline.
async.waterfall(steps, function _done(err) {
  if (err) {
    console.error(err);
    // Report failure to the calling shell instead of announcing success.
    process.exitCode = 1;
    return;
  }
  console.info('Done!');
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment