This Gist was automatically created by Carbide, a free online programming environment.
You can view a live, interactive version of this Gist here.(http://alpha.trycarbide.com/anonymous/0d4c73367f3234f45a39980d1ec7d2f1).
import './system_config.js' | |
var xlsx = require('xlsx') /// Let's start by defining a function that can get us an xls file remotely. | |
export default function getXls(url){ | |
var f = new FileReader() | |
fetch(url) | |
.then(x => x.blob()) | |
.then(x => f.readAsBinaryString(x)) | |
return new Promise((resolve, reject) => { | |
f.onloadend = () => resolve(xlsx.read(f.result, {type:"binary"})) | |
}) | |
} |
import getXls from './cell0.js'; | |
window.sheet; /// Now, we grab the file and stick it in a global variable 'sheet' | |
var res = await getXls('https://commondatastorage.googleapis.com/ckannet-storage/2012-08-14T085537/GRAIN---Land-grab-deals---Jan-2012.xls') | |
window.sheet = res.Sheets.Sheet1 |
window.cols = {} /// 'sheet' is a giant object right now, but we'd really like it as a json list. to get it there, let's start by extracting the names of the columns. | |
Object.keys(window.sheet) | |
.filter(key => key.slice(1) === '1') | |
.forEach(key => cols[key[0]] = sheet[key].v) | |
window.cols |
window.json = [] /// Now that we have the column names, let's go ahead and fill in the rest of our json list. | |
Object.keys(window.sheet).forEach(key => { | |
var row = Number(key.slice(1)) | |
if(!row || row < 2) return; | |
var rowIndex = row - 2 | |
var colIndex = key[0] | |
json[rowIndex] = json[rowIndex] || {} | |
json[rowIndex][window.cols[colIndex]] = window.sheet[key].v | |
}) | |
json |
var investmentRules = [] /// Now let's normalize the invenstments! | |
window.normalizeInvestment = function normalizeInvestment(investment){ | |
for (var rule of investmentRules){ | |
if(rule(investment)) return rule(investment) | |
} | |
return false | |
} | |
function normalized(){ | |
return json.map(row => row['Projected investment']) | |
.filter(x=>x) | |
.map(normalizeInvestment) | |
} | |
function stillAbnormal(){ | |
return json.map(row => row['Projected investment']) | |
.filter(x=>x) | |
.filter(x => !normalizeInvestment(x)) | |
} | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d+(\.\d+)?)\s+million/) | |
return match ? Number(match[1]) : false | |
}) | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d)+,(\d+)\s+million/) | |
return match ? Number(match[1] + match[2]) : false | |
}) | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d+(\.\d+)?)\s+billion/) | |
return match ? Number(match[1])*1000 : false | |
}) | |
stillAbnormal().join('\n') | |
json.filter(row => row['Projected investment'] && row['Projected investment'].match(/\/yr/)) | |
investmentRules.push( investment => { /// Let's do some special cases!! Woohoo!! | |
switch(investment){ | |
case "US$30-35 million": | |
return 32 | |
case "US$4/ha/yr (lease)": | |
return 7 /// $35 yrs \*50000 ha | |
case "US$57,600 (US$0.80/ha)": | |
return .0576 | |
case "US$125,000/yr (land lease)": | |
return 3 /// $125,000 a year for 25 years | |
case "US$8/ha/yr (lease)": | |
return 5.4 | |
case "US$1.2/ha/yr (after first 7 years) in Gambela and US$8/ha/yr (after first 6 years) in Bako": | |
return 10 /// I am not patient enough to figure this one out lol. Hopefully it's around 10 | |
} | |
return false | |
}) | |
stillAbnormal() |
window.cleaned = [] | |
window.json.forEach(row => { | |
var investment = row['Projected investment'] | |
var cleaned_row = {...row} | |
delete cleaned_row['Projected investment'] | |
cleaned_row['Projected Investment'] = 'unknown' | |
if(investment) { | |
cleaned_row['Projected Investment'] = normalizeInvestment(investment) | |
} | |
cleaned.push(cleaned_row) | |
}) | |
window.cleaned |
This Gist was automatically created by Carbide, a free online programming environment.
You can view a live, interactive version of this Gist here.(http://alpha.trycarbide.com/anonymous/0d4c73367f3234f45a39980d1ec7d2f1).
import {say} from 'cowsay' | |
say({text: "hello there!\n \nWelcome to the Carbide Alpha\nRelease Data Cleaning\nExample Notebook!\n \nJoin us for an interactive\njourney through an excel file!\n\nRun each of the following\ncells in order!\n \nSend bugs to\nbugs@trycarbide.com!\n \nSend hellos to \nhello@trycarbide.com! "}) |
System.config({ | |
"meta": { | |
'https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.8.0/xlsx.full.min.js': { | |
exports: 'XLSX', | |
format: 'global' | |
} | |
}, | |
"map": { | |
"xlsx": 'https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.8.0/xlsx.full.min.js' | |
} | |
}) |