Skip to content

Instantly share code, notes, and snippets.

@cigrainger
Last active December 28, 2015 18:59
Show Gist options
  • Save cigrainger/7546558 to your computer and use it in GitHub Desktop.
Save cigrainger/7546558 to your computer and use it in GitHub Desktop.
Clean up World Development Indicators (WDI) data for analysis (especially in Stata)
#' Reshape a World Development Indicators (WDI) CSV for analysis
#'
#' Reads a CSV whose first four columns are named (after `read.csv()`)
#' `Country.Name`, `Country.Code`, `Indicator.Name` and `Indicator.Code`,
#' treats every remaining column as one observation period, and reshapes the
#' data with reshape2 into either a long table or a country-year panel with
#' one column per indicator.
#'
#' @param x Path to the CSV file, as a single character string.
#' @param cache If `TRUE`, the result is also written to
#'   `paste(filename, "csv", sep = ".")` without row names.
#' @param filename Base name (without extension) of the cache file. The
#'   default is the current time as text, which contains spaces and colons;
#'   pass an explicit name where such file names are not acceptable.
#' @param country Which country identifier(s) to keep: `"name"`
#'   (`Country.Name`), `"code"` (`Country.Code`) or `"both"`.
#' @param format `"long"` for one row per country/indicator/year with a
#'   `value` column; `"wide"` for one row per country/year with one column
#'   per indicator.
#' @return A data frame. The `year` column is a character vector built from
#'   characters 2-5 of the original column names (i.e. the name with its
#'   first character, normally the `X` added by `read.csv()`, removed).
#'   `Indicator.Code` is always dropped.
prep.wdi <- function(x, cache = FALSE, filename = paste(Sys.time()),
                     country = 'name', format = 'wide') {
  # Validate the cheap scalar arguments before touching the file system.
  # `||` / `%in%` keep every condition a single TRUE/FALSE, even for NA input.
  if (!is.character(x) || length(x) != 1L) {
    stop("x must be a string.", call. = FALSE)
  }
  # 'both' is a supported option (see the reshaping below); the previous
  # check rejected it, which made that code path unreachable.
  if (length(country) != 1L || !country %in% c("name", "code", "both")) {
    stop("Undefined country option. Must be name, code, or both.",
         call. = FALSE)
  }
  if (length(format) != 1L || !format %in% c("wide", "long")) {
    stop("Undefined format option. Must be wide or long.", call. = FALSE)
  }

  df <- utils::read.csv(x)

  # The four descriptor columns must come first, in this order, and at least
  # one further column must exist to be reshaped.
  header <- c("Country.Name", "Country.Code", "Indicator.Name",
              "Indicator.Code")
  if (ncol(df) < 5L || !identical(names(df)[1:4], header)) {
    stop("This is not a properly formatted WDI csv.", call. = FALSE)
  }

  # Fail with a clear message instead of a later "could not find function".
  if (!requireNamespace("reshape2", quietly = TRUE)) {
    stop("Package 'reshape2' is required but is not installed.",
         call. = FALSE)
  }

  # Positions of the country identifier column(s) requested by the caller.
  country_idx <- switch(country, name = 1L, code = 2L, both = 1:2)
  country_cols <- header[country_idx]
  id_vars <- c(country_cols, "Indicator.Name")

  # Keep identifiers, the indicator name (column 3) and every period column;
  # Indicator.Code (column 4) is dropped.
  df <- df[, c(country_idx, 3L, seq.int(5L, ncol(df))), drop = FALSE]

  # Long form: one row per identifier combination and period column.
  df <- reshape2::melt(df, id.vars = id_vars)
  names(df)[length(id_vars) + 1L] <- "year"
  # Drop the first character of the former column name ("X1960" -> "1960").
  df$year <- substr(as.character(df$year), 2, 5)

  if (format == "wide") {
    # Spread indicators into columns, one row per country (and code) / year.
    row_vars <- paste(c(country_cols, "year"), collapse = " + ")
    cast_formula <- stats::as.formula(paste(row_vars, "~ Indicator.Name"))
    df <- reshape2::dcast(df, cast_formula, value.var = "value")
  }

  if (isTRUE(as.logical(cache))) {
    utils::write.csv(df, file = paste(filename, "csv", sep = "."),
                     row.names = FALSE)
  }

  df
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment