Last active
December 28, 2015 18:59
-
-
Save cigrainger/7546558 to your computer and use it in GitHub Desktop.
Clean up world development indicator data for analysis (esp in Stata)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Reshape a World Development Indicators (WDI) csv export for analysis
#'
#' Reads a csv whose first four columns are named (after `read.csv()` name
#' mangling) `Country.Name`, `Country.Code`, `Indicator.Name` and
#' `Indicator.Code`, followed by one column per year. The year columns are
#' melted into a single `year` column; in wide format each indicator then
#' becomes its own column.
#'
#' @param x Path to the csv file, as a character string.
#' @param cache If `TRUE`, the result is also written to
#'   `<filename>.csv` (without row names).
#' @param filename Base name (no extension) of the cache file. The default
#'   is the current timestamp as text.
#' @param country Which country identifier(s) to keep: `"name"`, `"code"`
#'   or `"both"`.
#' @param format `"wide"` (one row per country/year, one column per
#'   indicator) or `"long"` (one row per country/indicator/year with a
#'   `value` column).
#' @return A data frame in the requested layout.
prep.wdi <- function(x, cache = FALSE, filename = paste(Sys.time()),
                     country = "name", format = "wide") {
  if (!is.character(x)) {
    stop("x must be a string.")
  }
  df <- read.csv(x)

  # Compare the whole header at once; a file with fewer than four columns
  # yields NA names here and is rejected with the same message instead of
  # failing on an NA condition.
  wdi_header <- c("Country.Name", "Country.Code",
                  "Indicator.Name", "Indicator.Code")
  if (!identical(names(df)[1:4], wdi_header)) {
    stop("This is not a properly formatted WDI csv.")
  }

  # "both" is implemented below and advertised in the message, so it has to
  # be accepted here as well.
  country_ok <- is.character(country) && length(country) == 1L &&
    country %in% c("name", "code", "both")
  if (!country_ok) {
    stop("Undefined country option. Must be name, code, or both.")
  }
  format_ok <- is.character(format) && length(format) == 1L &&
    format %in% c("wide", "long")
  if (!format_ok) {
    stop("Undefined format option. Must be wide or long.")
  }

  # Everything after the four header columns is treated as a year column.
  year_cols <- names(df)[-seq_len(4L)]
  if (length(year_cols) == 0L) {
    stop("This is not a properly formatted WDI csv.")
  }

  # Use the namespace directly rather than attaching reshape2 to the
  # caller's search path, and fail loudly if it is not installed.
  if (!requireNamespace("reshape2", quietly = TRUE)) {
    stop("Package 'reshape2' is required but is not installed.")
  }

  country_cols <- switch(
    country,
    name = "Country.Name",
    code = "Country.Code",
    both = c("Country.Name", "Country.Code")
  )
  id_cols <- c(country_cols, "Indicator.Name")

  # Indicator.Code is dropped; the indicator is identified by its name only.
  df <- df[, c(id_cols, year_cols), drop = FALSE]
  df <- reshape2::melt(df, id.vars = id_cols, variable.name = "year")
  # Characters 2-5 of the melted column name are kept as the year; this
  # skips the leading "X" that read.csv() puts in front of numeric headers.
  df$year <- substr(as.character(df$year), 2, 5)

  if (format == "wide") {
    row_vars <- paste(c(country_cols, "year"), collapse = " + ")
    wide_formula <- stats::as.formula(paste(row_vars, "~ Indicator.Name"))
    df <- reshape2::dcast(df, wide_formula, value.var = "value")
  }

  if (isTRUE(cache)) {
    write.csv(df, file = paste0(filename, ".csv"), row.names = FALSE)
  }
  df
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment