Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Clean the Stack Overflow salary export and reshape it to long format.
#
# Reads  <data_dir>/so-salaries.csv
# Writes <data_dir>/so-salaries-clean.csv with columns:
#   Location, Education, Years of Experience, Percentile, Salary
#
# Paths are built explicitly from `data_dir` instead of calling setwd(), and
# the workspace is not wiped with rm(list = ls()), so running this script does
# not alter the caller's session state beyond the objects it creates.
data_dir <- "~/Documents/Python/so"
library(tidyverse)

# import
so <- read.csv(file.path(data_dir, "so-salaries.csv"), stringsAsFactors = FALSE)

# clean
percentile_cols <- c("25th_Percentile", "50th_Percentile", "75th_Percentile")

# The renaming below is positional, so fail early and clearly if the input
# does not have exactly the six expected columns.
stopifnot(ncol(so) == 6L)
colnames(so) <- c("Location", "Education", "Years of Experience",
                  percentile_cols)

# Map the numeric education code to a label. Codes outside 1-4 become "None";
# a missing code stays NA.
so$Education <- case_when(
  is.na(so$Education) ~ NA_character_,
  so$Education == 1 ~ "Less than Bachelors",
  so$Education == 2 ~ "Bachelors Degree",
  so$Education == 3 ~ "Graduate Degree",
  so$Education == 4 ~ "Post-Graduate Degree",
  TRUE ~ "None"
)

# Strip thousands separators (e.g. "85,000") before converting to numeric.
parse_salary <- function(x) {
  as.numeric(gsub(",", "", x, fixed = TRUE))
}
so[percentile_cols] <- lapply(so[percentile_cols], parse_salary)

# Keep only the text before the first comma of the location string.
so$Location <- sub(",.*$", "", so$Location)

# reshape
# One row per (Location, Education, Years of Experience, Percentile).
# arrange() is a stable sort, so rows are grouped by percentile column while
# keeping the input row order inside each group; as.data.frame() drops the
# tibble class that pivot_longer() adds.
so <- so %>%
  pivot_longer(
    cols = all_of(percentile_cols),
    names_to = "Percentile",
    values_to = "Salary"
  ) %>%
  arrange(Percentile) %>%
  as.data.frame()

# "25th_Percentile" -> "25th": keep the first four characters only.
so$Percentile <- substr(so$Percentile, 1, 4)

# export
write.csv(so, file.path(data_dir, "so-salaries-clean.csv"), row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment