# Clean the salary table in so-salaries.csv and reshape it to long format.
#
# Input:  so-salaries.csv with six columns: location, education code,
#         years of experience, and the 25th/50th/75th salary percentiles.
# Output: so-salaries-clean.csv with one row per
#         location / education / experience / percentile combination.
library(tidyverse)

# Setup ----
# Paths are built from `data_dir` rather than calling setwd(), so running the
# script neither changes the working directory nor wipes the global
# environment of whoever sources it.
data_dir <- "~/Documents/Python/so"
percentile_cols <- c("25th_Percentile", "50th_Percentile", "75th_Percentile")
column_names <- c("Location", "Education", "Years of Experience",
                  percentile_cols)

# Import ----
so <- read.csv(file.path(data_dir, "so-salaries.csv"),
               stringsAsFactors = FALSE)

# Clean ----
# Fail early if the file layout changed; assigning too few names would
# otherwise leave NA column names behind.
stopifnot(ncol(so) == length(column_names))
colnames(so) <- column_names

# Translate the numeric education codes to labels. Missing codes stay NA and
# any other unrecognised code becomes "None".
so$Education <- case_when(
  is.na(so$Education) ~ NA_character_,
  so$Education == 1 ~ "Less than Bachelors",
  so$Education == 2 ~ "Bachelors Degree",
  so$Education == 3 ~ "Graduate Degree",
  so$Education == 4 ~ "Post-Graduate Degree",
  TRUE ~ "None"
)

# Salaries arrive as text with thousands separators ("85,000"); strip the
# commas before converting to numeric.
so[percentile_cols] <- lapply(
  so[percentile_cols],
  function(x) as.numeric(gsub(",", "", x, fixed = TRUE))
)

# Keep only the part of the location before the first comma.
so$Location <- gsub(",.*$", "", so$Location)

# Reshape ----
# One row per percentile. The label is shortened to its first four characters
# ("25th", "50th", "75th") and rows are ordered by percentile so the output
# keeps the percentile-by-percentile layout that gather() used to produce.
so <- so %>%
  pivot_longer(
    cols = all_of(percentile_cols),
    names_to = "Percentile",
    values_to = "Salary"
  ) %>%
  mutate(Percentile = substr(Percentile, 1, 4)) %>%
  arrange(Percentile)

# Export ----
write.csv(so, file.path(data_dir, "so-salaries-clean.csv"),
          row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment