Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Last active February 12, 2019 03:00
Show Gist options
  • Save erikgregorywebb/b59788ff403b5016063c920297255912 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/b59788ff403b5016063c920297255912 to your computer and use it in GitHub Desktop.
# Clean the salary data in so-salaries.csv and reshape it to long format.
#
# Reads:  <data_dir>/so-salaries.csv, expected to have six columns in this
#         order: location, education code, years of experience, and the
#         25th / 50th / 75th percentile salaries.
# Writes: <data_dir>/so-salaries-clean.csv with one row per original row and
#         percentile (columns: Location, Education, Years of Experience,
#         Percentile, Salary).
#
# The script no longer calls rm(list = ls()) or setwd(): both clobber the
# state of whatever session sources it. File paths are built explicitly from
# data_dir instead, so the same files are read and written as before.
library(tidyverse)

data_dir <- "~/Documents/Python/so"
input_path <- file.path(data_dir, "so-salaries.csv")
output_path <- file.path(data_dir, "so-salaries-clean.csv")

# import
so <- read.csv(input_path, stringsAsFactors = FALSE)

# clean
column_names <- c(
  "Location", "Education", "Years of Experience",
  "25th_Percentile", "50th_Percentile", "75th_Percentile"
)
percentile_cols <- column_names[4:6]

# The rename below is positional, so fail loudly if the layout is unexpected
# rather than silently mislabelling columns.
if (ncol(so) != length(column_names)) {
  stop(
    "Expected ", length(column_names), " columns in ", input_path,
    " but found ", ncol(so), ".",
    call. = FALSE
  )
}
colnames(so) <- column_names

# Translate the numeric education code into a label. Missing codes stay NA
# (as they did with the nested ifelse()); any other code becomes "None".
so$Education <- case_when(
  so$Education == 1 ~ "Less than Bachelors",
  so$Education == 2 ~ "Bachelors Degree",
  so$Education == 3 ~ "Graduate Degree",
  so$Education == 4 ~ "Post-Graduate Degree",
  is.na(so$Education) ~ NA_character_,
  TRUE ~ "None"
)

# Strip thousands separators (e.g. "85,000") before converting to numeric.
so[percentile_cols] <- lapply(
  so[percentile_cols],
  function(x) as.numeric(gsub(",", "", x))
)

# Keep only the part of the location before the first comma.
so$Location <- gsub(',.*$', '', so$Location)

# reshape
# gather() stacks the three percentile columns into Percentile/Salary pairs.
# It is kept (instead of pivot_longer()) so the row order of the output file
# is unchanged and older tidyr versions keep working.
so <- so %>%
  gather(
    key = "Percentile", value = "Salary",
    -Location, -Education, -`Years of Experience`
  )

# "25th_Percentile" -> "25th"
so$Percentile <- substr(so$Percentile, 1, 4)

# export
write.csv(so, output_path, row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment