# sellorm/sortinghat-test.R

## sortinghat-test.R
# run unique baby names from the babynames package through the sorting hat
# used in http://blog.sellorm.com/2017/12/21/command-line-utilities-in-r-pt-4/
# to check the distribution of results

# house lookup ------------------------------------------------------------
# Lookup table keyed by the 16 hexadecimal digits ("0"-"9", "a"-"f").
# The four houses repeat in a fixed cycle, so every house owns exactly four
# digits.
houses <- stats::setNames(
  rep(c("Hufflepuff", "Gryffindor", "Ravenclaw", "Slytherin"), times = 4),
  c(as.character(0:9), letters[1:6])
)


# get unique names --------------------------------------------------------
# Every distinct value of the `name` column in the babynames data set.
student_names <- unique(babynames::babynames[["name"]])


# Get house name ----------------------------------------------------------
# Sort one student into a house.
#
# The name is lower-cased and hashed with digest::sha1(); the first character
# of that hash is used as the key into the global `houses` lookup.
#
# student_name: the name to sort.
# Returns the single-element subset of `houses` selected by that key (the
# element keeps its name, i.e. the key that selected it).
get_house <- function(student_name) {
  hash_key <- substr(digest::sha1(tolower(student_name)), 1, 1)
  houses[hash_key]
}


# main --------------------------------------------------------------------
# Sort every name, then tabulate how many names land in each house.
# vapply() returns a plain character vector (exactly one house per name), so
# the data frame gets an ordinary factor column called `house` instead of a
# list column that has to be unlist()ed inside count(). USE.NAMES = FALSE
# stops the result being named after the student names.
house_results <- vapply(student_names, get_house, character(1),
                        USE.NAMES = FALSE)
housedf <- data.frame(house = house_results, stringsAsFactors = TRUE)
dplyr::count(housedf, house)
	# run unique baby names from the babynames package through the sorting hat
	# used in http://blog.sellorm.com/2017/12/21/command-line-utilities-in-r-pt-4/
	# to check the distribution of results

	# house lookup ------------------------------------------------------------
	# Lookup table keyed by the 16 hexadecimal digits ("0"-"9", "a"-"f").
	# The four houses repeat in a fixed cycle, so every house owns exactly four
	# digits.
	houses <- stats::setNames(
	  rep(c("Hufflepuff", "Gryffindor", "Ravenclaw", "Slytherin"), times = 4),
	  c(as.character(0:9), letters[1:6])
	)


	# get unique names --------------------------------------------------------
	# Every distinct value of the `name` column in the babynames data set.
	student_names <- unique(babynames::babynames[["name"]])


	# Get house name ----------------------------------------------------------
	# Sort one student into a house.
	#
	# The name is lower-cased and hashed with digest::sha1(); the first
	# character of that hash is used as the key into the global `houses` lookup.
	#
	# student_name: the name to sort.
	# Returns the single-element subset of `houses` selected by that key (the
	# element keeps its name, i.e. the key that selected it).
	get_house <- function(student_name) {
	  hash_key <- substr(digest::sha1(tolower(student_name)), 1, 1)
	  houses[hash_key]
	}


	# main --------------------------------------------------------------------
	# Sort every name, then tabulate how many names land in each house.
	# vapply() returns a plain character vector (exactly one house per name), so
	# the data frame gets an ordinary factor column called `house` instead of a
	# list column that has to be unlist()ed inside count(). USE.NAMES = FALSE
	# stops the result being named after the student names.
	house_results <- vapply(student_names, get_house, character(1),
	                        USE.NAMES = FALSE)
	housedf <- data.frame(house = house_results, stringsAsFactors = TRUE)
	dplyr::count(housedf, house)