Skip to content

Instantly share code, notes, and snippets.

@rcdilorenzo
Last active February 16, 2018 16:25
Show Gist options
  • Save rcdilorenzo/6af6b1f1c9460ae264bd754511c6ff86 to your computer and use it in GitHub Desktop.
Save rcdilorenzo/6af6b1f1c9460ae264bd754511c6ff86 to your computer and use it in GitHub Desktop.
(GIF: https://git.io/vAC88, Data: http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data) Label columns as factors using a bit of Vim/Spacemacs foo
# Assumes an existing data frame called `raw` that was
# imported directly from the data URL and whose "?"
# values have been left as-is (not converted to NA).
# Assign descriptive names to the columns of `raw` (23 names, matched by
# position).
colnames(raw) <- c(
  "Type",
  # Cap, bruising and odor
  "CapShape", "CapSurface", "CapColor",
  "Bruises", "Odor",
  # Gill
  "GillAttachment", "GillSpacing", "GillSize", "GillColor",
  # Stalk
  "StalkShape", "StalkRoot",
  "StalkSurfaceAboveRing", "StalkSurfaceBelowRing",
  "StalkColorAboveRing", "StalkColorBelowRing",
  # Veil and ring
  "VeilType", "VeilColor", "RingNumber", "RingType",
  # Spore print, population and habitat
  "SporePrintColor", "Population", "Habitat"
)
# Recode Type from single-letter codes to labelled factor levels.
# Values outside the listed codes become NA (standard factor() behaviour).
raw$Type <- local({
  lookup <- c(e = "edible", p = "poisonous")
  factor(
    raw$Type,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode CapShape from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$CapShape <- local({
  lookup <- c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  )
  factor(
    raw$CapShape,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode CapSurface from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$CapSurface <- local({
  lookup <- c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
  factor(
    raw$CapSurface,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode CapColor from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$CapColor <- local({
  lookup <- c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
  factor(
    raw$CapColor,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode Bruises from its "t"/"f" codes to labelled factor levels.
raw$Bruises <- local({
  lookup <- c(t = "bruises", f = "no")
  factor(
    raw$Bruises,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode Odor from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$Odor <- local({
  lookup <- c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
  factor(
    raw$Odor,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode GillAttachment from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$GillAttachment <- local({
  lookup <- c(
    a = "attached", d = "descending", f = "free", n = "notched"
  )
  factor(
    raw$GillAttachment,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode GillSpacing from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$GillSpacing <- local({
  lookup <- c(c = "close", w = "crowded", d = "distant")
  factor(
    raw$GillSpacing,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode GillSize from single-letter codes to labelled factor levels.
raw$GillSize <- local({
  lookup <- c(b = "broad", n = "narrow")
  factor(
    raw$GillSize,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode GillColor from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$GillColor <- local({
  lookup <- c(
    k = "black", n = "brown", b = "buff", h = "chocolate",
    g = "gray", r = "green", o = "orange", p = "pink",
    u = "purple", e = "red", w = "white", y = "yellow"
  )
  factor(
    raw$GillColor,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkShape from single-letter codes to labelled factor levels.
raw$StalkShape <- local({
  lookup <- c(e = "enlarging", t = "tapering")
  factor(
    raw$StalkShape,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkRoot from single-letter codes to labelled factor levels.
# The "?" code is kept as an explicit "missing" level rather than NA, so
# this relies on `raw` still containing the literal "?" strings.
raw$StalkRoot <- local({
  lookup <- c(
    b = "bulbous", c = "club", u = "cup", e = "equal",
    z = "rhizomorphs", r = "rooted", "?" = "missing"
  )
  factor(
    raw$StalkRoot,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkSurfaceAboveRing from single-letter codes to labelled
# factor levels. Entry order in the lookup fixes the level order.
raw$StalkSurfaceAboveRing <- local({
  lookup <- c(f = "fibrous", y = "scaly", k = "silky", s = "smooth")
  factor(
    raw$StalkSurfaceAboveRing,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkSurfaceBelowRing from single-letter codes to labelled
# factor levels. Entry order in the lookup fixes the level order.
raw$StalkSurfaceBelowRing <- local({
  lookup <- c(f = "fibrous", y = "scaly", k = "silky", s = "smooth")
  factor(
    raw$StalkSurfaceBelowRing,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkColorAboveRing from single-letter codes to labelled
# factor levels. Entry order in the lookup fixes the level order.
raw$StalkColorAboveRing <- local({
  lookup <- c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  )
  factor(
    raw$StalkColorAboveRing,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode StalkColorBelowRing from single-letter codes to labelled
# factor levels. Entry order in the lookup fixes the level order.
raw$StalkColorBelowRing <- local({
  lookup <- c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  )
  factor(
    raw$StalkColorBelowRing,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode VeilType from single-letter codes to labelled factor levels.
raw$VeilType <- local({
  lookup <- c(p = "partial", u = "universal")
  factor(
    raw$VeilType,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode VeilColor from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$VeilColor <- local({
  lookup <- c(n = "brown", o = "orange", w = "white", y = "yellow")
  factor(
    raw$VeilColor,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode RingNumber from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$RingNumber <- local({
  lookup <- c(n = "none", o = "one", t = "two")
  factor(
    raw$RingNumber,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode RingType from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$RingType <- local({
  lookup <- c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  )
  factor(
    raw$RingType,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode SporePrintColor from single-letter codes to labelled factor
# levels. Entry order in the lookup fixes the level order.
raw$SporePrintColor <- local({
  lookup <- c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  )
  factor(
    raw$SporePrintColor,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode Population from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$Population <- local({
  lookup <- c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  )
  factor(
    raw$Population,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
# Recode Habitat from single-letter codes to labelled factor levels.
# Each lookup entry is code = "label"; entry order fixes the level order.
raw$Habitat <- local({
  lookup <- c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
  factor(
    raw$Habitat,
    levels = names(lookup),
    labels = unname(lookup)
  )
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment