R Cheatsheet
General
getwd()
Get Working Directory
setwd('~/Downloads')
Set Working Directory
ls()
List variables on Environment
dir()
List files and directories in the Working Directory (alias of list.files(); use list.dirs() for directories only)
list.files()
List files on Working Directory
rm('variable1')
Remove variable1 from Environment
rm(list = ls())
Remove all variables on Environment
identical(data1, data2)
colnames(data)
Get column names (names(df) also works on data frames)
rownames(data)
Get row names
data(name_dataset)
Load data set into Environment
Execute script from terminal: Rscript my_script.R
Load Data
read.csv('file.csv')
Read from CSV to data.frame
read.csv('file.tsv', sep = '\t')
Read from TSV to data.frame
alumni <- read.csv(path_alumni, na.strings = c('-'), colClasses = c('character', 'character', 'numeric', 'numeric'))
Data Frames
subset(df, <condition>)
Example: subset(statesInfo, state.region == 1)
df[ROWS, COLUMNS]
Example: statesInfo[statesInfo$state.region == 1, ]
Example2: statesInfo[statesInfo$state.region == 1 & statesInfo$population > 3000, ]
nrow(df)
ncol(df)
by(data, factorlist, function)
Ex: by(pf$friend_count, pf$gender, summary)
Data Overview
str(data)
Structure of the data
summary(data)
Summary of the data
head(data)
tail(data)
For factor variables (categoricals)
table(variable)
levels(variable)
reddit$age.range <- ordered(reddit$age.range, levels = c('Under 18', '18-24', '25-34', '35-44', '45-54', '55-64', '65 or Above'))
reddit$income.range <- factor(reddit$income.range, levels = c("Under $20,000", "$20,000 - $29,999", "$30,000 - $39,999", "$40,000 - $49,999", "$50,000 - $69,999", "$70,000 - $99,999", "$100,000 - $149,999", "$150,000 or more"), ordered = T)
Update packages
update.packages(ask=FALSE, checkBuilt = TRUE)
Load R script from GitHub gists
library(devtools)
source_gist("524eade46135f6348140", filename = "ggplot_smooth_func.R")