Skip to content

Instantly share code, notes, and snippets.

@agalea91
Last active September 8, 2016 05:12
Show Gist options
  • Save agalea91/6fe51335315b69d908583de44cb5e3a0 to your computer and use it in GitHub Desktop.
Save agalea91/6fe51335315b69d908583de44cb5e3a0 to your computer and use it in GitHub Desktop.
Tips and tricks with R
### Miscellaneous tips for using R
# Keep or drop columns of a data frame with select()
# A leading minus drops the named columns and keeps all the others
wage_red <- select(wage, -sibs, -brthord, -meduc, -feduc)
# Bare column names keep only those columns
wage_subset <- select(wage, hours, lwage)
# Use q + dist type to get a confidence interval: quantiles of the reference
# distribution, scaled by the standard error and shifted by the estimate
coef_table <- summary(lm.wage_1)$coefficients
# Index by row and column name: a flat index such as [4] only lands on the
# slope's standard error when the table has exactly two rows
beta_iq_mean <- coef_table[2, "Estimate"]
beta_iq_std <- coef_table[2, "Std. Error"]
# Read the degrees of freedom off the fitted model instead of hard-coding them
qt(p = c(0.025, 0.975), df = df.residual(lm.wage_1)) * beta_iq_std + beta_iq_mean # t = Student-t dist., can also use e.g. qnorm
# Alternate method for a confidence interval (unsure about this one, but worth trying)
# The argument is spelled `parm` (as in the confint() generic); the misspelling
# `param` is not matched to it and is passed through `...` instead
ci <- confint(BPM_pred_lwage, parm = "pred") # see bas.lm example below; BPM_pred_lwage is the predict() output for that model
# Row index of the largest predicted value
top_y <- which.max(BPM_pred_lwage$fit)
# Interval for that observation
ci[top_y, ]
# Split the plotting device into a grid so several plots sit side by side
graphics::par(mfrow = c(1, 3)) # 1 row x 3 columns: plot 3 in a row
# Model prediction with the BAS library's bas.lm
# Regress lwage on every other column except wage, using complete rows only
lm.wage <- bas.lm(
  formula = lwage ~ . - wage,
  data = na.omit(wage),
  prior = "BIC",
  modelprior = uniform()
)
lm.wage
summary(lm.wage)
# Predict from the model fitted above (it was previously referenced under the
# undefined name `bma_lwage`)
BPM_pred_lwage <- predict(
  lm.wage,
  estimator = "BPM", # BPM = "best predictive model", alt. MPM (median probability), HPM (highest probability)
  se.fit = TRUE
)
# Names of the variables in the selected model
# NOTE(review): the +1 presumably shifts 0-based model indices (0 = intercept)
# onto R's 1-based namesx vector; confirm against the BAS documentation
lm.wage$namesx[BPM_pred_lwage$bestmodel + 1]
# Print an easy-to-read data frame: transposing shows one variable per row,
# which is easier to scan when there are many columns and few rows
example_df <- head(mtcars, 3) # stand-in data; substitute your own data frame
t(example_df)
# Get the features corresponding to the max element
# (the same argmax-then-index trick works for numpy arrays in Python as well)
X <- na.omit(wage) # X is a data frame containing the features
y <- BPM_pred_lwage$fit # predicted values of y for the training features
top_y <- which.max(y) # row position of the largest prediction
# Transpose the single row so each feature prints on its own line
t(X[top_y, ])
# Useful dplyr functions (assumes dplyr is attached; `diamonds` is the ggplot2 example dataset)
# filter(): keep the rows matching a condition on a given column
df.diamonds_ideal <- filter(diamonds, cut == "Ideal")
# select(): keep a set of columns
df.diamonds_ideal <- select(
  df.diamonds_ideal,
  carat, cut, color, price, clarity
)
# mutate(): create a new column
df.diamonds_ideal <- mutate(
  df.diamonds_ideal,
  price_per_carat = price / carat
)
# arrange(): sort by a given column
arrange(df.disordered_data, num_var)
# desc(): sort in reversed order (big to small)
arrange(df.disordered_data, desc(num_var))
# Chaining with %>%: the output of each command becomes the input for the next
df.diamonds_ideal_chained <- diamonds %>%
  filter(cut == "Ideal") %>%
  select(carat, cut, color, price, clarity) %>%
  mutate(price_per_carat = price / carat)
# Equivalent of seaborn's (sns) pairplot
library(ggplot2)
library(GGally)
# Aesthetics are passed through `mapping = aes(...)`
# NOTE(review): loose `colour = "Species"` / `alpha = 0.4` arguments are the old
# pre-1.0 GGally interface and are reportedly ignored by current releases;
# confirm against the installed GGally version
ggpairs(iris, mapping = aes(colour = Species, alpha = 0.4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment