Last active
September 8, 2016 05:12
-
-
Save agalea91/6fe51335315b69d908583de44cb5e3a0 to your computer and use it in GitHub Desktop.
Tips and tricks with R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Miscellaneous tips for using R
# Select a set of columns from a data frame (dplyr's select() with the pipe).
# A leading minus drops the named columns; bare names keep only those columns.
wage_red <- wage %>%
  select(-sibs, -brthord, -meduc, -feduc)
wage_subset <- wage %>%
  select(hours, lwage)
# Use q + distribution type to get a confidence interval.
# The slope's estimate and standard error are read by row and column name:
# the linear index `$coefficients[4]` is the slope's standard error only when
# the model has exactly two coefficients (intercept + one predictor).
beta_iq_mean <- summary(lm.wage_1)$coefficients[2, "Estimate"]
beta_iq_std <- summary(lm.wage_1)$coefficients[2, "Std. Error"]
# 95% interval from Student-t quantiles (t = Student-t dist.; qnorm can be
# used the same way for a normal approximation). The residual degrees of
# freedom come from the fitted model instead of a hard-coded 933.
# NOTE(review): assumes lm.wage_1 is an `lm` fit -- confirm.
qt(p = c(0.025, 0.975), df = df.residual(lm.wage_1)) * beta_iq_std +
  beta_iq_mean
# Alternate method for a confidence interval (unsure about this one, but
# worth trying). BPM_pred_lwage is the prediction object built in the bas.lm
# example further down; it is basically a linear model.
# The argument is spelled `parm`; a misspelled `param` is silently absorbed
# by `...` and the default is used instead.
ci <- confint(BPM_pred_lwage, parm = "pred")
# Position of the largest fitted value, then the interval for that row.
top_y <- which.max(BPM_pred_lwage$fit)
ci[top_y, ]
# Adjust the plotting environment to display multiple plots side by side.
# par() returns the previous settings; keep them so the layout can be put
# back with par(old_par) once the plots are drawn.
old_par <- par(mfrow = c(1, 3)) # plot 3 in a row
# Model prediction with the BAS library's bas.lm.
# Regress lwage on every other column except wage, using complete rows only.
lm.wage <- bas.lm(
  formula = lwage ~ . - wage,
  data = na.omit(wage),
  prior = "BIC",
  modelprior = uniform()
)
lm.wage
summary(lm.wage)
# Predict from the model fitted above (the object is `lm.wage`; no other
# model object is defined in this script).
# estimator: "BPM" = best predictive model; alternatives are "MPM" (median
# probability model) and "HPM" (highest probability model).
BPM_pred_lwage <- predict(lm.wage, estimator = "BPM", se.fit = TRUE)
# Names of the variables in the selected model.
# NOTE(review): the +1 presumably converts 0-based variable indices
# (intercept = 0) to R's 1-based indexing -- confirm against the BAS docs.
lm.wage$namesx[BPM_pred_lwage$bestmodel + 1]
# Print an easy-to-read data frame: transposing shows one variable per row,
# which is handy for wide data frames with few rows.
t(head(iris)) # swap iris for your own data frame
# Get the features corresponding to the max element (the same trick works
# for numpy arrays in Python as well).
y <- BPM_pred_lwage$fit # predicted values of y for the training features
top_y <- which.max(y) # position of the largest prediction
X <- na.omit(wage) # X is a data frame containing the features
t(X[top_y, ]) # transpose so the single row prints one feature per line
# Useful dplyr functions
# filter on a given column
df.diamonds_ideal <- filter(diamonds, cut == "Ideal")
# select a set of columns
df.diamonds_ideal <- select(df.diamonds_ideal, carat, cut, color, price, clarity)
# create a new column
df.diamonds_ideal <- mutate(df.diamonds_ideal, price_per_carat = price / carat)
# sort by a given column (df.disordered_data / num_var stand in for your own
# data frame and column)
arrange(df.disordered_data, num_var)
# sort in reversed order (big to small)
arrange(df.disordered_data, desc(num_var))
# chaining: the output of each command becomes the input for the next
df.diamonds_ideal_chained <- diamonds %>%
  filter(cut == "Ideal") %>%
  select(carat, cut, color, price, clarity) %>%
  mutate(price_per_carat = price / carat)
# Equivalent of seaborn's (sns) pairplot
library(ggplot2)
library(GGally)
# Aesthetics go through `mapping = aes(...)`; colour the panels by species
# and make the geoms semi-transparent.
ggpairs(iris, mapping = aes(colour = Species, alpha = 0.4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment