Skip to content

Instantly share code, notes, and snippets.

@variani
variani / r-read.R
Created August 21, 2020 13:00
R tidyverse
library(tidyverse)
# read selected columns: specify columns through 'col_types'
# `cols_only()` reads just the listed columns; here only `Species`, parsed as a
# factor with the three known iris levels.
readr::read_csv(
  "iris.csv",
  col_types = cols_only(
    Species = col_factor(levels = c("setosa", "versicolor", "virginica"))
  )
)
@variani
variani / r-adjacency-matrix.R
Last active October 6, 2020 20:05
Graphs with R package igraph
# Sparse Matrices in R: https://slowkow.com/notes/sparse-matrix/
library(igraph)
library(Matrix)
library(tidyverse)
#-------------------------------
# Adjacency matrix using Matrix
#-------------------------------
@variani
variani / commands.sh
Last active August 17, 2020 17:53
Bash scripts
#--------------------------------
# file operations with patterns
#--------------------------------
# list/delete files output/{chr}.out such that {chr} = 1-10 (other files with {chr} = 11-22 are untouched)
ls output/{1..10}.*
#--------------------
# misc. caveats
@variani
variani / convert-formats.sh
Last active April 2, 2024 18:08
plink commands
# ------------------
# from bgen to bed
# ------------------
# filter by chr or by snps names
# NOTE: keep comments above the command. A comment line placed between
# backslash-continued lines is joined onto the previous line, which ends the
# command there and leaves the remaining options to run as a separate command.
plink2 --bgen myfile.bgen \
  --chr 1 --extract {params.snps} \
  --make-bed --out myoutput
# 1. plink will create temporary/intermediate files such as *-temporary.{pgen,psam,pvar}
# 2. sample ids in .fam will be `ID1_ID1`
@variani
variani / formattable-formatters.R
Last active April 22, 2020 13:57
r-print-tables.R
# print NAs as blank: https://stackoverflow.com/a/44348232
@variani
variani / r-query-tables.R
Last active March 26, 2020 20:40
r-query.R
# Sorting keys before look up speeds up queries.
# data.table outperforms base R and tidyverse
#
# Copied from: https://appsilon.com/fast-data-lookups-in-r-dplyr-vs-data-table/
#
# Benchmarks:
#Unit: microseconds
# expr min lq mean median uq
# look_up_base() 50762.103 53147.945 105500.0550 60221.954 62802.136
# look_up_dplyr() 47527.238 49218.827 99142.9265 53923.058 58269.192
@variani
variani / r-lm-pval.R
Created March 24, 2020 20:22
Linear regression in R
library(magrittr)
data(mtcars)
# Fit mpg ~ am on mtcars, summarise the fit, and expose the summary's
# `coefficients` element via the magrittr exposition pipe (%$%).
lm(mpg ~ am, data = mtcars) %>%
  summary() %$%
  coefficients
# Estimate Std. Error t value Pr(>|t|)
#(Intercept) 17.147368 1.124603 15.247492 1.133983e-15
#am 7.244939 1.764422 4.106127 2.850207e-04
z <- 4.106127
# Keep the columns of `X` selected by the pivoted QR decomposition.
#
# Computes `qr(X)` and returns the columns of `X` whose indices are the first
# `rank` entries of the decomposition's `pivot` vector; the remaining
# (ncol(X) - rank) pivot entries are the columns that get dropped.
#
# @param X A numeric matrix accepted by `qr()`.
# @return `X` subset to the kept columns. `drop = FALSE` keeps the result
#   two-dimensional even when one (or zero) columns remain.
#
# NOTE(review): this function is named `drop`, so it masks `base::drop()` in
# the environment where it is defined.
drop <- function(X) {
  decomp <- qr(X)
  r <- decomp$rank # matrix rank
  # seq_len() instead of seq(): for r == 0, seq(r) is c(1, 0) and would
  # wrongly keep one column of a rank-zero matrix.
  cols_keep <- decomp$pivot[seq_len(r)] # first r indices in `pivot`
  X[, cols_keep, drop = FALSE] # use drop = FALSE in single-column case
}
# References
# - http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/118-principal-component-analysis-in-r-prcomp-vs-princomp/
## inc
library(tidyverse)
library(pls)
library(ggplot2)
library(cowplot)
theme_set(theme_cowplot())
## examples to read tables by data.table::fread
# read selected columns
fread(my_file, select = c("ID", "FID"))
# read selected columns & specify column classes
# `colClasses` should name a column that is actually read: keying it on "IID",
# which is not in `select`, has no effect on the returned columns.
fread(my_file, select = c("ID", "FID"), colClasses = c(ID = "character"))