Skip to content

Instantly share code, notes, and snippets.

View Shians's full-sized avatar

Shian Su Shians

View GitHub Profile
# parallel iteration pattern for processing chunks of data in parallel
fetch_and_process <- function(get_data_chunk, process_func, reduce_func) {
n_workers <- future::nbrOfWorkers()
results <- list()
i <- 1
out_of_data <- FALSE
while(TRUE) {
for (. in 1:n_workers) {
@Shians
Shians / split_n_chunks.R
Created February 24, 2020 22:39
Function to split a data.frame into n chunks
# Split a data.frame into (at most) `n` contiguous chunks of near-equal size.
#
# x: a data.frame (or subclass); rows keep their original order.
# n: number of chunks requested; a single finite number >= 1.
#
# Returns a named list of data.frames as produced by split(), named by chunk
# index. When n exceeds nrow(x), fewer than n chunks are returned.
split_n_chunks <- function(x, n) {
  stopifnot(
    is.data.frame(x),
    is.numeric(n), length(n) == 1L, is.finite(n), n >= 1
  )
  n_rows <- nrow(x)
  # Compute row_index * n / n_rows rather than row_index / (n_rows / n): the
  # product is exact, so a row sitting on a chunk boundary cannot be nudged
  # past it by rounding (e.g. 64 rows with n = 49 used to yield a 50th chunk).
  # as.numeric() avoids integer overflow when n is supplied as an integer.
  chunk_id <- ceiling(as.numeric(seq_len(n_rows)) * n / n_rows)
  split(x, chunk_id)
}
@Shians
Shians / extract_bam_tag.sh
Last active October 11, 2024 01:38
Extract BAM tag as tsv
#!/usr/bin/env bash
# called by
# sh extract_bam_tag.sh input.bam BC
# to print read_id and BC tag value
# two arguments, a bam file and the tag to extract
BAM=$1
TAG=$2
library(GenomicAlignments)
alignments <- GAlignments(
seqnames = Rle(factor(rep("chrX", 3))),
pos = c(37701723L, 37753335L, 37753335L),
cigar = c(
"24H6M1I13M2D5M1D12M1I16M3I6M2D20M1D18M1I34M2I116M2D9M1D9M1I30M1D9M1D44M6I3M2I4M2D19M9D7M1I5M1D18M1I17M2D1M1D16M1D32M1D3M1D18M1I3M1D49M2D34M2D8M1D7M2D1M1D5M1I2M5D4M1D2M1I3M2D10M1I21M1D20M2D1M1D2M1D18M2I3M2D74M1D9M1D5M2D34M1D12M2D20M1I2M1I35M1D10M2D16M1I55M3D82M1D6M1I65M2I8M1I30M1D7M2I15M1D15M1D20M1I2M1I14M1D29M3D12M2D3M1D5M1D20M1D21M2D56M1D7M4D2M1I60M2I9M1D49M2I5M2I28M13206H",
"6994H26M2I6M1I1M1I50M1D15M2I4M4D35M1D9M1D5M2D37M1I54M1D59M3I27M1I3M1D5M1I8M1I4M2I10M1I47M1D25M1I84M1D8M7D8M1D69M1I23M3D6M1D1M1D11M2D8M1I17M1D43M1I17M1D60M1I1M3D17M1D17M2D1M1D4M1I12M1D28M1D13M1D7M2I44M5D30M1D2M2D1M1D20M1D23M1I7M1D22M1D3M2D73M1I4M5D23M3D4M2D23M1D13M1D9M3D12M1I9M1D11M1D1M3I31M2I56M1D5M6461H",
"308H103M1D16M2D47M1D1M1D18M1D36M1I26M3I51M1D2M1I8M1D9M1D17M3I53M1D15M1D7M3I22M1D60M3I10M2D20M7D8M1D65M2I16M1D12M1D6M1D11M3D4M1D4M1I8M1I31M3I7M2I5M1I10
@Shians
Shians / generate_nanopolish_index.R
Last active August 29, 2019 01:14
Generate nanopolish index in parallel
#!/usr/bin/env Rscript
library(fs)
library(rhdf5)
library(parallel)
library(purrr)
library(stringr)
library(dplyr)
library(tidyr)
index_fast5 <- function(fastq_file, fast5_dir) {
@Shians
Shians / Roxygen2_tags.md
Last active August 9, 2019 07:43
Roxygen2 Tags

This gist is a summary of roxygen2 tags; see the roxygen2 docs for the source of this information.

Header Section

The initial three sections are automatically placed into title, description and details fields of the documentation. The details field is optional.

#' Title goes here
#'
#' Description goes here
@Shians
Shians / gist:827a20ad8a4896cab76cde95a0dd3d05
Last active July 12, 2019 07:37
Zsh-fish style command prompt
# Append an `export PS1=...` line to ~/.bash_profile. The prompt is the \W
# escape followed by three "❯" glyphs using the colour codes 1;31 (bold red),
# 1;33 (bold yellow) and 1;32 (bold green), then a reset (\e[0m) and a space.
# Note: `>>` appends, so running this more than once adds duplicate lines.
echo 'export PS1="\W \[\e[1;31m\]❯\[\e[1;33m\]❯\[\e[1;32m\]❯\[\e[0m\] "' >> ~/.bash_profile
# Source the profile so the new prompt applies to the current shell.
. ~/.bash_profile
@Shians
Shians / ggplot2_custom_theme.R
Last active March 8, 2019 00:47
My preferred options for ggplot2
theme_set(
theme_classic() +
theme(
panel.grid.major = element_line(colour = "gray"),
plot.title = element_text(
face = "plain",
size = rel(20/12),
hjust = 1/2,
margin = margin(t = 10, b = 20)
)
@Shians
Shians / lorem_ipsum.R
Last active March 1, 2019 02:50
Some gibberish that looks like code
lorem <- function(ipsum) {
dolor <- sit(amet, consectetur)
dolor <- adipiscing(dolor, elit, sed, do)
# aliqua Ut enim ad minim veniam, quis nostrud exercitation
eiusmod <- tempor(incididunt, ut)
labore <- et(dolor, magna)
# ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute
reprehenderit <- numeric()
@Shians
Shians / google.R
Created March 1, 2019 00:29
Function to search google from R
# Open a Google search for `query` in the default web browser.
#
# query: a single, non-missing character string holding the search terms.
#
# Returns whatever browseURL() returns; called for its side effect.
google <- function(query) {
  stopifnot(is.character(query), length(query) == 1L, !is.na(query))
  # reserved = TRUE also percent-encodes characters such as "&", "#", "+" and
  # "=", which would otherwise be read as URL syntax and truncate or alter
  # the search terms.
  encoded_query <- utils::URLencode(query, reserved = TRUE)
  utils::browseURL(paste0("https://www.google.com/search?q=", encoded_query))
}