Ming Tang crazyhottommy

## data-ggplot-evolution-vol1.r
library(tidyverse)

df_students <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv")

df_world_tile <- readr::read_csv("https://gist.githubusercontent.com/maartenzam/787498bbc07ae06b637447dbd430ea0a/raw/9a9dafafb44d8990f85243a9c7ca349acd3a0d07/worldtilegrid.csv") %>%
  mutate(
    ## Namibia's two-digit country code is handled as `NA` - let us fix that
    alpha.2 = if_else(name == "Namibia", "NA", alpha.2),
    ## We are going to split "Americas" into "North America" and "South America"
    region = if_else(region == "Americas", sub.region, region),

## example_modulefile.lua
--[[
## Modulefile in lua syntax
## Author: Samir Amin

## Read about Lmod
## https://lmod.readthedocs.io/en/latest/015_writing_modules.html
## https://lmod.readthedocs.io/en/latest/050_lua_modulefiles.html
## https://lmod.readthedocs.io/en/latest/020_advanced.html
--]]

## auto-insert-python.el
(defun new-copyright ()
  "Generate new copyright string."
  (format-time-string "Copyright %Y Michael M. Hoffman <michael.hoffman@utoronto.ca>"))

(define-auto-insert 'python-mode
  `(
    "Description: "
    "#!/usr/bin/env python3.6" \n
    "\"\"\"" (setq basename (buffer-file-basename)) ": " str \n
    "\"\"\"" \n \n

## merge_then_call_consensus.sh
#!/bin/bash
#
# This script will find the consensus peak regions from peak files (in
# BED format) of multiple samples by:
#
# 1. Converting the peak file of each sample into non-overlapping 3
# cols BED file and concatenating them;
#
# 2. Sorting the concatenated file and Building a genome coverage
# track in BedGraph, of which the value (the 3rd col) indicates the

## iterm2-solarized.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                crazyhottommy
                / iterm2-solarized.md
            
            
              Created
              March 26, 2020 04:16
                — forked from kevin-smets/iterm2-solarized.md
            
              
                iTerm2 + Oh My Zsh + Solarized color scheme + Source Code Pro Powerline + Font Awesome + [Powerlevel10k] - (macOS)
              
          
    Default


Powerlevel10k


## Pkgdown deploy from Travis.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                crazyhottommy
                / Pkgdown deploy from Travis.md
            
            
              Created
              February 13, 2020 18:38
                — forked from gaborcsardi/Pkgdown deploy from Travis.md
            
              
                Walkthrough: deploy a pkgdown site from Travis 
              
          
    Run usethis::use_pkgdown_travis()

❯ usethis::use_pkgdown_travis()
✔ Setting active project to '/Users/gaborcsardi/works/ps'
✔ Adding 'docs/' to '.gitignore'
● Set up deploy keys by running `travis::use_travis_deploy()`
● Insert the following code in '.travis.yml'
  before_cache: Rscript -e 'remotes::install_cran("pkgdown")'

  
## BSdownload.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                crazyhottommy
                / BSdownload.md
            
            
              Created
              August 23, 2019 03:51
                — forked from lh3/BSdownload.md
            
              
                Download files from Illumina's BaseSpace
              
          
    References:

https://support.basespace.illumina.com/knowledgebase/articles/403618-python-run-downloader
https://developer.basespace.illumina.com/docs/content/documentation/rest-api/api-reference

Steps:

Follow steps 1-5 in the first link above to acquire access_token. This will take a while, but you only need to do this once. Never share this token!!
Find the file you want to download. Copy the link, which looks something like:
https://basespace.illumina.com/sample/9804795/files/tree/NA12878-L1_S1_L001_R1_001.fastq.gz?id=515013503. The "id" is the unique file identifier.
Download the file with: wget -O filename 'https://api.basespace.illumina.com/v1pre3/files/{id}/content?access_token={token}', where {token} is from step 1 and {id} from step 2.


## alevin.snk
DATASETS = ["PBMC_8K", "PBMC_4k"]

SALMON = "$BINS/salmon-0.14.0_linux_x86_64/bin/salmon"

rule all:
  input: expand("quants/{dataset}/alevin/quants_mat.gz", dataset=DATASETS)

rule salmon_quant:
    input:
        r1 = "reads/{sample}_1.fastq",

## check_phred.sh
# Guess the quality-score encoding of a gzipped FASTQ file.
# Approach taken from https://www.biostars.org/p/63225/
#
# Pipeline:
#   1. decompress the file and keep only the first 40 lines (10 four-line
#      records);
#   2. keep every 4th line (the quality string) and concatenate them;
#   3. dump the bytes as unsigned decimal values with od;
#   4. track the smallest and largest byte value and classify the encoding
#      from that range.

FILE=VLI9_AA_S60_L008_R1_001.fastq.gz

# "$FILE" is quoted so a path containing spaces or glob characters is passed
# to zcat as a single argument.
zcat "$FILE" \
  | head -n 40 \
  | awk '{if(NR%4==0) printf("%s",$0);}' \
  | od -A n -t u1 \
  | awk '
      BEGIN { min = 100; max = 0 }
      {
        # every field is the decimal value of one quality character
        for (i = 1; i <= NF; i++) {
          if ($i > max) { max = $i }
          if ($i < min) { min = $i }
        }
      }
      END {
        if (max <= 74 && min < 59) {
          print "Phred+33"
        } else if (max > 73 && min >= 64) {
          print "Phred+64"
        } else if (min >= 59 && min < 64 && max > 73) {
          print "Solexa+64"
        } else {
          print "Unknown score encoding"
        }
      }'

## plot_aligned_series.R
#' When plotting multiple data series that share a common x axis but different y axes,
#' we can just plot each graph separately. This suffers from the drawback that the shared axis will typically
#' not align across graphs due to different plot margins.
#' One easy solution is to reshape2::melt() the data and use ggplot2's facet_grid() mapping. However, there is
#' no way to label individual y axes.
#' facet_grid() and facet_wrap() were designed to plot small multiples, where both x- and y-axis ranges are
#' shared across all plots in the facetting. While the facet_ calls allow us to use different scales with
#' the \code{scales = "free"} argument, they should not be used this way.
#' A more robust approach is to use the grid package's grid.draw(), rbind() and ggplotGrob() to create a grid of
#' individual plots where the plot axes are properly aligned within the grid.
	library(tidyverse)

	df_students <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv")

	df_world_tile <- readr::read_csv("https://gist.githubusercontent.com/maartenzam/787498bbc07ae06b637447dbd430ea0a/raw/9a9dafafb44d8990f85243a9c7ca349acd3a0d07/worldtilegrid.csv") %>%
	mutate(
	## Namibia's two-digit country code is handled as `NA` - let us fix that
	alpha.2 = if_else(name == "Namibia", "NA", alpha.2),
	## We are going to split "Americas" into "North America" and "South America"
	region = if_else(region == "Americas", sub.region, region),
	--[[
	## Modulefile in lua syntax
	## Author: Samir Amin

	## Read about Lmod
	## https://lmod.readthedocs.io/en/latest/015_writing_modules.html
	## https://lmod.readthedocs.io/en/latest/050_lua_modulefiles.html
	## https://lmod.readthedocs.io/en/latest/020_advanced.html
	--]]
	#!/bin/bash
	#
	# This script will find the consensus peak regions from peak files (in
	# BED format) of multiple samples by:
	#
	# 1. Converting the peak file of each sample into non-overlapping 3
	# cols BED file and concatenating them;
	#
	# 2. Sorting the concatenated file and Building a genome coverage
	# track in BedGraph, of which the value (the 3rd col) indicates the
	DATASETS = ["PBMC_8K", "PBMC_4k"]

	SALMON = "$BINS/salmon-0.14.0_linux_x86_64/bin/salmon"

	rule all:
	input: expand("quants/{dataset}/alevin/quants_mat.gz", dataset=DATASETS)

	rule salmon_quant:
	input:
	r1 = "reads/{sample}_1.fastq",
	# https://www.biostars.org/p/63225/

	FILE=VLI9_AA_S60_L008_R1_001.fastq.gz

	zcat $FILE | head -n 40 | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding";}'
	#' When plotting multiple data series that share a common x axis but different y axes,
	#' we can just plot each graph separately. This suffers from the drawback that the shared axis will typically
	#' not align across graphs due to different plot margins.
	#' One easy solution is to reshape2::melt() the data and use ggplot2's facet_grid() mapping. However, there is
	#' no way to label individual y axes.
	#' facet_grid() and facet_wrap() were designed to plot small multiples, where both x- and y-axis ranges are
	#' shared across all plots in the facetting. While the facet_ calls allow us to use different scales with
	#' the \code{scales = "free"} argument, they should not be used this way.
	#' A more robust approach is to use the grid package's grid.draw(), rbind() and ggplotGrob() to create a grid of
	#' individual plots where the plot axes are properly aligned within the grid.