Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 04:54 (UTC +09:00)
View GitHub Profile
@mbk0asis
mbk0asis / enzymeFinder.R
Last active June 29, 2023 09:19
Find restriction enzyme sites in a genome
## Find restriction enzyme sites in a genome
library(Biostrings)
library(GenomicRanges)
# Read the genome sequence from a FASTA file
genome <- readDNAStringSet("/home/sc/00--NGS/ANNOTATION/Homo_sapiens_UCSC_GRCh38/Genome/genome.fa")
head(genome)
# enzyme sequence
@mbk0asis
mbk0asis / Add a space in a boxplot
Created August 10, 2020 01:13
To add a space to separate groups
df <- data.frame(cntNorm2$id_1,
mean_WT_PBMC_2m,mean_WT_PBMC_24m,mean_WT_PBMC_28m,
mean_HD_PBMC_Young_F, mean_HD_PBMC_Young_M, mean_HD_PBMC_Old_F, mean_HD_PBMC_Old_M)
# add a fake column with an extremely large value
df$empty <- c(rep(100000,nrow(df)))
# turns outliers (> upper quartile * 5) into NAs
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages(c("BiocManager","dplyr","gplots"))
BiocManager::install(c("GEOquery"))
#########################################################################
# load libraries
library(Biobase)
library(GEOquery)
library(limma)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(tidyverse)
library(gridExtra)
library(Biobase)
library(GEOquery)
library(limma)
library(matrixStats)
#!/bin/bash
# Variant calling using 'bcftools mpileup - htslib'
export LC_ALL=C
printf "*** SNVcounter\n
*** 'samtools, bcftools, and bedtools' must be installed to run this script\n
*** Prepare 'reference fasta' and 'SNV list' in BED format\n
*** merge consecutive SNVs using\n
*** 'sort -k1,1 -k2,2n SNV.bed | mergeBed -c 4 -o collapse'\n
BiocUpgrade()
source("http://bioconductor.org/biocLite.R")
biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db","calibrate","AnnotationFuncs","gage","Rtsne","ggrepel"))
#library(TCGAbiolinks)
library(DESeq2)
library(pcaExplorer)
library(ggfortify)
library(ggplot2)
library(gplots)
@mbk0asis
mbk0asis / TCGAbiolinks - DESeq2 pipeline
Created May 20, 2019 08:20
TCGAbiolinks - DESeq2 pipeline
library(ggplot2)
library(ggpmisc)
library(ggpubr)
library(ggrepel)
library(reshape2)
setwd("~/BIO2/DAM_ID/DamID_2018/SE/mm9")
setwd("~/BIO2/DAM_ID/RNAseq_shDnmt1/mm9")
te = "LINE.5kb"
####################################################
# #
# to report base composition at all mapped sites #
# #
####################################################
# only when NM tag is missing in BAM
$ samtools calmd BAM ref.fasta | samtools view -b - -o BAM.NMtag.bam
# bam-readcount (use "-l" for regions of interest, "-w 0" for no warning)
# index the genome
$ bwa index hs37d5.fa
# create a genome dictionary file
$ picard-tools CreateSequenceDictionary R=genome.fa O=genome.dict
# align reads using 'bwa mem'
$ls -1 *.gz | cut -d. -f1 | sort | uniq | while read l; do bwa mem -t 40 ~/00-NGS/Annotation/HipSci/hs37d5.fa $l.1.val.1.fq.gz $l.2.val.2.fq.gz > $l.PE.sam ; done &
# sam to bam