Skip to content

Instantly share code, notes, and snippets.

@sp00nman
sp00nman / zip_files.sh
Created March 28, 2017 11:06
Zip *.fastq (or any file) in directory
# Credits to Michael Schuster for this useful one-liner.
# Compress every *.fastq file below the current directory, in place.
#   find . -name '*.fastq' --> match *.fastq in the current directory and all of its subdirectories
#   -type f                --> regular files only, so a directory (or other non-file) named *.fastq
#                              is neither printed nor handed to gzip
#   -print                 --> print the path of each file that is processed
#   -exec gzip --best {}   --> run gzip at maximum compression on each file; gzip keeps the
#                              timestamp and replaces the original with <file>.gz
#   \;                     --> the backslash escapes the semicolon from bash; the semicolon
#                              marks the end of the -exec command line
find . -type f -name '*.fastq' -print -exec gzip --best {} \;
@sp00nman
sp00nman / id_conversion.R
Created January 15, 2016 17:36
Convert cufflinks gene ids to ensembl ids
#! /usr/bin/env Rscript
# Build a table of unique (gene_id, transcript_id) pairs read from a GTF file.

# Attach rtracklayer while silencing its package start-up messages.
suppressPackageStartupMessages(library("rtracklayer"))

# Read the annotation file; its gene_id and transcript_id columns are used below.
reference_granges <- import("file1.gtf")

# One row per annotation record, with IDs kept as plain character vectors ...
reference_frame <- data.frame(
  ensembl_gene_id = reference_granges$gene_id,
  ensembl_transcript_id = reference_granges$transcript_id,
  stringsAsFactors = FALSE
)
# ... then collapse duplicated gene/transcript combinations.
reference_frame <- unique(reference_frame)

# The imported object is no longer needed once the table exists.
rm(reference_granges)
@sp00nman
sp00nman / sort_bed.sh
Created October 23, 2015 12:09
sort bed file by chromosome; then by position
#!/bin/bash
# Sort a BED file by chromosome name (column 1, lexical order), then by
# position (column 2, numeric order).
#
# Usage:  sort_bed.sh <file.bed>
# Output: <file.bed>_sorted.bed (the input path with "_sorted.bed" appended)
BED_FILE=$1
if [[ -z "$BED_FILE" ]]; then
  # Without an argument, sort would wait on stdin and write to "_sorted.bed".
  printf 'Usage: %s <file.bed>\n' "${0##*/}" >&2
  exit 2
fi
# Quote the path so file names containing spaces or glob characters stay intact;
# "--" keeps a name starting with "-" from being parsed as an option.
sort -k1,1 -k2,2n -- "$BED_FILE" >"${BED_FILE}_sorted.bed"
## format of the input (chromosome, position)
#chr1 12
#chr2 56
#chr10 13
@sp00nman
sp00nman / rsync_repository.sh
Last active July 29, 2023 08:42
rsync to synchronize repositories before committing to github
# Gist kindly provided by M.Schuster (https://github.com/mkschuster)
rsync \
--verbose \
--recursive \
--delete \
--times \
--rsh 'ssh' \
--exclude '*~' \
--exclude '.DS_Store' \
@sp00nman
sp00nman / reformat_bed_chr
Last active July 29, 2023 08:42
Reformat bed file (add chr to chromosome)
#Example_input.bed
#ENST00000456328 1 + 11868 14409 11868 11868 3 11868,12612,13220, 12227,12721,14409, DDX11L1
# Prefix the chromosome name in column 2 with "chr".
# Plain field assignment is used instead of gsub(): gsub() would interpret the
# chromosome name as a regular expression (failing on names that contain regex
# metacharacters) and would turn blank lines into " chr".
# Lines with fewer than two fields are passed through unchanged.
# Note: assigning to $2 makes awk rebuild the line with single spaces between fields.
awk 'NF >= 2 { $2 = "chr" $2 } { print }' Example_input.bed >Example_output.bed
#Example_output.bed
#ENST00000456328 chr1 + 11868 14409 11868 11868 3 11868,12612,13220, 12227,12721,14409, DDX11L1
@sp00nman
sp00nman / merge_mulitplexed_samples
Last active July 29, 2023 08:43
Finds files with the same ID and merges them into one single file.
#!/bin/bash
# Positional arguments:
#   $1 -> UPD    file with unique patient IDs (UPDs), newline separated,
#                eg. H_0047C\nH_0060A\nH_0062D\n...
#   $2 -> SPATH  search path
UPD="${1}"
SPATH="${2}"
# FEXTENSION: file extension to search for [eg. .fastq.gz]
@sp00nman
sp00nman / calculate_intron_length_gtf
Last active July 29, 2023 08:42
Calculate intron length from gtf
#!/bin/awk -f
# Store selected fields of every input record in arrays indexed by the
# record number (NR).
BEGIN {
    OFS = "\t"    # output fields are joined with a tab
}

# Runs for every record; fields are split on awk's default separator.
# NOTE(review): fields 9-11 presumably hold GTF attribute tokens - confirm
# against the actual input layout.
{
    chr[NR]    = $1
    start[NR]  = $4
    end[NR]    = $5
    strand[NR] = $7
    ens[NR]    = $9
    symb[NR]   = $10
    ID[NR]     = $11
}
@sp00nman
sp00nman / merge_files_column
Created April 10, 2014 11:53
Merge different files by column with awk
# Join two tab-separated files on a shared key: file1 is keyed on its
# column 2, file2 is looked up by its column 4. Only file2 lines whose key
# occurs in file1 are printed, each followed by a tab and the file1 line.
awk -F'\t' '
  # First input file: remember each whole line under its column-2 key.
  FNR == NR {
    row_by_key[$2] = $0
    next
  }
  # Later input: print the line plus the stored file1 line when the key is known.
  ($4 in row_by_key) {
    print $0 "\t" row_by_key[$4]
  }
' file1 file2
@sp00nman
sp00nman / gtf_transcript_length
Last active February 9, 2024 07:03
Calculate transcript length of GTF file
# resource: http://seqanswers.com/forums/showthread.php?t=4914
# Calculate length for each transcript for a GTF file
awk -F"\t" '
$3=="exon"
{
ID=substr($9, length($9)-16, 15);
L[ID]+=$5-$4+1
}
END{
for(i in L)