Skip to content

Instantly share code, notes, and snippets.

View dinovski's full-sized avatar

Dina dinovski

  • New York Genome Center
  • New York, NY
View GitHub Profile
@dinovski
dinovski / addallgt.py
Last active September 7, 2018 20:25
edit genotype in gvcf
#!/usr/bin/env python
## get individual genotype from raw vcf
## output is tab-delimited chr \t start \t end \t ref \t alt \t GT \t ref,alt
usage="""
read in tab-delimited bedfile: chr,start,end,ref,alt,gt
usage: python addallgt.py <infile> <outfile>
"""
import sys
@dinovski
dinovski / sanger2fastq.py
Last active February 4, 2023 16:59
convert sanger output to fastq
#! /usr/bin/env python
import os
import sys
try:
SEQ_DIR = sys.argv[1]
except:
sys.stderr.write("Usage: python sanger2fastq.py <directory with.seq files>\n")
sys.exit(1)
@dinovski
dinovski / n50.py
Last active June 21, 2022 00:25
calculate N50 from fasta/contigs file
#!/usr/bin/env python
## calculate N50 from fasta file
## N50 = contig length L such that contigs of length >= L together contain at least half of the total assembly length
import commands
import sys
import os
from itertools import groupby
import numpy
@dinovski
dinovski / 96well_alpha2numeric.py
Created February 16, 2018 14:01
convert 96 well name to numeric position
#!/usr/bin/env python
import csv
import string
import datetime
import random
import json
import re
import sys
import os
@dinovski
dinovski / getTSS.sh
Last active October 17, 2018 12:16
TSS +/- N bp from GTF > BED6
#!/bin/bash
BEDTOOLS=/usr/bin/bedtools/bin
ODIR=/data/dbase/
GENOME=hg19
SLOP=3000
SLOP_OPTS='-l 3000 -r 3000'
GENE_BED=${IDIR}/${GENOME}_3kb.bed
@dinovski
dinovski / rnaseq_report.Rmd
Last active August 12, 2018 14:21
differential expression analysis markdown
---
title: "__RNAseq Summary__"
output: pdf_document
date: '`r Sys.Date()`'
documentclass: article
classoption: a4paper
geometry: margin=2cm
---
```{r setup, include=FALSE}
@dinovski
dinovski / dbSNPclean.py
Last active September 8, 2018 21:40
get common variants from dbSNP
#!/usr/bin/env python
usage="""
# get dbSNP file
wget --timestamping 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/snp147Common.txt.gz' -O snp147Common.txt.gz
python dbsnp_clean.py <snp147Common.txt> <snp147common_clean.bed>
"""
if len(sys.argv) != 3:
print usage
@dinovski
dinovski / iupac_change.py
Last active September 7, 2018 20:03
change IUPAC nucleotide code to nucleotide
#!/usr/bin/env python
usage="""
iupac_change.py <infile> <outfile>
change IUPAC nucleotide codes for tab delimited file with CHR, ID, REF, ALT columns
"""
import sys
import os
@dinovski
dinovski / tstv.sh
Last active April 12, 2023 14:19
compute transition/transversion rate across all sites in a VCF file
#!/bin/bash
## calculate Ts/Tv across all sites (includes AC=0)
## file must be b/gzipped
## ./tstv.sh file.vcf.gz
VCF=$1
# get count for transitions:
ag=$(zcat ${VCF} | awk '! /\#/' | awk '{if(length($4) == 1 && length($5) == 1) print}' | \
@dinovski
dinovski / qualDist.py
Last active September 8, 2018 21:38
quality score distribution from fastq
#!/usr/bin/env python
usage="""
## quality score distribution from fastq
gunzip -c fastq.gz | python qualDist.py
"""
import sys
num = 0