André F. Rendeiro (afrendeiro)

@afrendeiro
afrendeiro / mast2tsv.py
Last active August 29, 2015 14:04
Parses MAST XML output, extracts the relevant information, and writes a tab-delimited file
#!/usr/bin/env python
from argparse import ArgumentParser
from BeautifulSoup import BeautifulSoup
import csv
# argparser
parser = ArgumentParser(description='Parses MEME-MAST xml output.',
                        usage='python mast2tsv.py mast.output.xml mast.output.tsv')
# positional arguments
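# --- hedged sketch of how the script might continue; the 'hit' element
# and its 'seq'/'pos'/'motif' attributes are guesses at the MAST XML
# schema, not the gist's actual code ---
parser.add_argument('xml', help='MAST XML output file')
parser.add_argument('tsv', help='tab-delimited output file')
args = parser.parse_args()
soup = BeautifulSoup(open(args.xml).read())
writer = csv.writer(open(args.tsv, 'w'), delimiter='\t')
for hit in soup.findAll('hit'):
    writer.writerow([hit.get('seq'), hit.get('pos'), hit.get('motif')])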
@afrendeiro
afrendeiro / cosmic.py
Last active August 29, 2015 14:12
Mining the COSMIC database
#!/usr/bin/env python
# Mining "COSMIC: Catalogue Of Somatic Mutations In Cancer" database
import os
from subprocess import call
import pandas as pd
# COSMIC requires a login to download data, so neither BioMart nor plain wget actually works.
call(["wget", "http://cancer.sanger.ac.uk/files/cosmic/current_release/CosmicCompleteExport.tsv.gz"])
@afrendeiro
afrendeiro / hdf5_to_csv.py
Last active August 29, 2015 14:12
Playing with CellProfiler
#!/usr/bin/env python
import os
import h5py
import pandas as pd
projectDir = '/home/afr/workspace/cellprofiler/'
# open hdf5
hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r')
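# --- hedged sketch of inspecting the file; the dataset path below is a
# placeholder, not CellProfiler's real layout ---
def show(name, obj):
    # visititems walks every group/dataset, passing its path and object
    print(name, obj)
hdf5.visititems(show)
# once a measurement table is located, dump it to CSV:
# data = hdf5['path/to/dataset'][()]  # placeholder path
# pd.DataFrame(data).to_csv(projectDir + 'measurements.csv', index=False)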
@afrendeiro
afrendeiro / read_distances.py
Last active August 29, 2015 14:13
Counting read spacing
#!/usr/bin/env python
from argparse import ArgumentParser
import os, re
from pybedtools import BedTool
import HTSeq
import numpy as np
import pandas as pd
import string
import itertools
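# --- hedged sketch of the counting idea, assuming reads come from a BED
# file and 'spacing' means the distance between consecutive read starts
# ('reads.bed' is a placeholder file name) ---
reads = BedTool("reads.bed").sort()
starts = np.array([interval.start for interval in reads])
spacing = np.diff(starts)
print(pd.Series(spacing).value_counts().sort_index().head())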
@afrendeiro
afrendeiro / divideAndSlurm.py
Last active March 13, 2017 11:28
Class to perform map-reduce-style operations split into jobs across a high-performance computing cluster
class DivideAndSlurm(object):
    """
    DivideAndSlurm is a class to handle a map-reduce-style submission of jobs to a Slurm cluster.
    Add a particular task to the object (through a specific function) and it will divide the input data
    into pools, which are submitted (via the submit() method) in parallel to the cluster.
    Tasks can also process their input in parallel, taking advantage of all processors.
    """
    def __init__(self, tmpDir="/fhgfs/scratch/users/user/", logDir="/home/user/logs", queue="shortq", userMail=""):
        super(DivideAndSlurm, self).__init__()
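        # --- hedged guess at how __init__ continues; the attribute names
        # below are assumptions, not the gist's actual code ---
        self.tmpDir = tmpDir
        self.logDir = logDir
        self.queue = queue
        self.userMail = userMail
        self.tasks = list()  # tasks are appended here and dispatched by submit()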
@afrendeiro
afrendeiro / taxon_distribution_interpro_domains.py
Last active August 29, 2015 14:15
Get the taxonomic distribution of InterPro domains
import pandas as pd
from biomart import BiomartServer, BiomartDataset
from Bio import Entrez
def get_tax_id(specie):
    """Get the NCBI taxon ID for a species name."""
    specie = specie.replace(" ", "+").strip()
    search = Entrez.esearch(term=specie, db="taxonomy", retmode="xml")
    record = Entrez.read(search)
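    # --- hedged sketch of finishing the lookup; esearch results carry
    # matching IDs under 'IdList' (standard Entrez.read behaviour), and
    # Entrez.email should be set before querying NCBI ---
    if record["IdList"]:
        return record["IdList"][0]
    return None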
@afrendeiro
afrendeiro / ubuntu_fresh_install.sh
Last active October 21, 2019 13:12
Software installation on a fresh Ubuntu 14.04 LTS image on AWS EC2
# New users
sudo adduser username
# Grant the new user sudo privileges
sudo visudo
# add this line: username ALL=(ALL:ALL) ALL
# change to that user
su - username
@afrendeiro
afrendeiro / get_bioinfo_files.sh
Last active September 17, 2018 19:20
Get static files used in bioinformatics (genomes, indexes, annotations)
for GENOME in hg19 mm10 danRer10
do
    # Static files
    mkdir -p resources/${GENOME}
    cd resources/${GENOME}
    ### Genome
    wget http://hgdownload.cse.ucsc.edu/goldenPath/${GENOME}/bigZips/${GENOME}.2bit
    twoBitToFa ${GENOME}.2bit ${GENOME}.fa
    samtools faidx ${GENOME}.fa
    cd ../..
done
@afrendeiro
afrendeiro / run_zinba.R
Created April 27, 2015 12:05
Running the ZINBA peak caller
# Install zinba
R
install.packages(c("R.oo"))
install.packages(c("R.utils", "quantreg","doParallel","doMC","foreach")) # for R>3.0
# only version that works with R 3.0:
# get it from here: https://code.google.com/p/zinba/issues/detail?id=69
install.packages("zinba_2.03.1.tar.gz", repos=NULL)
# Make bed files from bams
# system("bedtools bamtobed -i /data/mapped/sample.bam > ~/zinba/reads/sample.bed")
@afrendeiro
afrendeiro / ngs_101.md
Last active September 26, 2022 13:19
NGS for dummies

Introduction to next-generation sequencing (NGS)

General workflow

The currently dominant technology for next-generation sequencing is Illumina sequencing: no other platform competes with its speed, price, and output, so the alternatives have specialized in niche applications (not discussed here).

Nevertheless, no sequencing technology can simply start at one end of a chromosome and read through to the other end.

The approach therefore is: