zychen2016

## main.cpp
#include <iostream>
#include <vector>
#include <assert.h>
#include <cmath>
#include <png++/png.hpp>

using namespace std;

typedef vector<double> Array;
typedef vector<Array> Matrix;

## jupyter_notebooks.md

      
              1 file
            
          
              6 forks
            
          
              1 comment
            
          
              20 stars
            
          
                afrendeiro
                / jupyter_notebooks.md
            
            
              Last active
              November 18, 2023 04:11
            
              
                A list of jupyter notebooks in bioinformatics
              
          
    Introduction to linux command line/ipython/git/R for bioinformatics:


https://github.com/biom262/biom262-2016
https://github.com/ricket-sjtu/bi028
https://github.com/YeoLab/single-cell-bioinformatics/
https://github.com/YeoLab/single-cell-bioinformatics/tree/master/notebooks
https://github.com/ltminty/Bioinformatics-and-R/blob/master/Rbioinformatics.ipynb

RNA-seq:


https://github.com/Caroldm/RNA-Seq-Tutorial---Jupyter-notebook/blob/master/RNAseq%20tutorial.ipynb


## counts_to_tpm.R
#' Convert counts to transcripts per million (TPM).
#'
#' Convert a numeric matrix of features (rows) and conditions (columns) with
#' raw feature counts to transcripts per million.
#'
#'    Lior Pachter. Models for transcript quantification from RNA-Seq.
#'    arXiv:1104.3889v2
#'
#'    Wagner, et al. Measurement of mRNA abundance using RNA-seq data:
#'    RPKM measure is inconsistent among samples. Theory Biosci. 24 July 2012.

## deseq2-analysis-template.R
## RNA-seq analysis with DESeq2
## Stephen Turner, @genetics_blog

# RNA-seq data from GSE52202
# http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=gse52202. All patients with
# ALS, 4 with C9 expansion ("exp"), 4 controls without expansion ("ctl")

# Import & pre-process ----------------------------------------------------

# Import data from featureCounts

## parseSNPs.py
import argparse
import textwrap
import os
import sys
from datetime import timedelta, datetime


# function for reading a multifasta file
# returns a dictionary with sequence headers and nucleotide sequences
def get_seqs_from_fasta(filepath):

## bin_chromosome.py
#!/usr/bin/env python
# Time-stamp: <2013-09-24 15:23:09 Tao Liu>

import os
import sys
# ------------------------------------
# Main function
# ------------------------------------
def main():
    if len( sys.argv ) < 4:

## RPKM-TPM.r
# Script to compare Reads per Kilobase per Million mapped reads (RPKM) to Transcripts per Million (TPM) for gene expression count data
# Wagner et al. 2012 "Measurement of mRNA abundance using RNA-seq data: RPKM measure
# is inconsistent among samples" Theory Biosci. 131:281-285

library(plyr)


## Worked example from http://blog.nextgenetics.net/?e=51

X <- data.frame(gene=c("A","B","C","D","E"), count=c(80, 10, 6, 3, 1),

## get_snps_from_msas.py

aln_snps = {}
for aln in aln_files:
    recs = [f for f in SeqIO.parse(aln, 'fasta')]
    # strain names should be the last dash delimited element in fasta header
    strains = [rec.name.split('-')[-1] for rec in recs]
    # get a dictionary of strain names and sequences
    strain_seq = {rec.name.split('-')[-1]:''.join([nt for nt in rec.seq]) \
        for rec in recs}
    # get length of the MSA and check that all of the seq are the same length

## calcFPKMs.R
setGeneric("calcFPKMs", function(counts, ...) {standardGeneric("calcFPKMs")})

setMethod("calcFPKMs", c("GRanges"),
    function(counts, verbose = TRUE)
{
    counts.df <- as.data.frame(counts)
    counts.cols <- metadata(counts)[["counts.cols"]] + 5

    # Only use read counts from the known transcriptome.
    counts.df <- counts.df[counts.df[, "type"] %in% c("exon", "junction"), ]
	#include <iostream>
	#include <vector>
	#include <assert.h>
	#include <cmath>
	#include <png++/png.hpp>

	using namespace std;

	typedef vector<double> Array;
	typedef vector<Array> Matrix;
	#' Convert counts to transcripts per million (TPM).
	#'
	#' Convert a numeric matrix of features (rows) and conditions (columns) with
	#' raw feature counts to transcripts per million.
	#'
	#' Lior Pachter. Models for transcript quantification from RNA-Seq.
	#' arXiv:1104.3889v2
	#'
	#' Wagner, et al. Measurement of mRNA abundance using RNA-seq data:
	#' RPKM measure is inconsistent among samples. Theory Biosci. 24 July 2012.
	## RNA-seq analysis with DESeq2
	## Stephen Turner, @genetics_blog

	# RNA-seq data from GSE52202
	# http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=gse52202. All patients with
	# ALS, 4 with C9 expansion ("exp"), 4 controls without expansion ("ctl")

	# Import & pre-process ----------------------------------------------------

	# Import data from featureCounts
	import argparse
	import textwrap
	import os
	import sys
	from datetime import timedelta, datetime


	# function for reading a multifasta file
	# returns a dictionary with sequence headers and nucleotide sequences
	def get_seqs_from_fasta(filepath):
	#!/usr/bin/env python
	# Time-stamp: <2013-09-24 15:23:09 Tao Liu>

	import os
	import sys
	# ------------------------------------
	# Main function
	# ------------------------------------
	def main():
	if len( sys.argv ) < 4:
	# Script to compare Reads per Kilobase per Million mapped reads (RPKM) to Transcripts per Million (TPM) for gene expression count data
	# Wagner et al. 2012 "Measurement of mRNA abundance using RNA-seq data: RPKM measure
	# is inconsistent among samples" Theory Biosci. 131:281-285

	library(plyr)


	## Worked example from http://blog.nextgenetics.net/?e=51

	X <- data.frame(gene=c("A","B","C","D","E"), count=c(80, 10, 6, 3, 1),

	aln_snps = {}
	for aln in aln_files:
	recs = [f for f in SeqIO.parse(aln, 'fasta')]
	# strain names should be the last dash delimited element in fasta header
	strains = [rec.name.split('-')[-1] for rec in recs]
	# get a dictionary of strain names and sequences
	strain_seq = {rec.name.split('-')[-1]:''.join([nt for nt in rec.seq]) \
	for rec in recs}
	# get length of the MSA and check that all of the seq are the same length
	setGeneric("calcFPKMs", function(counts, ...) {standardGeneric("calcFPKMs")})

	setMethod("calcFPKMs", c("GRanges"),
	function(counts, verbose = TRUE)
	{
	counts.df <- as.data.frame(counts)
	counts.cols <- metadata(counts)[["counts.cols"]] + 5

	# Only use read counts from the known transcriptome.
	counts.df <- counts.df[counts.df[, "type"] %in% c("exon", "junction"), ]