Fengyuan Hu boboppie

## remove_duplicate_sequence.shx
#!/bin/bash
# remove_duplicate_sequence.shx is a short bash script that removes duplicate copies of sequences from an input fasta file and saves the result in an output fasta file.
# the bash script was based on Pierre Lindenbaum's script: https://www.biostars.org/p/3003/#3008
# input:
# -f | --file  : fasta file
# -o | --output : output file after removing all the duplicate sequences
#
while [ "$1" != "" ]; do
        case $1 in
                -f | --file ) shift

## beanplots.R
## reproduce the figures from http://www.jstatsoft.org/v28/c01/paper using ggplot2

library(ggplot2)

## Fix the random seed so the simulated data are reproducible.
set.seed(seed = 2710)

## Figure 1: draw 50 values from the standard normal distribution.
d <- rnorm(n = 50, mean = 0, sd = 1)

## coord_map.py
# Copyright 2012-2014 Lenna X. Peterson
# arklenna@gmail.com

# The first step to using the mapper is to get the exons from a GenBank or similar file.
# The mapper will accept exons as a sequence of pairs, a SeqRecord with a CDS feature, or a CDS SeqFeature.
# The file used in this example is located in the Tests directory of the Biopython source code.

from Bio.SeqUtils.Mapper import CoordinateMapper
from Bio import SeqIO


## readBAM.R
# Install the Rsamtools package only if it is not already available, so
# that re-running the script does not reinstall it every time.
# NOTE(review): biocLite() is the legacy Bioconductor installer; recent
# Bioconductor releases install via BiocManager::install() instead --
# confirm which one matches the R version this script targets.
if (!requireNamespace("Rsamtools", quietly = TRUE)) {
  # Fetch the installer script over HTTPS rather than plain HTTP.
  source("https://bioconductor.org/biocLite.R")
  biocLite("Rsamtools")
}

# load the library
library(Rsamtools)

# specify the bam file you want to import
bamFile <- "test.bam"

## plot_aligned_series.R
#' When plotting multiple data series that share a common x axis but different y axes,
#' we can just plot each graph separately. This suffers from the drawback that the shared axis will typically
#' not align across graphs due to different plot margins.
#' One easy solution is to reshape2::melt() the data and use ggplot2's facet_grid() mapping. However, there is
#' no way to label individual y axes.
#' facet_grid() and facet_wrap() were designed to plot small multiples, where both x- and y-axis ranges are
#' shared across all plots in the facetting. While the facet_ calls allow us to use different scales with
#' the \code{scales = "free"} argument, they should not be used this way.
#' A more robust approach is to use grid.draw() from the grid package, together with rbind() and ggplotGrob(), to create a grid of
#' individual plots where the plot axes are properly aligned within the grid.

## bamfilter_oneliners.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                boboppie
                / bamfilter_oneliners.md
            
            
              Created
              January 28, 2016 09:49
                — forked from davfre/bamfilter_oneliners.md
            
              
                SAM and BAM filtering oneliners
              
          
    SAM and BAM filtering one-liners
@author: David Fredman, david.fredmanAAAAAA@gmail.com (sans poly-A tail)

@dependencies: http://sourceforge.net/projects/bamtools/ and http://samtools.sourceforge.net/
Please comment or extend with additional/faster/better solutions.
BWA mapping (using piping for minimal disk I/O)

  
## installation of Cufflinks
Follow the steps at http://cufflinks.cbcb.umd.edu/tutorial.html

1. Download Cufflinks version 2.0.2 (BETA), Source code
2. tar -zxvf cufflinks-2.0.2.tar.gz (before final installation, make sure related tools are installed! See below)

Installing Boost C++ libraries
1. Download Boost at http://www.boost.org/users/download/
2. The bjam boost engine is also required. If MacPorts is installed, then use the command sudo port install boost. It will take care of everything.
3. If #2 won't work, try (preferred way!)
  tar -xzf boost_1_50_0.tar.gz

## run_cnvnator_on_assembly.pl
#!/usr/local/bin/perl

=head1 NAME

    run_cnvnator_on_assembly.pl

=head1 SYNOPSIS

    run_cnvnator_on_assembly.pl input_fasta input_bam output outputdir path_to_cnvnator windowsize
        where input_fasta is the input fasta file,

## vioplot2
vioplot2 <- function (x, ..., range = 1.5, h = NULL, ylim = NULL, names = NULL,
                      horizontal = FALSE, col = "magenta", border = "black", lty = 1,
                      lwd = 1, rectCol = "black", colMed = "white", pchMed = 19,
                      at, add = FALSE, wex = 1, drawRect = TRUE, side="both")
{
  datas <- list(x, ...)
  n <- length(datas)
  if (missing(at))
    at <- 1:n
  upper <- vector(mode = "numeric", length = n)

## simulation1.R
# Observed binary data: a vector of 1s and 0s.
myData <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
# Try 10,000 different parameter values, drawn uniformly and sorted.
tryn <- 1e4
Theta <- sort(runif(n = tryn))
# Each candidate value gets the same prior weight.
pTheta <- 1 / tryn
# Number of 1s in the data and total number of observations.
z <- sum(myData == 1)
N <- length(myData)
# Likelihood function evaluated at every candidate value of Theta.
pDataGivenTheta <- Theta^z * (1 - Theta)^(N - z)
# Sum of likelihood times prior weight over all candidates.
pData <- sum(pDataGivenTheta * pTheta)
	#!/bin/bash
	# remove_duplicate_sequence.shx is a short bash script that removes duplicate copies of sequences from an input fasta file and saves the result in an output fasta file.
	# the bash script was based on Pierre Lindenbaum's script: https://www.biostars.org/p/3003/#3008
	# input:
	# -f \| --file : fasta file
	# -o \| --output : output file after removing all the duplicate sequences
	#
	while [ "$1" != "" ]; do
	case $1 in
	-f \| --file ) shift
	## reproduce the figures from http://www.jstatsoft.org/v28/c01/paper using ggplot2

	library(ggplot2)

	## Fix the random seed so the simulated data are reproducible.
	set.seed(seed = 2710)

	## Figure 1: draw 50 values from the standard normal distribution.
	d <- rnorm(n = 50, mean = 0, sd = 1)
	# Copyright 2012-2014 Lenna X. Peterson
	# arklenna@gmail.com

	# The first step to using the mapper is to get the exons from a GenBank or similar file.
	# The mapper will accept exons as a sequence of pairs, a SeqRecord with a CDS feature, or a CDS SeqFeature.
	# The file used in this example is located in the Tests directory of the Biopython source code.

	from Bio.SeqUtils.Mapper import CoordinateMapper
	from Bio import SeqIO
	# Install the Rsamtools package only if it is not already available, so
	# that re-running the script does not reinstall it every time.
	# NOTE(review): biocLite() is the legacy Bioconductor installer; recent
	# Bioconductor releases install via BiocManager::install() instead --
	# confirm which one matches the R version this script targets.
	if (!requireNamespace("Rsamtools", quietly = TRUE)) {
	  # Fetch the installer script over HTTPS rather than plain HTTP.
	  source("https://bioconductor.org/biocLite.R")
	  biocLite("Rsamtools")
	}

	# load the library
	library(Rsamtools)

	# specify the bam file you want to import
	bamFile <- "test.bam"
	#' When plotting multiple data series that share a common x axis but different y axes,
	#' we can just plot each graph separately. This suffers from the drawback that the shared axis will typically
	#' not align across graphs due to different plot margins.
	#' One easy solution is to reshape2::melt() the data and use ggplot2's facet_grid() mapping. However, there is
	#' no way to label individual y axes.
	#' facet_grid() and facet_wrap() were designed to plot small multiples, where both x- and y-axis ranges are
	#' shared across all plots in the facetting. While the facet_ calls allow us to use different scales with
	#' the \code{scales = "free"} argument, they should not be used this way.
	#' A more robust approach is to use grid.draw() from the grid package, together with rbind() and ggplotGrob(), to create a grid of
	#' individual plots where the plot axes are properly aligned within the grid.
	Follow the steps at http://cufflinks.cbcb.umd.edu/tutorial.html

	1. Download Cufflinks version 2.0.2 (BETA), Source code
	2. tar -zxvf cufflinks-2.0.2.tar.gz (before final installation, make sure related tools are installed! See below)

	Installing Boost C++ libraries
	1. Download Boost at http://www.boost.org/users/download/
	2. The bjam boost engine is also required. If MacPorts is installed, then use the command sudo port install boost. It will take care of everything.
	3. If #2 won't work, try (preferred way!)
	tar -xzf boost_1_50_0.tar.gz
	#!/usr/local/bin/perl

	=head1 NAME

	run_cnvnator_on_assembly.pl

	=head1 SYNOPSIS

	run_cnvnator_on_assembly.pl input_fasta input_bam output outputdir path_to_cnvnator windowsize
	where input_fasta is the input fasta file,
	vioplot2 <- function (x, ..., range = 1.5, h = NULL, ylim = NULL, names = NULL,
	horizontal = FALSE, col = "magenta", border = "black", lty = 1,
	lwd = 1, rectCol = "black", colMed = "white", pchMed = 19,
	at, add = FALSE, wex = 1, drawRect = TRUE, side="both")
	{
	datas <- list(x, ...)
	n <- length(datas)
	if (missing(at))
	at <- 1:n
	upper <- vector(mode = "numeric", length = n)
	# Observed binary data: a vector of 1s and 0s.
	myData <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
	# Try 10,000 different parameter values, drawn uniformly and sorted.
	tryn <- 1e4
	Theta <- sort(runif(n = tryn))
	# Each candidate value gets the same prior weight.
	pTheta <- 1 / tryn
	# Number of 1s in the data and total number of observations.
	z <- sum(myData == 1)
	N <- length(myData)
	# Likelihood function evaluated at every candidate value of Theta.
	pDataGivenTheta <- Theta^z * (1 - Theta)^(N - z)
	# Sum of likelihood times prior weight over all candidates.
	pData <- sum(pDataGivenTheta * pTheta)