rhallPB

## CCS_kinetics.sh
# A few notes on running CCS with kinetics output, including native format and links
# for converting to a format compatible with previous tools.

# requires pbccs, available from bioconda https://anaconda.org/bioconda/pbccs

# Install pbccs from the bioconda channel.
conda install -c bioconda pbccs

# Usage template: the angle-bracketed names are placeholders for real paths.
# Substitute them before running; typed literally, the shell would parse
# `<` and `>` as redirections.
ccs <IN.subreads.bam> <OUT.ccs.bam> --mean-kinetics

# output ccs file will have the following extra tags:

## DupReads.py
#!/usr/bin/env python
# coding: utf-8

# In[173]:


import pysam
import argparse
import sys
import os.path

## gist:34ce4a1cab72d861684928f618406863
# Rewrite the lines of "$fin" that start with '>': print whitespace-separated
# fields 1 and 4 joined by '_'. All other lines pass through unchanged.
# Afterwards every occurrence of "Stat=" on any line is replaced by "_".
# "$fin" is quoted so a path containing spaces or glob characters is read as
# a single file, and awk opens it directly instead of going through cat.
awk '{if (/^>/) {print $1"_"$4} else {print}}' "$fin" | sed 's/Stat=/_/g'

## missedAdapter.sh
#!/bin/bash
# Run fastarevcomp on a FASTA file, align the original against that output
# with sdpMatcher, then post-process the sdpMatcher output with parseSDP.pl.
#
# Arguments: $1 - input file name; everything from its first '.' onward is
#                 stripped and ".fasta" appended to locate the FASTA file.
# Outputs:   <prefix>_revc.fasta (same prefix as the input) and pal.out in
#            the working directory.

# Abort early with a usage message instead of operating on ".fasta".
: "${1:?usage: $0 <input>.fasta}"

# Compute the prefix once; quoting every use keeps names containing spaces or
# glob characters intact.
prefix=${1%%.*}

fastarevcomp "${prefix}.fasta" > "${prefix}_revc.fasta"
sdpMatcher "${prefix}.fasta" "${prefix}_revc.fasta" 10 -local > pal.out
~rhall/projects/MDA/findPal/parseSDP.pl pal.out

## vec.pl
#!/usr/bin/env perl

# First calculate the lengths of all the vectors with the shell command
# "fastalength D276_linearizedVectors.fa > vectorLengths.list".
# Then run this small Perl script to subset the alignment and calculate consensus / variants.


while(<>){
    split;

## mothur bash
### /home/UNIXHOME/asethuraman/projects/jgi/cami/cami_rDna

# Now we'd like to go ahead and classify all of our sequences from the different
# libraries and the mock community reference using the RDP, greengenes, and
# SILVA training sets.

mothur "#classify.seqs(fasta=rDna/cami_ROI.good.pick.filter.unique.precluster.fasta, reference=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss/references/trainset10_082014.pds.fasta, taxonomy=/home/UNIXHOME/asethuraman/projects/s
chloss/16S_Schloss/references/trainset10_082014.pds.tax, processors=8);
  classify.seqs(fasta=rDna/cami_ROI.good.pick.filter.unique.precluster.fasta, reference=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss/references/gg_13_8_99.fasta, taxonomy=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss
/references/gg_13_8_99.gg.tax, processors=8);

## getReads.pl
#!/usr/bin/env perl

open FIN, $ARGV[0];

while(<FIN>){
    if (/$ARGV[1]\t/){}
    else{
        split;
        print $_[0]."\n";

## RS_HGAP_Assembly_BAC.3.xml
<?xml version="1.0" encoding="utf-8"?><smrtpipeSettings>
  <protocol id="RS_HGAP_Assembly.3" version="2.2.0" editable="false">
    <application>De novo assembly</application>
    <param name="name" label="Protocol Name">
      <value>RS_HGAP_Assembly_3</value>
      <input type="text"/>
      <rule required="true"/>
    </param>
    <param name="description">
      <value>(BETA) HGAP version 3. PacBio de novo assembler optimized for speed.</value>

## PreAssemblerBacHGAP.3.xml
<?xml version="1.0" ?>
<smrtpipeSettings>
    <module id="P_PreAssemblerDagcon" label="PreAssembler v2" editableInJob="true">
        <title>Using DAG-based consensus algorithm, pre-assemble long reads as the first step of the Hierarchical Genome Assembly process (HGAP). Version 2 is a stepping stone for scaling to much larger genomes.</title>
        <param name="computeLengthCutoff" label="Compute Minimum Seed Read Length" editable="true">
            <title>Specify whether or not to compute the minimum seed read length that results in at least 30X target genome coverage, by the longest subreads. This is based on the genome size you specified.</title>
            <value>True</value>
            <input type="checkbox" />
        </param>
        <param name="minLongReadLength" label="Minimum Seed Read Length">

## P_PreAssemblerDagconBAC.py
import os
import re
import logging

from SMRTpipe.engine.SmrtPipeFiles import (SMRTFile, SMRTDataFile,
                                                SMRTReportFile, cmdLineInput,
                                                SMRTJsonReportFile)
from SMRTpipe.engine.SmrtPipeTasks import task
from SMRTpipe.engine.DistributableTasks import (DistributableTask,
                                                     LocallyDistributableTask)
	# A few notes on running CCS with kinetics output, including native format and links
	# for converting to a format compatible with previous tools.

	# requires pbccs, available from bioconda https://anaconda.org/bioconda/pbccs

	conda install -c bioconda pbccs

	ccs <IN.subreads.bam> <OUT.ccs.bam> --mean-kinetics

	# output ccs file will have the following extra tags:
	#!/usr/bin/env python
	# coding: utf-8

	# In[173]:


	import pysam
	import argparse
	import sys
	import os.path
	#!/bin/bash
	# Run fastarevcomp on a FASTA file, align the original against that output
	# with sdpMatcher, then post-process the sdpMatcher output with parseSDP.pl.
	#
	# Arguments: $1 - input file name; everything from its first '.' onward is
	#                 stripped and ".fasta" appended to locate the FASTA file.
	# Outputs:   <prefix>_revc.fasta (same prefix as the input) and pal.out in
	#            the working directory.

	# Abort early with a usage message instead of operating on ".fasta".
	: "${1:?usage: $0 <input>.fasta}"

	# The pattern must be ".*": a bare "." only strips a trailing literal dot,
	# which would turn "foo.fasta" into "foo.fasta.fasta". Quoting every use
	# keeps names containing spaces or glob characters intact.
	prefix=${1%%.*}

	fastarevcomp "${prefix}.fasta" > "${prefix}_revc.fasta"
	sdpMatcher "${prefix}.fasta" "${prefix}_revc.fasta" 10 -local > pal.out
	~rhall/projects/MDA/findPal/parseSDP.pl pal.out
	#!/usr/bin/env perl

	# First calculate the lengths of all the vectors with the shell command
	# "fastalength D276_linearizedVectors.fa > vectorLengths.list".
	# Then run this small Perl script to subset the alignment and calculate consensus / variants.



	while(<>){
	split;
	### /home/UNIXHOME/asethuraman/projects/jgi/cami/cami_rDna

	# Now we'd like to go ahead and classify all of our sequences from the different
	# libraries and the mock community reference using the RDP, greengenes, and
	# SILVA training sets.

	mothur "#classify.seqs(fasta=rDna/cami_ROI.good.pick.filter.unique.precluster.fasta, reference=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss/references/trainset10_082014.pds.fasta, taxonomy=/home/UNIXHOME/asethuraman/projects/s
	chloss/16S_Schloss/references/trainset10_082014.pds.tax, processors=8);
	classify.seqs(fasta=rDna/cami_ROI.good.pick.filter.unique.precluster.fasta, reference=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss/references/gg_13_8_99.fasta, taxonomy=/home/UNIXHOME/asethuraman/projects/schloss/16S_Schloss
	/references/gg_13_8_99.gg.tax, processors=8);
	#!/usr/bin/env perl

	open FIN, $ARGV[0];

	while(<FIN>){
	if (/$ARGV[1]\t/){}
	else{
	split;
	print $_[0]."\n";
	<?xml version="1.0" encoding="utf-8"?><smrtpipeSettings>
	<protocol id="RS_HGAP_Assembly.3" version="2.2.0" editable="false">
	<application>De novo assembly</application>
	<param name="name" label="Protocol Name">
	<value>RS_HGAP_Assembly_3</value>
	<input type="text"/>
	<rule required="true"/>
	</param>
	<param name="description">
	<value>(BETA) HGAP version 3. PacBio de novo assembler optimized for speed.</value>
	<?xml version="1.0" ?>
	<smrtpipeSettings>
	<module id="P_PreAssemblerDagcon" label="PreAssembler v2" editableInJob="true">
	<title>Using DAG-based consensus algorithm, pre-assemble long reads as the first step of the Hierarchical Genome Assembly process (HGAP). Version 2 is a stepping stone for scaling to much larger genomes.</title>
	<param name="computeLengthCutoff" label="Compute Minimum Seed Read Length" editable="true">
	<title>Specify whether or not to compute the minimum seed read length that results in at least 30X target genome coverage, by the longest subreads. This is based on the genome size you specified.</title>
	<value>True</value>
	<input type="checkbox" />
	</param>
	<param name="minLongReadLength" label="Minimum Seed Read Length">
	import os
	import re
	import logging

	from SMRTpipe.engine.SmrtPipeFiles import (SMRTFile, SMRTDataFile,
	SMRTReportFile, cmdLineInput,
	SMRTJsonReportFile)
	from SMRTpipe.engine.SmrtPipeTasks import task
	from SMRTpipe.engine.DistributableTasks import (DistributableTask,
	LocallyDistributableTask)