Kevin Lee cyklee

## coordinates_conversion.R
# Quick conversion from latitude/longitude formats ("degrees decimal minutes" or "degrees minutes seconds") to decimal coordinates.
# This conversion needs to be verified since I know little of geodesy

# Convert the coordinate strings in the tab-separated sample sheet to decimal
# degrees with sp::char2dms().
library(sp)

# quote = "" switches off quote processing so the minute (') and second (")
# marks inside the coordinate strings are read as literal characters.
LL <- read.delim(
  file = "~/Dropbox/POLAR GRADIENTS/latitude_longitude.tsv",
  quote = ""
)

# Parse one coordinate column ("d" marks degrees, "'" minutes, "\"" seconds)
# and return it as a plain numeric vector.
to_decimal <- function(coords) {
  dms <- char2dms(as.character(coords), chd = "d", chm = "'", chs = "\"")
  as.numeric(dms)
}

# Top-level calls, so the converted values are printed as before.
to_decimal(LL$Latitude)
to_decimal(LL$Longitude)

## no_qiime.R
# This is an exercise in 16S rRNA gene sequence processing in R

# Packages used by the 16S processing exercise.
library(phyloseq)
library(dada2)
library(Biostrings)

# Import the USEARCH read map: the "classic" tab-delimited OTU table produced
# by converting the .uc file to .txt with uc2otutab.py. The first column
# supplies the row names.
otuFile <- read.delim(
  file = "example_readmap.txt",
  header = TRUE,
  row.names = 1
)

# Wrap the table as a phyloseq otu_table, declaring that taxa are in rows.
otu <- otu_table(otuFile, taxa_are_rows = TRUE)

## unpack.sh
# Flatten a nested directory tree: move every plain file found anywhere below
# the subdirectories of the current directory into the current directory.
# Requires zsh: `**/` recurses through subdirectories, and the glob qualifier
# `(.D)` restricts matches to plain files (`.`) and also matches dotfiles (`D`).
# NOTE(review): files that share a basename will collide in the target
# directory — confirm overwriting is acceptable before running.
mv ./*/**/*(.D) ./

## compare_alpha_plot.sh
# In order to use the plotting script in `compare_alpha_diversity.py`, we need to convert the output from `alpha_diversity.py`
# to resemble the output of `collate_alpha.py`, which involves transposing the columns of sample names and output into rows.
# In theory, we will have only two rows: one for site names and one for the alpha diversity.

# Here's the transposition workflow for my own reproducibility purposes:

awk '
{
    for (i=1; i<=NF; i++)  {
            a[NR,i] = $i

## tmux.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cyklee
                / tmux.md
            
            
              Created
              July 27, 2016 06:18
                — forked from andreyvit/tmux.md
            
              
                tmux cheatsheet
              
          
    tmux cheat sheet

(C-x means ctrl+x, M-x means alt+x)
Prefix key

The default prefix is C-b. If you (or your muscle memory) prefer C-a, you need to add this to ~/.tmux.conf:
remap prefix to Control + a


## biom2phyloseq.sh
#!/bin/zsh
# Load OTU table generated by fastq2biom into phyloseq.
#
# Expects $project to hold the file prefix. Abort early instead of silently
# operating on a stray "_tax.biom" when it is unset or empty.
: "${project:?set project to the prefix of the <project>_tax.biom file}"

# I'm converting from HDF5 to JSON format because otherwise phyloseq would
# drop low-abundance OTUs for me.
biom convert -i "${project}_tax.biom" -o "${project}_tax.json" --to-json

# The conversion writes some string values (e.g. the "id" and "type" fields)
# as Python bytes literals:
#   conversion output: "b'No Table ID'" & "b'OTU table'"
#   should be:         "No Table ID"    & "OTU table"
# Unwrap only values of the exact form "b'...'". Deleting every ' and every b'
# in the file would also mangle apostrophes and words ending in "b'" elsewhere
# in the JSON (e.g. in taxonomy strings).
sed -i -E "s/\"b'([^\"]*)'\"/\"\1\"/g" "${project}_tax.json"

## manjaro_vboxsf.sh
# Enable VirtualBox shared folders (vboxsf) for the current user.
#
# Each privileged step uses sudo. A bare `su` inside a script only opens an
# interactive root shell: the lines after it run unprivileged once that shell
# closes, and the following `exit` would end the script before the user is
# added to the group.

# Enable the guest service at boot and start it immediately.
sudo systemctl enable --now vboxservice

# -f: succeed quietly when the group already exists.
sudo groupadd -f vboxsf

# Append (-a) vboxsf to the invoking user's supplementary groups.
sudo usermod -aG vboxsf "$(whoami)"
# Log off & log back in

## Subset_FASTA.sh
# Extract the records named in names.txt from test.fa with `samtools faidx`;
# xargs appends the names it reads from stdin as region arguments.
# Source: https://www.biostars.org/p/49820/
# https://github.com/mdshw5/pyfaidx can be used as a drop-in replacement
< names.txt xargs samtools faidx test.fa

## import_biom2.R
import_biom2 <- function(x,
                         treefilename=NULL, refseqfilename=NULL, refseqFunction=readDNAStringSet, refseqArgs=NULL,
                         parseFunction=parse_taxonomy_default, parallel=FALSE, version=1.0, ...){

  # initialize the argument-list for phyloseq. Start empty.
  argumentlist <- list()

  x = read_biom(x)
  b_data = biom_data(x)
  b_data_mat = as(b_data, "matrix")

## prokkagff2gtf.sh
#!/bin/bash
# Convert a PROKKA GFF file into a minimal tab-separated GTF on stdout.
# Usage: prokkagff2gtf.sh <PROKKA gff file>

infile=$1

# A missing argument is a usage error: report it on stderr and exit non-zero
# so callers and pipelines can detect the failure.
if [ -z "$infile" ] ; then
    echo "Usage: prokkagff2gtf.sh <PROKKA gff file>" >&2
    exit 1
fi

# Pipeline stages:
#   1. drop comment/directive lines (those that START with "#"; a "#" inside
#      a feature line must not discard the feature)
#   2. keep only lines carrying an ID= attribute
#   3. truncate each line at the first ';' so only the ID attribute remains
#   4. strip the "ID=" prefix
#   5. keep seqid, start, end, strand and the ID (columns 1,4,5,7,9)
#   6. emit the GTF columns with source "PROKKA" and feature type "CDS"
grep -v "^#" "$infile" |
    grep "ID=" |
    cut -f1 -d ';' |
    sed 's/ID=//g' |
    cut -f1,4,5,7,9 |
    awk -v OFS='\t' '{print $1,"PROKKA","CDS",$2,$3,".",$4,".","gene_id " $5}'
	# Quick conversion from latitude/longitude formats ("degrees decimal minutes" or "degrees minutes seconds") to decimal coordinates.
	# This conversion needs to be verified since I know little of geodesy

	# Convert the coordinate strings in the tab-separated sample sheet to decimal
	# degrees with sp::char2dms().
	library(sp)

	# quote = "" switches off quote processing so the minute (') and second (")
	# marks inside the coordinate strings are read as literal characters.
	LL <- read.delim(
	  file = "~/Dropbox/POLAR GRADIENTS/latitude_longitude.tsv",
	  quote = ""
	)

	# Parse one coordinate column ("d" marks degrees, "'" minutes, "\"" seconds)
	# and return it as a plain numeric vector.
	to_decimal <- function(coords) {
	  dms <- char2dms(as.character(coords), chd = "d", chm = "'", chs = "\"")
	  as.numeric(dms)
	}

	# Top-level calls, so the converted values are printed as before.
	to_decimal(LL$Latitude)
	to_decimal(LL$Longitude)
	# This is an exercise in 16S rRNA gene sequence processing in R

	# Packages used by the 16S processing exercise.
	library(phyloseq)
	library(dada2)
	library(Biostrings)

	# Import the USEARCH read map: the "classic" tab-delimited OTU table produced
	# by converting the .uc file to .txt with uc2otutab.py. The first column
	# supplies the row names.
	otuFile <- read.delim(
	  file = "example_readmap.txt",
	  header = TRUE,
	  row.names = 1
	)

	# Wrap the table as a phyloseq otu_table, declaring that taxa are in rows.
	otu <- otu_table(otuFile, taxa_are_rows = TRUE)
	# In order to use the plotting script in `compare_alpha_diversity.py`, we need to convert the output from `alpha_diversity.py`
	# to resemble the output of `collate_alpha.py`, which involves transposing the columns of sample names and output into rows.
	# In theory, we will have only two rows: one for site names and one for the alpha diversity.

	# Here's the transposition workflow for my own reproducibility purposes:

	awk '
	{
	for (i=1; i<=NF; i++) {
	a[NR,i] = $i
	#!/bin/zsh
	# Load OTU table generated by fastq2biom into phyloseq.
	#
	# Expects $project to hold the file prefix. Abort early instead of silently
	# operating on a stray "_tax.biom" when it is unset or empty.
	: "${project:?set project to the prefix of the <project>_tax.biom file}"

	# I'm converting from HDF5 to JSON format because otherwise phyloseq would
	# drop low-abundance OTUs for me.
	biom convert -i "${project}_tax.biom" -o "${project}_tax.json" --to-json

	# The conversion writes some string values (e.g. the "id" and "type" fields)
	# as Python bytes literals:
	#   conversion output: "b'No Table ID'" & "b'OTU table'"
	#   should be:         "No Table ID"    & "OTU table"
	# Unwrap only values of the exact form "b'...'". Deleting every ' and every b'
	# in the file would also mangle apostrophes and words ending in "b'" elsewhere
	# in the JSON (e.g. in taxonomy strings).
	sed -i -E "s/\"b'([^\"]*)'\"/\"\1\"/g" "${project}_tax.json"
	# Enable VirtualBox shared folders (vboxsf) for the current user.
	#
	# Each privileged step uses sudo. A bare `su` inside a script only opens an
	# interactive root shell: the lines after it run unprivileged once that shell
	# closes, and the following `exit` would end the script before the user is
	# added to the group.

	# Enable the guest service at boot and start it immediately.
	sudo systemctl enable --now vboxservice

	# -f: succeed quietly when the group already exists.
	sudo groupadd -f vboxsf

	# Append (-a) vboxsf to the invoking user's supplementary groups.
	sudo usermod -aG vboxsf "$(whoami)"
	# Log off & log back in
	# Extract the records named in names.txt from test.fa with `samtools faidx`;
	# xargs appends the names it reads from stdin as region arguments.
	# Source: https://www.biostars.org/p/49820/
	# https://github.com/mdshw5/pyfaidx can be used as a drop-in replacement
	< names.txt xargs samtools faidx test.fa
	import_biom2 <- function(x,
	treefilename=NULL, refseqfilename=NULL, refseqFunction=readDNAStringSet, refseqArgs=NULL,
	parseFunction=parse_taxonomy_default, parallel=FALSE, version=1.0, ...){

	# initialize the argument-list for phyloseq. Start empty.
	argumentlist <- list()

	x = read_biom(x)
	b_data = biom_data(x)
	b_data_mat = as(b_data, "matrix")
	#!/bin/bash
	# Convert a PROKKA GFF file into a minimal tab-separated GTF on stdout.
	# Usage: prokkagff2gtf.sh <PROKKA gff file>

	infile=$1

	# A missing argument is a usage error: report it on stderr and exit non-zero
	# so callers and pipelines can detect the failure.
	if [ -z "$infile" ] ; then
	    echo "Usage: prokkagff2gtf.sh <PROKKA gff file>" >&2
	    exit 1
	fi

	# Pipeline stages (the pipes must be unescaped: "\|" would hand a literal
	# "|" to grep as an argument instead of connecting the commands):
	#   1. drop comment/directive lines (those that START with "#")
	#   2. keep only lines carrying an ID= attribute
	#   3. truncate each line at the first ';' so only the ID attribute remains
	#   4. strip the "ID=" prefix
	#   5. keep seqid, start, end, strand and the ID (columns 1,4,5,7,9)
	#   6. emit the GTF columns with source "PROKKA" and feature type "CDS"
	grep -v "^#" "$infile" |
	    grep "ID=" |
	    cut -f1 -d ';' |
	    sed 's/ID=//g' |
	    cut -f1,4,5,7,9 |
	    awk -v OFS='\t' '{print $1,"PROKKA","CDS",$2,$3,".",$4,".","gene_id " $5}'