Kevin Lee cyklee

## biom2phyloseq.sh
#!/bin/zsh
# Convert the HDF5 BIOM table written by fastq2biom to JSON so that it can be
# loaded into phyloseq.
# The JSON form is used because, in my experience, phyloseq otherwise drops
# low-abundance OTUs when importing the HDF5 file.
# Expects ${project} to hold the file-name prefix of the BIOM table.
biom convert -i "${project}_tax.biom" -o "${project}_tax.json" --to-json

# The conversion writes the "id" and "type" values as byte-string reprs:
#   got:      "b'No Table ID'"  and  "b'OTU table'"
#   expected: "No Table ID"     and  "OTU table"
# Unwrap only JSON strings of the exact form "b'...'". The earlier expression
# (s/b'//g; s/'//g) removed every apostrophe in the file, which also mangled
# legitimate values such as names that contain an apostrophe.
sed -i "s/\"b'\([^'\"]*\)'\"/\"\1\"/g" "${project}_tax.json"

## tmux.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cyklee
                / tmux.md
            
            
              Created
              July 27, 2016 06:18
                — forked from andreyvit/tmux.md
            
              
                tmux cheatsheet
              
          
    tmux cheat sheet

(C-x means ctrl+x, M-x means alt+x)
Prefix key

The default prefix is C-b. If you (or your muscle memory) prefer C-a, you need to add this to ~/.tmux.conf:
remap prefix to Control + a


## compare_alpha_plot.sh
# In order to use the plotting script in `compare_alpha_diversity.py` we need to convert the output from `alpha_diversity.py`
# to resemble that coming out of `collate_alpha.py`, which involves transposing the column of sample name and output to rows.
# In theory, we will have only two rows, one for site names and one for the alpha diversity.

# Here's the transposition workflow for my own reproducibility purposes:

awk '
{
    for (i=1; i<=NF; i++)  {
            a[NR,i] = $i

## unpack.sh
# Flatten the directory tree: move every regular file found in any
# subdirectory, at any depth, into the current directory.
# zsh glob qualifiers: "." keeps plain files only, "D" also matches dotfiles.
mv ./*/**/*(.D) ./

## no_qiime.R
# This is an exercise in 16S rRNA gene sequence processing in R

library(phyloseq)
library(dada2)
library(Biostrings)

# Import the USEARCH read map as a phyloseq OTU table.
# The input is the "classic" tab-delimited OTU table that uc2otutab.py
# produces from the .uc file. Row names come from the first column and the
# rows are treated as taxa.
otuFile <- read.delim(
  "example_readmap.txt",
  header = TRUE,
  row.names = 1
)
otu <- otu_table(otuFile, taxa_are_rows = TRUE)

## fastx_sequence_length.sh
# Run me first to count the lengths of sequences in FASTQ files in the directory.
# This script is linebreak dependent, therefore no interleaving is allowed.
# NR%4==2 means print every 4th line starting from 2nd
# NR%2==0 means print every 2nd line starting from the 2nd (note the 0)

# Write the length of every sequence line of each FASTQ file in the current
# directory to <file>.readslength.txt, one length per line.
for i in *.fastq; do
    # Skip the literal pattern when no FASTQ file matches the glob.
    [ -e "$i" ] || continue
    # Sequence lines are lines 2, 6, 10, ... of each 4-line FASTQ record.
    # awk reads the file directly (no cat needed); the quotes keep file names
    # containing spaces intact.
    awk 'NR % 4 == 2 { print length($1) }' "$i" > "${i}.readslength.txt"
done

for i in *.fasta; do

## coordinates_conversion.R
# Quick conversion from latitude/longitude formats ("degrees decimal minutes" or "degrees minutes seconds") to decimal coordinates.
# This conversion needs to be verified since I know little of geodesy

library(sp)

# Read the tab-separated coordinate list. quote = "" turns quote handling off
# so the ' and " characters used for minutes and seconds are kept as data.
LL <- read.delim("~/Dropbox/POLAR GRADIENTS/latitude_longitude.tsv", quote="")

# Parse degree/minute/second strings (d, ' and " as separators) and return
# them as decimal degrees.
dms_to_decimal <- function(coords) {
  parsed <- char2dms(as.character(coords), chd = "d", chm = "'", chs = "\"")
  as.numeric(parsed)
}

dms_to_decimal(LL$Latitude)
dms_to_decimal(LL$Longitude)
	#!/bin/zsh
	# Convert the HDF5 BIOM table written by fastq2biom to JSON so that it can be
	# loaded into phyloseq.
	# The JSON form is used because, in my experience, phyloseq otherwise drops
	# low-abundance OTUs when importing the HDF5 file.
	# Expects ${project} to hold the file-name prefix of the BIOM table.
	biom convert -i "${project}_tax.biom" -o "${project}_tax.json" --to-json

	# The conversion writes the "id" and "type" values as byte-string reprs:
	#   got:      "b'No Table ID'"  and  "b'OTU table'"
	#   expected: "No Table ID"     and  "OTU table"
	# Unwrap only JSON strings of the exact form "b'...'". The earlier expression
	# (s/b'//g; s/'//g) removed every apostrophe in the file, which also mangled
	# legitimate values such as names that contain an apostrophe.
	sed -i "s/\"b'\([^'\"]*\)'\"/\"\1\"/g" "${project}_tax.json"
	# In order to use the plotting script in `compare_alpha_diversity.py` we need to convert the output from `alpha_diversity.py`
	# to resemble that coming out of `collate_alpha.py`, which involves transposing the column of sample name and output to rows.
	# In theory, we will have only two rows, one for site names and one for the alpha diversity.

	# Here's the transposition workflow for my own reproducibility purposes:

	awk '
	{
	for (i=1; i<=NF; i++) {
	a[NR,i] = $i
	# This is an exercise in 16S rRNA gene sequence processing in R

	library(phyloseq)
	library(dada2)
	library(Biostrings)

	# Import the USEARCH read map as a phyloseq OTU table.
	# The input is the "classic" tab-delimited OTU table that uc2otutab.py
	# produces from the .uc file. Row names come from the first column and the
	# rows are treated as taxa.
	otuFile <- read.delim(
	  "example_readmap.txt",
	  header = TRUE,
	  row.names = 1
	)
	otu <- otu_table(otuFile, taxa_are_rows = TRUE)
	# Run me first to count the lengths of sequences in FASTQ files in the directory.
	# This script is linebreak dependent, therefore no interleaving is allowed.
	# NR%4==2 means print every 4th line starting from 2nd
	# NR%2==0 means print every 2nd line starting from the 2nd (note the 0)

	# Write the length of every sequence line of each FASTQ file in the current
	# directory to <file>.readslength.txt, one length per line.
	for i in *.fastq; do
	    # Skip the literal pattern when no FASTQ file matches the glob.
	    [ -e "$i" ] || continue
	    # Sequence lines are lines 2, 6, 10, ... of each 4-line FASTQ record.
	    # The earlier "cat $i \| awk" carried an escaped pipe, which hands a
	    # literal "|" to cat as a file name; awk now reads the file directly.
	    awk 'NR % 4 == 2 { print length($1) }' "$i" > "${i}.readslength.txt"
	done

	for i in *.fasta; do
	# Quick conversion from latitude/longitude formats ("degrees decimal minutes" or "degrees minutes seconds") to decimal coordinates.
	# This conversion needs to be verified since I know little of geodesy

	library(sp)

	# Read the tab-separated coordinate list. quote = "" turns quote handling off
	# so the ' and " characters used for minutes and seconds are kept as data.
	LL <- read.delim("~/Dropbox/POLAR GRADIENTS/latitude_longitude.tsv", quote="")

	# Parse degree/minute/second strings (d, ' and " as separators) and return
	# them as decimal degrees.
	dms_to_decimal <- function(coords) {
	  parsed <- char2dms(as.character(coords), chd = "d", chm = "'", chs = "\"")
	  as.numeric(parsed)
	}

	dms_to_decimal(LL$Latitude)
	dms_to_decimal(LL$Longitude)