Mikael Huss hussius

## Random Incanter commands (memory notes)
# Plot histogram

(def d (read-dataset "/Users/mikaelhuss/Desktop/SAGE-liver-CVD/deliver_clinical_traits.txt" :header true :delim \tab))
(view (histogram (filter number? (sel d :cols 5))))

# Or the same with with-data

(with-data
    (read-dataset "/Users/mikaelhuss/Desktop/SAGE-liver-CVD/deliver_clinical_traits.txt" :header true :delim \tab)
    (view (histogram (filter number? ($ 5 )))))

## kallisto_setup.sh
# Download Kallisto and sratools (the latter to be able to download from SRA)
wget https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_mac-v0.42.3.tar.gz
tar zvxf kallisto_mac-v0.42.3.tar.gz
wget http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.5.2/sratoolkit.2.5.2-mac64.tar.gz
tar zxvf sratoolkit.2.5.2-mac64.tar.gz

# Download and merge human cDNA and ncRNA files from Ensembl for the index.
wget ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
wget ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz
cat Homo_sapiens.GRCh38.cdna.all.fa.gz Homo_sapiens.GRCh38.ncrna.fa.gz > Homo_sapiens.GRCh38.rna.fa.gz

## sleuth_commands.R
# Installation (only needs to be done once)
# Fetch the Bioconductor installer script over HTTPS rather than plain HTTP,
# so the remotely sourced code cannot be altered in transit.
# NOTE(review): biocLite() is the legacy Bioconductor installer; newer R
# versions use BiocManager::install("rhdf5") instead -- confirm which applies.
source("https://bioconductor.org/biocLite.R")
biocLite("rhdf5")
install.packages("devtools")
# sleuth is installed straight from its GitHub repository via devtools.
devtools::install_github("pachterlab/sleuth")

# Now load the package
library("sleuth")

# A function (borrowed from the Sleuth documentation) for connecting Ensembl transcript names to common gene names

## ae_toy_example.py
import tensorflow as tf
import numpy as np
import math
#import pandas as pd
#import sys

input = np.array([[2.0, 1.0, 1.0, 2.0],
                 [-2.0, 1.0, -1.0, 2.0],
                 [0.0, 1.0, 0.0, 2.0],
                 [0.0, -1.0, 0.0, -2.0],

## get_activities.py
import sys

# Split the file named by the first command-line argument into one output file
# per layer. Every line containing 'layer' starts a new output file whose name
# is that line's whitespace-separated tokens joined by '_' plus
# '_activities.txt'; all following lines are copied into it verbatim.
outf = None
try:
    with open(sys.argv[1]) as inf:
        for line in inf:
            if 'layer' in line:
                # New layer header: finish the previous layer's file first so
                # its contents are flushed and the handle is released.
                if outf is not None:
                    outf.close()
                fname = '_'.join(line.strip().split()) + '_activities.txt'
                outf = open(fname, 'w')
            elif outf is None:
                # No header seen yet, so there is no file to write to.
                sys.exit("Data line found before any 'layer' header: " + line.strip())
            else:
                outf.write(line)
finally:
    # Close the last layer's file even if an error interrupted the loop.
    if outf is not None:
        outf.close()

## get_weights_and_bias.py
import sys

f = open(sys.argv[1])

wts = [] # This will be a list of list of lists (=list of matrices) with weights
bias_in = [] # List of lists of bias values in the input part of each sublayer
bias_out = [] # List of lists of bias values in the output part of each sublayer
prefs = [] # Prefixes for file names

state = None # state can be None, 'weight', 'bias_in' or 'bias_out'

## merge_kallisto_TPM.R
# Read the user-supplied arguments; the first one is the base directory.
args <- commandArgs(trailingOnly = TRUE)
path <- args[1]
# Match every abundance.tsv located exactly one directory level below `path`.
abundance_pattern <- paste0(path, "/*/abundance.tsv")
files <- Sys.glob(abundance_pattern)
#print(files)
merge_two <- function(x,y){
	  #print(dim(x))
	  if ("tpm" %in% colnames(x)){
	     x_ <- x[,c(1,5)]
	     }
	   else{

## sum_by_gene.py
import sys
import gzip

if len(sys.argv)<3:
    sys.exit("python sum_per_gene.py <cDNA FASTA file> <TPM table>")

ensg = {}

mapf = gzip.open(sys.argv[1])
ctr = 0

## tf_linear_regression.R
library(tensorflow)

tf$reset_default_graph()

# Simulate 100 x values drawn uniformly from [0, 1].
x_data <- runif(100, min = 0, max = 1)
# Targets follow y = 0.1 * x + 0.3 plus Gaussian noise (sd = 0.025). One noise
# value is drawn per x value, with the count taken from x_data itself so the
# two vectors always have the same length.
y_data <- x_data * 0.1 + 0.3 + rnorm(length(x_data), mean = 0, sd = 0.025)

W <- tf$Variable(tf$random_uniform(shape(1L), -1.0, 1.0))
b <- tf$Variable(tf$zeros(shape(1L)))
y <- W * x_data + b

## Installing Tensorflow
1. Install the appropriate version of the TensorFlow (Python) framework from https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html

In my case (Mac OS X 10.11), I did:

- Get the .whl file (this is more likely to work than a direct pip install)
wget https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl

- Install using non-Anaconda pip
/usr/local/bin/pip3 install tensorflow-0.11.0-py3-none-any.whl
	# Plot histogram

	(def d (read-dataset "/Users/mikaelhuss/Desktop/SAGE-liver-CVD/deliver_clinical_traits.txt" :header true :delim \tab))
	(view (histogram (filter number? (sel d :cols 5))))

	# Or the same with with-data

	(with-data
	(read-dataset "/Users/mikaelhuss/Desktop/SAGE-liver-CVD/deliver_clinical_traits.txt" :header true :delim \tab)
	(view (histogram (filter number? ($ 5 )))))
	# Download Kallisto and sratools (the latter to be able to download from SRA)
	wget https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_mac-v0.42.3.tar.gz
	tar zvxf kallisto_mac-v0.42.3.tar.gz
	wget http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.5.2/sratoolkit.2.5.2-mac64.tar.gz
	tar zxvf sratoolkit.2.5.2-mac64.tar.gz

	# Download and merge human cDNA and ncRNA files from Ensembl for the index.
	wget ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
	wget ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz
	cat Homo_sapiens.GRCh38.cdna.all.fa.gz Homo_sapiens.GRCh38.ncrna.fa.gz > Homo_sapiens.GRCh38.rna.fa.gz
	# Installation (only needs to be done once)
	# Fetch the Bioconductor installer script over HTTPS rather than plain HTTP,
	# so the remotely sourced code cannot be altered in transit.
	# NOTE(review): biocLite() is the legacy Bioconductor installer; newer R
	# versions use BiocManager::install("rhdf5") instead -- confirm which applies.
	source("https://bioconductor.org/biocLite.R")
	biocLite("rhdf5")
	install.packages("devtools")
	# sleuth is installed straight from its GitHub repository via devtools.
	devtools::install_github("pachterlab/sleuth")

	# Now load the package
	library("sleuth")

	# A function (borrowed from the Sleuth documentation) for connecting Ensembl transcript names to common gene names
	import tensorflow as tf
	import numpy as np
	import math
	#import pandas as pd
	#import sys

	input = np.array([[2.0, 1.0, 1.0, 2.0],
	[-2.0, 1.0, -1.0, 2.0],
	[0.0, 1.0, 0.0, 2.0],
	[0.0, -1.0, 0.0, -2.0],
	import sys

	# Split the file named by the first command-line argument into one output file
	# per layer. Every line containing 'layer' starts a new output file whose name
	# is that line's whitespace-separated tokens joined by '_' plus
	# '_activities.txt'; all following lines are copied into it verbatim.
	outf = None
	try:
	    with open(sys.argv[1]) as inf:
	        for line in inf:
	            if 'layer' in line:
	                # New layer header: finish the previous layer's file first so
	                # its contents are flushed and the handle is released.
	                if outf is not None:
	                    outf.close()
	                fname = '_'.join(line.strip().split()) + '_activities.txt'
	                outf = open(fname, 'w')
	            elif outf is None:
	                # No header seen yet, so there is no file to write to.
	                sys.exit("Data line found before any 'layer' header: " + line.strip())
	            else:
	                outf.write(line)
	finally:
	    # Close the last layer's file even if an error interrupted the loop.
	    if outf is not None:
	        outf.close()
	import sys

	f = open(sys.argv[1])

	wts = [] # This will be a list of list of lists (=list of matrices) with weights
	bias_in = [] # List of lists of bias values in the input part of each sublayer
	bias_out = [] # List of lists of bias values in the output part of each sublayer
	prefs = [] # Prefixes for file names

	state = None # state can be None, 'weight', 'bias_in' or 'bias_out'
	# Read the user-supplied arguments; the first one is the base directory.
	args <- commandArgs(trailingOnly = TRUE)
	path <- args[1]
	# Match every abundance.tsv located exactly one directory level below `path`.
	abundance_pattern <- paste0(path, "/*/abundance.tsv")
	files <- Sys.glob(abundance_pattern)
	#print(files)
	merge_two <- function(x,y){
	#print(dim(x))
	if ("tpm" %in% colnames(x)){
	x_ <- x[,c(1,5)]
	}
	else{
	import sys
	import gzip

	if len(sys.argv)<3:
	sys.exit("python sum_per_gene.py <cDNA FASTA file> <TPM table>")

	ensg = {}

	mapf = gzip.open(sys.argv[1])
	ctr = 0
	library(tensorflow)

	tf$reset_default_graph()

	# Simulate 100 x values drawn uniformly from [0, 1].
	x_data <- runif(100, min = 0, max = 1)
	# Targets follow y = 0.1 * x + 0.3 plus Gaussian noise (sd = 0.025). One noise
	# value is drawn per x value, with the count taken from x_data itself so the
	# two vectors always have the same length.
	y_data <- x_data * 0.1 + 0.3 + rnorm(length(x_data), mean = 0, sd = 0.025)

	W <- tf$Variable(tf$random_uniform(shape(1L), -1.0, 1.0))
	b <- tf$Variable(tf$zeros(shape(1L)))
	y <- W * x_data + b
	1. Install the appropriate version of the TensorFlow (Python) framework from https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html

	In my case (Mac OS X 10.11), I did:

	- Get the .whl file (this is more likely to work than a direct pip install)
	wget https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl

	- Install using non-Anaconda pip
	/usr/local/bin/pip3 install tensorflow-0.11.0-py3-none-any.whl