flashton2003

## find_leaf_parent.py
from Bio import Phylo

'''
I want to get a dictionary where the keys are every leaf name
and the value is the parental (internal) node of that leaf
'''
tree = Phylo.read(tree_handle, 'newick')
res_dict = {}
for node in tree.find_clades():
    ## if the node is a leaf, the name will be in node.name

## default_dict_eg.py
from collections import Counter, defaultdict

food = [{'date':'2014', 'address':'sesame street'},{'date':'2012', 'address':'eversholt street'},{'date':'2013', 'address':'eversholt street'},{'date':'2014', 'address':'eversholt street'},{'date':'2012', 'address':'sesame street'},{'date':'2013', 'address':'sesame street'},{'date':'2014', 'address':'sesame street'},{'date':'2014', 'address':'eversholt street'},{'date':'2014', 'address':'sesame street'}]

def short_way(food):
	## a defaultdict creates each missing value by calling the factory it is given (here, Counter)
	by_year = defaultdict(Counter)
	for row in food:
		# print by_year
		## simultaneously adds the date as a key to the dict and increments the counter value for the address by 1

## convert_distance_matrix.py
### this script takes in a distance matrix produced by https://github.com/tseemann/nullarbor/blob/master/bin/afa-pairwise.pl which is a 2d matrix

#   a  b  c
# a 0  1  2
# b 1  0  1
# c 2  1  0

# and prints out the half matrix in three column format, with no self-self comparisons

# a b 1

## amazon_book_info_example.txt
Order detailsOrdered on 30 October 2014 (1 item)
The Forest Unseen: A Year's Watch in Nature (Kindle Books)
Haskell, David George
Sold by: Amazon Media EU Sarl

Order detailsOrdered on 27 October 2014 (1 item)
The Psychopath Test (Kindle Books)
Ronson, Jon
Sold by: Amazon Media EU Sarl


## amazon_plot_book_info.R
library(reshape2)
library(ggplot2)
#library(scales)

# from http://stackoverflow.com/questions/3550341/gantt-charts-with-r

books <- c("All the Light We Cannot See", "13 Things That Don t Make Sense", "Why The Allies Won", "The Third Policeman", "Just Kids", "Hackers", "The Black Swan", "Prisoners of Geography", "Benjamin Franklin - Biography", "Stuff Matters", "Pale Fire", "Use Of Weapons", "Lustrum", "Microbe Hunters", "Perfume", "Winston s War", "Being Mortal", "The Man Who Mistook His Wife for a Hat", "The Life You Can Save", "The Ghost Map", "Hyperion", "Chaos - Making a New Science", "The Realm", "A Scientist in Wonderland", "Good Omens", "White Teeth", "The Sports Gene", "The Inimitable Jeeves", "The Illustrated Man", "How We Got to Now", "Gone Girl", "A Dance With Dragons", "H is for Hawk", "Green Mars", "Pompeii - Life of a Roman Town", "The Grapes of Wrath", "The Forest Unseen", "The Psychopath Test", "The Selfish Gene", "The Difference Engine", "Managing Your Boss", "Empire - How Britain Made the M

## amazon_get_book_info.py
import sys
import re
import amazonproduct
import pprint
import pickle
import lxml.objectify
import numpy as np
from lxml import etree
from datetime import datetime
import socket

## sort_out_dates_for_path-o-gen.py
from __future__ import division
import datetime
import random
## inhandle is formatted 'sample_id\tdate', no header
inhandle = '/Users/flashton/Desktop/sample_dates'

def read_file(inhandle):
	res_dict = {}
	with open(inhandle) as fi:
		lines = fi.readlines()

## parse_nctc3000.py
import os
from Bio import SeqIO
from BCBio import GFF

root_dir = '/Users/flashton/projects/nctc3000/2016.01.17'

def main(root_dir):
	for each in os.listdir(root_dir):
		with open('%s/%s' % (root_dir, each)) as fi:
			basename = each.split('.')[0]

## download_from_ena.py
import ftplib

# ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR038/ERR038787/ERR038787_1.fastq.gz
todo_list = ['ERR024633'] # ERR accessions
target_dir = '/where/you/want/the/data'

def download_from_ena(todo_list, target_dir):
	ftp = ftplib.FTP('ftp.sra.ebi.ac.uk')
	ftp.login()
	ftp.cwd('vol1')

## change_pwd_to_matrix.r
library(reshape)
library(ape)

# read in the data
f <- read.delim("~/Dropbox/mash_project/2015.09.23.all_vs_all.txt", header=F)
# use reshape's cast function to change to matrix
m <- cast(f, V1 ~ V2)
# set the row names
rownames(m) <- m[,1]
# get rid of a couple of rows
	from Bio import Phylo

	'''
	I want to get a dictionary where the keys are every leaf name
	and the value is the parental (internal) node of that leaf
	'''
	tree = Phylo.read(tree_handle, 'newick')
	res_dict = {}
	for node in tree.find_clades():
	## if the node is a leaf, the name will be in node.name
	from collections import Counter, defaultdict

	food = [{'date':'2014', 'address':'sesame street'},{'date':'2012', 'address':'eversholt street'},{'date':'2013', 'address':'eversholt street'},{'date':'2014', 'address':'eversholt street'},{'date':'2012', 'address':'sesame street'},{'date':'2013', 'address':'sesame street'},{'date':'2014', 'address':'sesame street'},{'date':'2014', 'address':'eversholt street'},{'date':'2014', 'address':'sesame street'}]

	def short_way(food):
	## a defaultdict creates each missing value by calling the factory it is given (here, Counter)
	by_year = defaultdict(Counter)
	for row in food:
	# print by_year
	## simultaneously adds the date as a key to the dict and increments the counter value for the address by 1
	### this script takes in a distance matrix produced by https://github.com/tseemann/nullarbor/blob/master/bin/afa-pairwise.pl which is a 2d matrix

	# a b c
	# a 0 1 2
	# b 1 0 1
	# c 2 1 0

	# and prints out the half matrix in three column format, with no self-self comparisons

	# a b 1
	Order detailsOrdered on 30 October 2014 (1 item)
	The Forest Unseen: A Year's Watch in Nature (Kindle Books)
	Haskell, David George
	Sold by: Amazon Media EU Sarl

	Order detailsOrdered on 27 October 2014 (1 item)
	The Psychopath Test (Kindle Books)
	Ronson, Jon
	Sold by: Amazon Media EU Sarl
	library(reshape2)
	library(ggplot2)
	#library(scales)

	# from http://stackoverflow.com/questions/3550341/gantt-charts-with-r

	books <- c("All the Light We Cannot See", "13 Things That Don t Make Sense", "Why The Allies Won", "The Third Policeman", "Just Kids", "Hackers", "The Black Swan", "Prisoners of Geography", "Benjamin Franklin - Biography", "Stuff Matters", "Pale Fire", "Use Of Weapons", "Lustrum", "Microbe Hunters", "Perfume", "Winston s War", "Being Mortal", "The Man Who Mistook His Wife for a Hat", "The Life You Can Save", "The Ghost Map", "Hyperion", "Chaos - Making a New Science", "The Realm", "A Scientist in Wonderland", "Good Omens", "White Teeth", "The Sports Gene", "The Inimitable Jeeves", "The Illustrated Man", "How We Got to Now", "Gone Girl", "A Dance With Dragons", "H is for Hawk", "Green Mars", "Pompeii - Life of a Roman Town", "The Grapes of Wrath", "The Forest Unseen", "The Psychopath Test", "The Selfish Gene", "The Difference Engine", "Managing Your Boss", "Empire - How Britain Made the M
	import sys
	import re
	import amazonproduct
	import pprint
	import pickle
	import lxml.objectify
	import numpy as np
	from lxml import etree
	from datetime import datetime
	import socket
	from __future__ import division
	import datetime
	import random
	## inhandle is formatted 'sample_id\tdate', no header
	inhandle = '/Users/flashton/Desktop/sample_dates'

	def read_file(inhandle):
	res_dict = {}
	with open(inhandle) as fi:
	lines = fi.readlines()
	import os
	from Bio import SeqIO
	from BCBio import GFF

	root_dir = '/Users/flashton/projects/nctc3000/2016.01.17'

	def main(root_dir):
	for each in os.listdir(root_dir):
	with open('%s/%s' % (root_dir, each)) as fi:
	basename = each.split('.')[0]
	import ftplib

	# ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR038/ERR038787/ERR038787_1.fastq.gz
	todo_list = ['ERR024633'] # ERR accessions
	target_dir = '/where/you/want/the/data'

	def download_from_ena(todo_list, target_dir):
	ftp = ftplib.FTP('ftp.sra.ebi.ac.uk')
	ftp.login()
	ftp.cwd('vol1')
	library(reshape)
	library(ape)

	# read in the data
	f <- read.delim("~/Dropbox/mash_project/2015.09.23.all_vs_all.txt", header=F)
	# use reshape's cast function to change to matrix
	m <- cast(f, V1 ~ V2)
	# set the row names
	rownames(m) <- m[,1]
	# get rid of a couple of rows