Skip to content

Instantly share code, notes, and snippets.

View flashton2003's full-sized avatar

flashton2003

View GitHub Profile
from Bio import Phylo
'''
I want to get a dictionary where the keys are every leaf name
and the value is the parental (internal) node of that leaf
'''
tree = Phylo.read(tree_handle, 'newick')
res_dict = {}
for node in tree.find_clades():
## if the node is a leaf, the name will be in node.name
from collections import Counter, defaultdict
food = [{'date':'2014', 'address':'sesame street'},{'date':'2012', 'address':'eversholt street'},{'date':'2013', 'address':'eversholt street'},{'date':'2014', 'address':'eversholt street'},{'date':'2012', 'address':'sesame street'},{'date':'2013', 'address':'sesame street'},{'date':'2014', 'address':'sesame street'},{'date':'2014', 'address':'eversholt street'},{'date':'2014', 'address':'sesame street'}]
def short_way(food):
## a defaultdict sets the default value type to the factory passed in (here, a Counter)
by_year = defaultdict(Counter)
for row in food:
# print by_year
## simultaneously adds the date as a key to the dict and increments the counter value for the address by 1
@flashton2003
flashton2003 / convert_distance_matrix.py
Created July 27, 2016 16:37
Convert a 2D matrix to a flat three-column format
### this script takes in a distance matrix produced by https://github.com/tseemann/nullarbor/blob/master/bin/afa-pairwise.pl which is a 2d matrix
# a b c
# a 0 1 2
# b 1 0 1
# c 2 1 0
# and prints out the half matrix in three column format, with no self-self comparisons
# a b 1
Order details: Ordered on 30 October 2014 (1 item)
The Forest Unseen: A Year's Watch in Nature (Kindle Books)
Haskell, David George
Sold by: Amazon Media EU Sarl
Order details: Ordered on 27 October 2014 (1 item)
The Psychopath Test (Kindle Books)
Ronson, Jon
Sold by: Amazon Media EU Sarl
library(reshape2)
library(ggplot2)
#library(scales)
# from http://stackoverflow.com/questions/3550341/gantt-charts-with-r
books <- c("All the Light We Cannot See", "13 Things That Don t Make Sense", "Why The Allies Won", "The Third Policeman", "Just Kids", "Hackers", "The Black Swan", "Prisoners of Geography", "Benjamin Franklin - Biography", "Stuff Matters", "Pale Fire", "Use Of Weapons", "Lustrum", "Microbe Hunters", "Perfume", "Winston s War", "Being Mortal", "The Man Who Mistook His Wife for a Hat", "The Life You Can Save", "The Ghost Map", "Hyperion", "Chaos - Making a New Science", "The Realm", "A Scientist in Wonderland", "Good Omens", "White Teeth", "The Sports Gene", "The Inimitable Jeeves", "The Illustrated Man", "How We Got to Now", "Gone Girl", "A Dance With Dragons", "H is for Hawk", "Green Mars", "Pompeii - Life of a Roman Town", "The Grapes of Wrath", "The Forest Unseen", "The Psychopath Test", "The Selfish Gene", "The Difference Engine", "Managing Your Boss", "Empire - How Britain Made the M
import sys
import re
import amazonproduct
import pprint
import pickle
import lxml.objectify
import numpy as np
from lxml import etree
from datetime import datetime
import socket
from __future__ import division
import datetime
import random
## inhandle is formatted 'sample_id\tdate', no header
inhandle = '/Users/flashton/Desktop/sample_dates'
def read_file(inhandle):
res_dict = {}
with open(inhandle) as fi:
lines = fi.readlines()
import os
from Bio import SeqIO
from BCBio import GFF
root_dir = '/Users/flashton/projects/nctc3000/2016.01.17'
def main(root_dir):
for each in os.listdir(root_dir):
with open('%s/%s' % (root_dir, each)) as fi:
basename = each.split('.')[0]
import ftplib
# ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR038/ERR038787/ERR038787_1.fastq.gz
todo_list = ['ERR024633'] # ERR accessions
target_dir = '/where/you/want/the/data'
def download_from_ena(todo_list, target_dir):
ftp = ftplib.FTP('ftp.sra.ebi.ac.uk')
ftp.login()
ftp.cwd('vol1')
@flashton2003
flashton2003 / change_pwd_to_matrix.r
Created October 25, 2015 16:36
Read in a pairwise distance matrix as a list, convert it to a matrix, do hierarchical clustering, draw a tree, and export it as a Newick file
library(reshape)
library(ape)
# read in the data
f <- read.delim("~/Dropbox/mash_project/2015.09.23.all_vs_all.txt", header=F)
# use reshape's cast function to change to matrix
m <- cast(f, V1 ~ V2)
# set the row names
rownames(m) <- m[,1]
# get rid of a couple of rows