Skip to content

Instantly share code, notes, and snippets.

View flashton2003's full-sized avatar

flashton2003

View GitHub Profile
import os
import pprint
import dendropy as dpy
## maybe useful https://gist.github.com/jeetsukumaran/512075/16cfec3a433042287a3704beb24bc73033f757a6
def extract_subtree(tree, target_node, i, tree_handle, output_dir, country):
## Because of a limitation in dendropy, we need to copy the tree
## and take the node that we want to use as a sub-tree
## from the copied version.
* Convert the admissions and discharge Excel sheets into sorted Stata datasets.
* Work from the sequencing analysis folder (machine-specific absolute path).
cd "/Users/benmorton/Documents/Ben's documents/Research/Paper Submission/Current/ISARIC W2/Analysis/Sequencing"
* Load Sheet1 of the admissions workbook; firstrow takes variable names from
* the first row and clear replaces whatever data is currently in memory.
import excel "admissions_data 15032022", sheet ("Sheet1") firstrow clear
* Order the observations by subjid.
sort subjid
* Write the dataset to disk, overwriting any existing file of the same name.
save ISARICadata, replace
* Repeat the same load / sort / save steps for the discharge workbook.
import excel "discharge_data 15032022", sheet ("Sheet1") firstrow clear
sort subjid
* NOTE(review): "ISARICddate" may be a typo for "ISARICddata" (compare
* "ISARICadata" above) - confirm against later code before renaming.
save ISARICddate, replace
# a list of tuples, one tuple per sample
# within the tuple is (Ct value, percentage of genome covered at 20x)
all_samples = [(35.7, 0), (32.7, 0), (27.6, 0), (29.8, 0), (31.2, 0), (30.6, 0), (29.8, 0), (31.4, 0), (27.6, 0), (30, 0), (30.8, 0), (29.9, 0), (28.7, 0), (20.8, 0), (23.1, 0), (28.2, 0), (32.3, 0), (27.2, 2.33), (29, 3.33), (28.7, 6.25), (29.3, 6.6), (25.8, 9.38), (36.6, 9.7), (27.5, 13.16), (14.5, 15.17), (25.8, 21.84), (22.6, 24.02), (25.9, 25.77), (26.2, 26.18), (27.5, 31.05), (28.4, 35.62), (28.6, 42.34), (28.3, 44.2), (25.1, 44.92), (28.3, 45.42), (24.9, 47.9), (28.8, 48.85), (27.3, 49.63), (30.2, 51.79), (29.6, 52.48), (27, 56.45), (31.5, 63.44), (23.8, 63.89), (16.1, 63.9), (22.3, 63.9), (17.1, 63.91), (31.5, 67.75), (30.6, 72.76), (24.9, 73.66), (15.2, 76.66), (24.6, 77.6), (13.1, 77.69), (26.2, 82.32), (26.9, 83.05), (29.7, 84.68), (26.7, 84.99), (29.5, 85.36), (14.3, 86.2), (26.2, 86.73), (13.8, 87.73), (23.3, 88.24), (30.6, 89.36), (24.4, 89.79), (25.7, 90.14), (25.9, 90.3), (24.6, 90.71
@flashton2003
flashton2003 / nanopore_costs.py
Created March 22, 2021 15:20
script to calculate costs for nanopore sequencing
import math
from copy import deepcopy
def how_many_packs(number_flow_cells, option1):
# pack_sizes is nanopore flowcell pack sizes, sorted from high to low
pack_sizes = sorted([300, 48, 24, 12, 1], reverse = True)
# pack to buy is the largest pack which is less than or equal to the number we need
pack_to_buy = [x for x in pack_sizes if x <= number_flow_cells][0]
# make a note that we need one of that pack size in the option1 dict
import sys
import gffutils
class PseudoFeature():
    """Lightweight stand-in for a feature record.

    Holds the four values it is constructed with, unchanged, as plain
    attributes: ``chrom``, ``start``, ``stop`` and ``strand``.
    """

    def __init__(self, chrom, start, stop, strand):
        """Store the given values on the instance without validation.

        Args:
            chrom: value exposed as ``self.chrom``.
            start: value exposed as ``self.start``.
            stop: value exposed as ``self.stop``.
            strand: value exposed as ``self.strand``.
        """
        self.chrom = chrom
        self.start = start
        self.stop = stop
        self.strand = strand
import dendropy as dpy
# Load the tree from a NEXUS file (placeholder path - point it at a real file).
tree = dpy.Tree.get_from_path('/path/to/tree.nxs', 'nexus')
# Walk the nodes in post-order and print the annotations of each internal node.
for nd in tree.postorder_node_iter():
    if nd.is_internal():
        nd_annot = nd.annotations.values_as_dict()
        # print() with a single argument behaves the same on Python 2 and 3.
        print(nd_annot)
@flashton2003
flashton2003 / process_disty_output.py
Created March 21, 2019 11:23
converts disty output from symmetrical matrix to upper rectangular, one value per line
import fileinput
# Read every input line (from the files named on the command line, or from
# stdin when none are given), stripping surrounding whitespace.
lines = [line.strip() for line in fileinput.input()]
# Drop the first line, then drop the first whitespace-separated field of each
# remaining line.
# NOTE(review): presumably these are the header row and the row-label column
# of the matrix - confirm against disty's output format.
lines = [x.split()[1:] for x in lines[1:]]
@flashton2003
flashton2003 / midpoint_root_tree.py
Created March 21, 2019 11:21
midpoint roots a tree
from ete3 import Tree
import fileinput
# Collect the stripped input lines (from the files named on the command line,
# or from stdin when none are given).
lines = [line.strip() for line in fileinput.input()]
# Build the tree from the first input line only; any further lines are not
# used here. Raises IndexError when the input is empty.
tree = Tree(lines[0])
library(readr)
library(dplyr)
library(cartogram)
library(tmap)
library(maptools)
# Load the wrld_simpl dataset into the workspace.
data(wrld_simpl)
# Read the tab-delimited TB burden table from the home directory, trimming
# whitespace around fields; quotes are not treated as escaped by doubling.
all_tb_burden <- read_delim("~/all_tb_burden.tsv", "\t", escape_double = FALSE, trim_ws = TRUE)
# NOTE(review): presumably the year to select from the table, kept as a
# string - confirm against the code that uses it.
y = '2016'
from __future__ import division
from Bio import SeqIO
import pprint
import glob
import os
import sys
import itertools
class Contigs():
def __init__(self, contig_handle):