Created
May 24, 2020 17:35
-
-
Save emvolz/e66c579e518cdeceb95dca44754003ed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library( sarscov2 ) | |
library( ape ) | |
res = 10 | |
tau0 = 30 / 365 / 36.5^2 | |
ncpu = 6 | |
# define some parameters | |
## path to the GISAID alignment: | |
fn = 'may18_filter1.fas' | |
## A simple name for the region: | |
region='washington' | |
## A path to the file containing small genetic distance pairs | |
distfn = 'may18_tn93dist.txt' | |
## How many sequences to include within the region? | |
n_region = Inf | |
## How many starting trees? A BEAST run will be carried out for each | |
n_startingtrees = 10 | |
# Load the metadata from the ncov-gisaid repo | |
md <- read.csv( 'may18_data_filter1.csv', stringsAs=FALSE ) | |
# select all sequences from Washington state | |
regiontips = region_sampler1( md, n = n_region | |
, inclusion_rules = | |
list( | |
c('RegionOrState', '^Washington$') | |
) | |
) | |
mdregion = md[ match( regiontips, md$seqName ) , ] | |
# Now make the aligment | |
d3 = prep_tip_labels_seijr( fn, outfn = 'algn3.fasta', regiontips = regiontips, exogtips = c(), metadata = mdregion ) | |
# Make the time trees | |
# Note requires IQTREE | |
tds = make_starting_trees('algn3.fasta', treeoutfn = "startTrees.nwk", ntres = n_startingtrees, ncpu = ncpu) | |
# smooth node times | |
gtds = parallel::mclapply( tds, function(td) gibbs_jitter( td, returnTrees=2 )[[2]] , mc.cores = ncpu ) | |
# compute genotype | |
s614 = compute_spike614genotype( 'algn3.fasta' ) | |
# do the thing | |
sp = s614_phylodynamics( gtds, s614 , res = res, tau0 = tau0 ) | |
rv = list( spike = sp , tds = gtds, s614 = s614 , region = 'Washington' , data= d3 ) | |
saveRDS(rv, file = 'p0-rv.rds' ) | |
# plots | |
pne = with( sp, add_ggNe_sarscov2skygrowth( sgD, sgG ) ) | |
pgr = with( sp, add_ggGR_sarscov2skygrowth( sgD, sgG ) ) | |
spl = plot_sample_distribution_spike614( s614 ) | |
#~ Using dates after feb 15: | |
sp1 = s614_phylodynamics( gtds, s614 , res = 15, tau0 = tau0 , tstart = decimal_date( as.Date( '2020-02-15' ) ) ) | |
pne1 = with( sp1, add_ggNe_sarscov2skygrowth( sgD, sgG ) ) | |
pgr1 = with( sp1, add_ggGR_sarscov2skygrowth( sgD, sgG ) ) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment