Created
May 19, 2020 10:00
-
-
Save emvolz/ab45a4de824ca8896ea559d6146a2261 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library( sarscov2 ) | |
library( ape ) | |
# define some parameters | |
## path to the GISAID alignment, distance file, metadata: | |
fn = 'may10_filter1.fas' | |
distfn = 'dist.txt' | |
mdfn = 'may10_dataC.csv' | |
## A simple name for the region: | |
region='yorkshire' | |
## How many sequences to include within the region? | |
n_region = Inf # include all matches | |
## How many to include from the intertional reservoir? | |
n_reservoir = 100 # (actual number to be included will be greater since it also includes close distance matches) | |
## How many starting trees? | |
n_startingtrees = 20 | |
# Load the metadata from the ncov-gisaid repo | |
md <- read.csv( mdfn, stringsAs=FALSE ) | |
# Sample from yorkshire | |
regiontips = region_sampler1( md, n = n_region , inclusion_rules = list( c('CityOrCounty', '.*Yorkshire$') ), dedup=FALSE) | |
mdregion = md[ match( regiontips, md$seqName ) , ] | |
exogtips = exog_sampler2( md, n_reservoir, distfn, regiontips, dedup=FALSE ) # no exclusion rules, but these can be added in same format as the inclusion rules | |
# Now make the aligment for BEAST. This adds sample time and deme to tip labels: | |
d3 = prep_tip_labels_seijr( fn, outfn = 'algn3.fasta', regiontips = regiontips, exogtips = exogtips, metadata = md ) | |
# Make the starting trees | |
# Note requires IQTREE | |
tds = make_starting_trees('algn3.fasta', treeoutfn = "startTrees.nwk", ntres = n_startingtrees, ncpu = 6) | |
ti = compute_timports(tds, numpb = 10, ncpu = 6, excludeBefore = 2020) | |
save.image('a0.rda' ) | |
#~ £ when is first | |
t0s = unlist( do.call( c, lapply( ti[[2]], function(x) lapply( x, function(y) min(y$y) ) ) ) ) | |
library( lubridate ) | |
print( quantile( date_decimal(t0s) , c(.025, .975)) ) | |
#~ 2.5% 97.5% | |
#~ "2020-01-17 04:37:19 UTC" "2020-02-13 10:41:29 UTC" | |
#~ £ how many imports | |
nimports = unlist( do.call( c, lapply( ti[[2]], function(x) lapply( x, nrow ) ) ) ) | |
print( summary( nimports )) | |
print( quantile( nimports, c(.025, .5, .975 )) ) | |
#~ 2.5% 50% 97.5% | |
#~ 15 30 35 | |
(treepl = quick_region_treeplot( tds[[1]], region='Il' , maxdate=max( date_decimal( tds[[1]]$sts) ) ) ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment