Skip to content

Instantly share code, notes, and snippets.

@emvolz
Created May 19, 2020 10:00
Show Gist options
  • Save emvolz/ab45a4de824ca8896ea559d6146a2261 to your computer and use it in GitHub Desktop.
Save emvolz/ab45a4de824ca8896ea559d6146a2261 to your computer and use it in GitHub Desktop.
library( sarscov2 )
library( ape )
# define some parameters
## path to the GISAID alignment, distance file, metadata:
fn = 'may10_filter1.fas'
distfn = 'dist.txt'
mdfn = 'may10_dataC.csv'
## A simple name for the region:
region='yorkshire'
## How many sequences to include within the region?
n_region = Inf # include all matches
## How many to include from the intertional reservoir?
n_reservoir = 100 # (actual number to be included will be greater since it also includes close distance matches)
## How many starting trees?
n_startingtrees = 20
# Load the metadata from the ncov-gisaid repo
md <- read.csv( mdfn, stringsAs=FALSE )
# Sample from yorkshire
regiontips = region_sampler1( md, n = n_region , inclusion_rules = list( c('CityOrCounty', '.*Yorkshire$') ), dedup=FALSE)
mdregion = md[ match( regiontips, md$seqName ) , ]
exogtips = exog_sampler2( md, n_reservoir, distfn, regiontips, dedup=FALSE ) # no exclusion rules, but these can be added in same format as the inclusion rules
# Now make the aligment for BEAST. This adds sample time and deme to tip labels:
d3 = prep_tip_labels_seijr( fn, outfn = 'algn3.fasta', regiontips = regiontips, exogtips = exogtips, metadata = md )
# Make the starting trees
# Note requires IQTREE
tds = make_starting_trees('algn3.fasta', treeoutfn = "startTrees.nwk", ntres = n_startingtrees, ncpu = 6)
ti = compute_timports(tds, numpb = 10, ncpu = 6, excludeBefore = 2020)
save.image('a0.rda' )
#~ £ when is first
t0s = unlist( do.call( c, lapply( ti[[2]], function(x) lapply( x, function(y) min(y$y) ) ) ) )
library( lubridate )
print( quantile( date_decimal(t0s) , c(.025, .975)) )
#~ 2.5% 97.5%
#~ "2020-01-17 04:37:19 UTC" "2020-02-13 10:41:29 UTC"
#~ £ how many imports
nimports = unlist( do.call( c, lapply( ti[[2]], function(x) lapply( x, nrow ) ) ) )
print( summary( nimports ))
print( quantile( nimports, c(.025, .5, .975 )) )
#~ 2.5% 50% 97.5%
#~ 15 30 35
(treepl = quick_region_treeplot( tds[[1]], region='Il' , maxdate=max( date_decimal( tds[[1]]$sts) ) ) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment