Skip to content

Instantly share code, notes, and snippets.

@emvolz
Created May 14, 2020 11:43
Show Gist options
  • Save emvolz/7306c793c33562e068b0d1ab7a4335c9 to your computer and use it in GitHub Desktop.
Save emvolz/7306c793c33562e068b0d1ab7a4335c9 to your computer and use it in GitHub Desktop.
library( sarscov2 )
library( ape )
# define some parameters
## path to the GISAID alignment:
fn = 'large_alignment_may5_Erik_Brazil.fasta'
## A simple name for the region:
region='brazil'
## A path to the file containing small genetic distance pairs
distfn = 'large_alignment_may5_Erik_Brazil.dst'
## When do SEIR dynamics initiate:
startTime = 2020.085
## How many sequences to include within the region?
n_region = Inf
## How many to include from the intertional reservoir?
n_reservoir = 50 # (actual number to be included will be greater since it also includes close distance matches)
## How many starting trees? A BEAST run will be carried out for each
n_startingtrees = 2
# Load the metadata from the ncov-gisaid repo
md <- read.csv( 'Darlan_may5_dataG.csv', stringsAs=FALSE )
# Alternatively you could select KingCounty AND Washington by giving multiple inclusion criteria
regiontips = region_sampler1( md, n = n_region , inclusion_rules = list( c('CityOrCounty', 'SaoPaulo') ) , dedup= FALSE )
exrules = list( c('Country', 'Brazil' ) )
exogtips = exog_sampler2( md, n_reservoir, distfn, regiontips, dedup=FALSE, exclusion_rules= exrules )
cat( 'How many brazil sequences are in exogtips?\n' )
print( sum( grepl( exogtips , patt='Brazil', ignore.case=TRUE) ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment