Skip to content

Instantly share code, notes, and snippets.

@DomBennett
DomBennett / sequence_info.R
Created January 14, 2019 09:37
Getting extra sequence information for a phylotaR sequence
# Example: look up extra metadata for a phylotaR sequence by querying NCBI
# with the identifier stored on the sequence record.
library(phylotaR)
# load the example `yeasts` object (presumably bundled with phylotaR -- confirm)
data("yeasts")
# not all info is stored on the seq object
# (print the structure of the first sequence record to see what it holds)
str(yeasts@sqs@sqs[[1]])
# but the accession can be used to download extra info
# the `id` slot of the first sequence record is taken as the accession
accssn <- yeasts@sqs@sqs[[1]]@id
library(rentrez)
# request the summary record for this accession from the 'nucleotide' database
# NOTE(review): presumably needs network access to NCBI -- confirm
smmry_obj <- entrez_summary(db = 'nucleotide', id = accssn)
# print the structure of the returned summary to see which fields are available
str(smmry_obj)
# e.g. getting country
@DomBennett
DomBennett / aedes_culex.R
Created October 29, 2018 21:33
Multiple species IDs with phylotaR
# Libs ----
# Optional install step, left commented out; presumably the phylotaR branch
# this script was written against -- confirm before relying on it.
# devtools::install_github('ropensci/phylotaR', ref = 'multiple_ids')
library(phylotaR)
library(taxize)
# Txids ----
# look-up random selection of 60 aedes and culex spp
# 7158 -- Aedes genus
# ask the 'ncbi' database for the taxa below ID 7158, down to rank 'species'
# NOTE(review): presumably needs network access to NCBI -- confirm
aedes_spp <- downstream(x = '7158', db = 'ncbi', downto = 'species')
# 7174 -- Culex genus
@DomBennett
DomBennett / dplyr.R
Last active October 22, 2018 09:57
Software club: Intro to the tidyverse
# http://genomicsclass.github.io/book/pages/dplyr_tutorial.html
# Libs ----
library(dplyr)
# Data ----
# Download the msleep CSV only when no local copy exists yet.
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/msleep_ggplot2.csv"
filename <- "msleep_ggplot2.csv"
if (!file.exists(filename)) {
  downloader::download(url, filename)
}
# forget head! Just make sure it is a "tibble"
# Read from `filename` (not a repeated literal) so the download target and
# the file that is read cannot drift apart if the name is ever changed.
msleep <- as_tibble(read.csv(filename))
@DomBennett
DomBennett / install_deps.R
Created June 5, 2018 13:33
Get consensus from forward and reverse sequences in R
# Installed
# installed.packages() returns a character matrix with one row per package;
# the row names are the package names.
already_installed <- installed.packages()
# CRAN packages
cran_deps <- c("ape", "reshape2", "phangorn", "stringi", "stringr")
# Keep only the dependencies that are missing. Compare against the package
# names (row names) rather than the whole matrix: `%in%` on the matrix would
# also test every other cell (Depends, Imports, ...), so a dependency could be
# wrongly treated as installed when its name appears alone in such a cell.
cran_deps <- cran_deps[!cran_deps %in% rownames(already_installed)]
for (dep in cran_deps) {
  install.packages(dep)
}
@DomBennett
DomBennett / genbank_download.R
Last active April 9, 2018 08:34
Download from GenBank with cache using R
# LIBS
library(rentrez)
# FUNCTIONS
connect_safely <- function(fnctn, args, wt_tms) {
res <- NULL
for (wt_tm in wt_tms) {
query <- try(R.utils::withTimeout(do.call(fnctn, args),
timeout = 3600),
silent = TRUE)
@DomBennett
DomBennett / parse_raxml_newick.R
Created February 12, 2018 14:53
Convert RAxML consensus tree | Readable in R | APE library
# LIB
library(ape)
# DATA
# tree string produced by `RAxML -J ` arg
# Internal nodes are written as a branch length followed by a value in square
# brackets (e.g. `:1.0[80]`) -- presumably the consensus support; confirm.
trstr <- "(A,B,(C,((D,((E,F):1.0[80],((G,H):1.0[100],(I,J):1.0[100]):1.0[67]):1.0[58]):1.0[98],(K,(L,(M,N):1.0[89]):1.0[91]):1.0[100]):1.0[97]):1.0[97]);"
# FAIL
# Parsing the raw string directly; per the note below, the bracketed values
# do not end up as node labels on the resulting tree.
tree <- read.tree(text=trstr)
# node.label = NULL
@DomBennett
DomBennett / fasta_mover.R
Created February 6, 2018 10:42
Take sequences from fasta file and write as separate files
# Take sequences from fasta file and write as separate files
# FUNCTION
readSqs <- function(fl) {
all_data <- readLines(fl)
sqs <- list()
for(i in seq_along(all_data)) {
bit <- all_data[[i]]
if(grepl(pattern='^>', x=bit)) {
nm <- sub(pattern='^>', '', x=bit)
@DomBennett
DomBennett / over_a_list.R
Created January 25, 2018 13:15
Calculating trip dist in parallel
# LIB
library(treeman)
library(doMC)
library(foreach)
# FUNCTION
randTreeList <- function(n) {
res <- vector(mode='list', length=n)
for(i in 1:n) {
res[[i]] <- randTree(20, wndmtrx=TRUE)
@DomBennett
DomBennett / getGroups.R
Created September 18, 2017 19:44
getGroups w/ edges
getGroups <- function(tree, get_edges=TRUE) {
.fillInRes <- function(i) {
res <- vector('list', length=2)
names(res) <- c('g1', 'g2')
res[['g1']] <- which(edge.tips[i, ] == 1)
res[['g2']] <- which(edge.tips[i, ] == 0)
res
}
.addEdges <- function(i) {
res[[i]][['edglngth']] <<- edglngths[[i]]
@DomBennett
DomBennett / text_distance_example.R
Created February 11, 2017 16:25
Calculating text distances using string distance metrics and word frequencies
# EXAMPLE TEXT DISTANCES
# FUNCTIONS
justText <- function(txt) {
  # Reduce text to letters and spaces only: every character that is not
  # a-z, A-Z or a space (punctuation, digits, ...) is deleted.
  # Works element-wise on a character vector and returns the cleaned vector.
  not_letter_or_space <- "[^a-zA-Z ]"
  gsub(pattern = not_letter_or_space, replacement = "", x = txt)
}
calcStrDst <- function(txts) {
# calculate the distance between vector of texts