Skip to content

Instantly share code, notes, and snippets.

View dnanto's full-sized avatar
⚗️
🝎

Daniel Antonio Negrón dnanto

⚗️
🝎
View GitHub Profile
@dnanto
dnanto / rmsd.py
Created March 8, 2024 17:07
Calculate RMSD using Biopython
#!/usr/bin/env python3
__author__ = "dnanto"
import json
import sys
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, FileType
from pathlib import Path
from Bio.PDB import PDBParser, Superimposer
@dnanto
dnanto / ft.R
Created May 5, 2021 16:42
R function to read the NCBI BankIt feature table file format using the tidyverse packages
read_ft <- function(file) {
# read tsv with a maximum of five columns
suppressWarnings(
read_tsv(
file,
col_names = c("start", "end", "name", "key", "val"),
col_types = cols(.default = "c")
)
) %>%
# associate rows with a feature accession and feature identifier number
@dnanto
dnanto / geomid.R
Created June 25, 2020 18:14
Calculate the geographic midpoint given a list of geographic coordinates in R.
library(tidyverse)
geomid <- function(lat, lon)
{
# http://www.geomidpoint.com/calculation.html
lat <- lat * pi / 180
lon <- lon * pi / 180
x <- mean(sum(cos(lat) * cos(lon)))
y <- mean(sum(cos(lat) * sin(lon)))
z <- mean(sum(sin(lat)))
@dnanto
dnanto / ft.py
Created May 4, 2020 14:46
Parse the Feature Table format of Entrez Direct E-utilities.
def parse_coor(coor):
    """Turn a pair of coordinate strings into a tuple of two integers.

    Only the items at index 0 and 1 of ``coor`` are read. For each one,
    any leading ">" characters and then any leading "<" characters are
    removed, and the remainder is converted with ``int``.

    Returns a 2-tuple ``(first, second)`` in the same order as the input.
    """
    return tuple(int(coor[idx].lstrip(">").lstrip("<")) for idx in (0, 1))
def parse_ft(file):
acc, feat, coors, anno = None, None, None, None
for line in map(str.rstrip, file):
if line.startswith(">Feature"):
acc = line[9:].split("|", maxsplit=2)[1].split(".")[0]
elif acc and line:
if not line[0].isspace():
@dnanto
dnanto / dot.R
Last active April 8, 2020 16:25
R function to read a DOT file into an igraph object
library(tidyverse)
library(igraph)
parse_attr <- function(val)
{
tokens <-
str_split(val, '(,)(?=(?:[^"]|"[^"]*")*$)') %>%
lapply(str_split, '(=)(?=(?:[^"]|"[^"]*")*$)') %>%
unlist() %>%
str_trim() %>%
@dnanto
dnanto / pkg.sh
Created March 18, 2020 17:27
Manual installation of BEAST2 v2.5.0+ packages...
#!/usr/bin/env bash
# ideally, set this outside of this script, like in a profile
export BEAST_PACKAGE_PATH="$(pwd)"/.pkg
arr=(
"https://github.com/BEAST2-Dev/BEASTLabs/releases/download/v1.9.0/BEASTlabs.addon.v1.9.2.zip"
"https://github.com/BEAST2-Dev/bModelTest/releases/download/v1.2.0/bModelTest.addon.v1.2.1.zip"
"https://github.com/BEAST2-Dev/model-selection/releases/download/v1.5.0/MODEL_SELECTION.addon.v1.5.2.zip"
"https://github.com/BEAST2-Dev/nested-sampling/releases/download/v1.1.0/NS.addon.v1.1.0.zip"
@dnanto
dnanto / snp-sites-counts.R
Last active March 9, 2020 17:55
snp-sites VCF to SNP counts...
# mafft ncov.fna > msa.fna
# snp-sites -c -v msa.fna > snp.vcf
library(tidyverse)
lines <- read_lines("snp.vcf")
fields <- str_split(last(lines[startsWith(lines, "#")]), "\t")[[1]]
read_tsv(lines, comment = "#", col_names = fields, col_types = cols(.default = "c")) %>%
select(10:ncol(.)) %>%
mutate_all(as.integer) %>%
colSums() %>%
sort(decreasing = T) %>%
@dnanto
dnanto / lsystem.py
Created January 10, 2020 05:41
L-system string generator in python...
#!/usr/bin/env python3
# L-system string generator: repeatedly rewrites a string using a fixed
# symbol -> replacement table.
# The first command-line argument is parsed as the number of rewriting passes.
import sys
# Starting string for the rewriting.
axiom = "0"
# Rewrite table: each key symbol is replaced by its value string.
rules = { "0": "1[0]0", "1": "11" }
# One rewriting pass per loop iteration; the count comes from sys.argv[1].
for i in range(int(sys.argv[1])):
# Symbols that have no entry in `rules` (such as "[" and "]") are kept as-is.
axiom = "".join(rules.get(e, e) for e in axiom)
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this prints after every pass or only the final string -- confirm.
print(axiom)
@dnanto
dnanto / rbtop.R
Created December 31, 2019 22:07
Calculate the reverse complement of a BTOP string (uses the tidyverse)
# Complement lookup table: a named list in which each name is a nucleotide
# code and the value is the single-character code it maps to
# (e.g. comp[["A"]] is "T", comp[["U"]] is "A", comp[["N"]] is "N").
# The two strings are aligned position by position: the i-th character of
# the second string is the name, the i-th character of the first is the value.
# Built with base strsplit()/setNames() so no `simplify` flag is needed
# (the original passed `1` and `T` rather than `TRUE`) and the table can be
# created without any package attached.
comp <- setNames(
  as.list(strsplit("TAACGRYSWMKVHDBN", "", fixed = TRUE)[[1]]),
  strsplit("ATUGCYRSWKMBDHVN", "", fixed = TRUE)[[1]]
)
revcomp_btop <- function(btop)
{
str_replace_all(btop, "([A-Z-])([A-Z-])", " \\2 \\1 ") %>%
str_split(" ", simplify = T) %>%
rev() %>%
@dnanto
dnanto / btop.R
Created September 21, 2019 16:35
Decode mutations from a BTOP string in R (uses the tidyverse)
decode_btop <- function(btop)
{
matches <- as.integer(str_extract_all(btop, "\\d+", simplify = T))
matches <- if (str_starts(btop, "\\d", negate = T)) c(0, matches) else matches
pos <- 0; n <- 0; m <- 0; muts <- list();
for (ele in Filter(nchar, str_split(btop, "\\d+", simplify = T)))
{
pos <- pos + matches[n<-n+1]
for (i in seq(1, nchar(ele), 2)) muts[[m<-m+1]] <- list(pos = pos<-pos+1, mut = substr(ele, i, i+1))
}