Skip to content

Instantly share code, notes, and snippets.

View trvinh's full-sized avatar

Vinh Tran trvinh

  • Goethe University Frankfurt
  • Frankfurt am Main, Germany
View GitHub Profile
@trvinh
trvinh / update_ete_ncbi.py
Created February 8, 2024 08:48
Update the NCBI taxonomy database of the Python ETE3 library
# Script: refresh the NCBI taxonomy database used by the ETE3 library.
from ete3 import NCBITaxa
# Create the ETE3 NCBI taxonomy interface object.
ncbi = NCBITaxa()
# Ask ETE3 to update its taxonomy database.
# NOTE(review): presumably this fetches the latest NCBI taxdump and rebuilds
# the local database — confirm against the ETE3 documentation.
ncbi.update_taxonomy_database()
@trvinh
trvinh / process_taxDB.R
Last active June 12, 2023 14:31
Process taxonomy DB for PhyloProfile from downloaded taxdmp.zip file
library(PhyloProfile)
processNcbiTaxonomy <- function(taxdmpfile = NULL) {
if (is.null(taxdmpfile) || !file.exists(taxdmpfile)) {
stop("taxdmp.zip file invalid!")
} else temp <- taxdmpfile
names <- utils::read.table(
unz(temp, "names.dmp"), header = FALSE, fill = TRUE, sep = "\t",
quote = "", comment.char = "", stringsAsFactors = FALSE
@trvinh
trvinh / combine_fasta.py
Last active March 8, 2023 14:51
Python script for concatenating 2 fasta files without duplicated sequence headers
# -*- coding: utf-8 -*-
from Bio import SeqIO
import argparse
import shutil
def combine_fa(fa_1, fa_2, out_file):
""" Combine 2 fasta files """
new_fa_dict = SeqIO.to_dict(SeqIO.parse(open(fa_2),'fasta'))
existing_seq = SeqIO.to_dict(SeqIO.parse(open(fa_1),'fasta'))
@trvinh
trvinh / update_data_pp.txt
Created February 2, 2023 13:39
Update PhyloProfile predata (RData files in PhyloProfile/data folder)
library(PhyloProfile)
setwd('PhyloProfile/data')
# load data
data(taxonNamesReduced)
# modify the dataframe
# for example, rename Actinobacteria to Actinomycetota
taxonNamesReduced$fullName[
taxonNamesReduced$rank == "phylum" & taxonNamesReduced$ncbiID == 201174
@trvinh
trvinh / use_timeit.py
Created December 13, 2022 16:07
Use timeit to calculate runtime of a function
import timeit
def test(st,en):
return random.randint(st, en)
t = timeit.Timer(lambda: test(10, 100))
print(t.timeit(10))
@trvinh
trvinh / update_domainFile.py
Created November 30, 2022 15:00
Update domain files with InterPro IDs
#!/bin/env python
############################################
# #
# Update domain files with InterPro IDs #
# #
############################################
import sys
import os
@trvinh
trvinh / update_annoFAS.py
Last active November 30, 2022 15:26
Update FAS annotation files to version 1.15 (containing interpro IDs and tool versions)
#!/bin/env python
import sys
import os
from pathlib import Path
import re
import json
import shutil
import subprocess
import argparse
@trvinh
trvinh / get_rank.R
Last active April 25, 2022 14:03
get rank name of a list of taxon IDs or names
library(PhyloProfile)
library(data.table)
library(tidyr)
args = commandArgs(trailingOnly=TRUE)
idListFile <- args[1]
idListDf <- read.csv(idListFile, stringsAsFactors = FALSE, header = FALSE)
type <- args[2]
firstup <- function(x) {
@trvinh
trvinh / get_rank_names.R
Last active April 25, 2022 14:05
Get the names at DEFINED TAXONOMY RANKS for a list of taxon IDs or names
library(PhyloProfile)
library(data.table)
library(tidyr)
args = commandArgs(trailingOnly=TRUE)
idListFile <- args[1]
idListDf <- read.csv(idListFile, stringsAsFactors = FALSE, header = FALSE) # data.table::fread(idListFile)
type <- args[2]
@trvinh
trvinh / install_jekyll.md
Created March 28, 2022 09:49
Install and use Jekyll on Mac - PRESENTATION WITH JEKYLL

Source: 01 02

# install xcode (if not yet)
xcode-select --install

# install homebrew (if not yet)
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"

### STEP 1: install ruby and jekyll