This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import pandas as pd | |
def find_clusters(df: pd.DataFrame, protein_list: list) -> pd.DataFrame: | |
""" | |
Given a dataframe and a list of protein IDs, | |
return groups of proteins that belong to the same cluster (per chromosome). | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
import argparse | |
def parse_gff(gff_file): | |
""" | |
Parse GFF file and extract CDS with protein ID, gene locus, mRNA, chrom, strand, positions. | |
""" | |
records = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python | |
import os | |
import sys | |
import argparse | |
from pathlib import Path | |
import subprocess | |
import multiprocessing as mp | |
from tqdm import tqdm |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create new conda env | |
mamba create -n phyloprofile_v1.20 r-base pkg-config pkgconfig fontconfig gsl lxml | |
# activate that env and start an R terminal | |
mamba activate phyloprofile_v1.20 | |
R | |
# install phyloprofile from bioconductor | |
install.packages("BiocManager") | |
BiocManager::install("PhyloProfile") | |
# or install dev version from github | |
install.packages("devtools") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(dplyr) | |
#' Split a multi ortholog group file into single files | |
splitDomainFile <- function(domainFile = NULL, outPath = NULL) { | |
if (is.null(domainFile)) stop("Domain file cannot be NULL") | |
if (is.null(outPath)) stop("Output path cannot be NULL") | |
df <- fread( | |
domainFile, header = TRUE, stringsAsFactors = FALSE, sep = "\t" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script: create an ete3 NCBITaxa handle and ask it to update its taxonomy database.
from ete3 import NCBITaxa | |
# NOTE(review): per the ete3 docs, the first instantiation may itself download
# and build the local taxonomy database if none exists yet — confirm.
ncbi = NCBITaxa() | |
# Presumably fetches the latest NCBI taxdump and rebuilds the local database —
# verify against the ete3 documentation.
ncbi.update_taxonomy_database() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(PhyloProfile) | |
processNcbiTaxonomy <- function(taxdmpfile = NULL) { | |
if (is.null(taxdmpfile) || !file.exists(taxdmpfile)) { | |
stop("taxdmp.zip file invalid!") | |
} else temp <- taxdmpfile | |
names <- utils::read.table( | |
unz(temp, "names.dmp"), header = FALSE, fill = TRUE, sep = "\t", | |
quote = "", comment.char = "", stringsAsFactors = FALSE |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from Bio import SeqIO | |
import argparse | |
import shutil | |
def combine_fa(fa_1, fa_2, out_file): | |
""" Combine 2 fasta files """ | |
new_fa_dict = SeqIO.to_dict(SeqIO.parse(open(fa_2),'fasta')) | |
existing_seq = SeqIO.to_dict(SeqIO.parse(open(fa_1),'fasta')) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(PhyloProfile) | |
setwd('PhyloProfile/data') | |
# load data | |
data(taxonNamesReduced) | |
# modify the dataframe | |
# for example, rename Actinobacteria to Actinomycetota | |
taxonNamesReduced$fullName[ | |
taxonNamesReduced$rank == "phylum" & taxonNamesReduced$ncbiID == 201174 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import timeit


def test(st, en):
    """Return a random integer N such that st <= N <= en (both ends inclusive)."""
    return random.randint(st, en)


# Time 10 consecutive calls of test(10, 100); Timer.timeit(number) returns the
# total elapsed time in seconds for all `number` executions, not a per-call mean.
t = timeit.Timer(lambda: test(10, 100))
print(t.timeit(10))
NewerOlder