This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pretty-blastn: run blastn with comma-separated output preceded by a header row.
#
# Prints a single comma-joined header line naming the output columns, then
# invokes blastn with `-outfmt "10 delim=, <columns>"` so that the data rows
# that follow use the same columns in the same order as the header.
#
# Arguments:
#   "$@"  forwarded verbatim to blastn after the -outfmt option.
# Outputs:
#   The header line, then whatever blastn writes to stdout.
# Returns:
#   The exit status of blastn (it is the last command executed).
pretty-blastn () {
    # Scope the helpers to this function so calling it does not create or
    # overwrite same-named variables in the caller's shell.
    local -a column_names=(
        qaccver saccver pident length mismatch gapopen
        qstart qend sstart send slen evalue bitscore
    )
    local column_names_string header

    # "${array[*]}" joins elements on the first character of IFS; the
    # command-substitution subshell keeps the IFS change from leaking out.
    column_names_string=$(IFS=' ' ; echo "${column_names[*]}")  # space-joined: blastn format specifiers
    header=$(IFS=',' ; echo "${column_names[*]}")               # comma-joined: CSV header row

    echo "${header}"
    blastn \
        -outfmt "10 delim=, ${column_names_string}" \
        "${@}"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rule to count the number of lines in input.interleaved.fastq | |
rule count_lines:
    # Reads input.interleaved.fastq on stdin and redirects the `wc -l`
    # result into line_count.txt.
    input: "input.interleaved.fastq"
    output: "line_count.txt"
    # Feeding the file through `<` makes wc print only the number,
    # without the file name after it.
    shell: "wc -l < {input} > {output}"
# Rule to convert paired-end FASTQ files to interleaved format |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from itertools import product | |
from typing import Dict, Tuple | |
import sys | |
def generate_codon_mapping( | |
dna_alphabet="GATC", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import downloads | |
from joblib import Parallel, delayed | |
from itertools import islice | |
from tqdm import tqdm | |
def get_gbk_path(assembly) -> str: | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import taxonomy | |
tax = taxonomy.Taxonomy.from_ncbi("ncbi_taxdump/") | |
FULL_RANKS = [ 'superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'] | |
def get_scalar(d: dict, key: str): | |
if key in d: | |
vals = d[key] | |
if len(vals) == 1: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>MN908947.3 | |
ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT | |
GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT | |
CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC | |
TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT | |
CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC | |
ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG | |
AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG | |
CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA | |
ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
(None, {"id": "447834", "parent": "447833", "rank": "subspecies"}), | |
(None, {"id": "447833", "parent": "111897", "rank": "species"}), | |
(None, {"id": "111897", "parent": "1664845", "rank": "genus"}), | |
(None, {"id": "1664845", "parent": "42282", "rank": "tribe"}), | |
(None, {"id": "42282", "parent": "33415", "rank": "subfamily"}), | |
(None, {"id": "33415", "parent": "37572", "rank": "family"}), | |
(None, {"id": "37572", "parent": "104431", "rank": "superfamily"}), | |
(None, {"id": "104431", "parent": "37567", "rank": "clade"}), | |
(None, {"id": "37567", "parent": "41197", "rank": "clade"}), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# usage: cat reads.fastq | ./get-barcodes.py | |
from Bio import SeqIO | |
from collections import defaultdict | |
BARCODE_SIZE = 14 | |
counts = defaultdict(lambda: 0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model STRING NULLABLE | |
taxonomy_name STRING NULLABLE | |
description STRING NULLABLE | |
title STRING NULLABLE | |
gsm STRING NULLABLE | |
attributes STRING REPEATED | |
dbgap STRING NULLABLE | |
attribute_recs RECORD REPEATED | |
attribute_recs. unit STRING NULLABLE | |
attribute_recs. display_name STRING NULLABLE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>AY179509.1 Mink astrovirus, complete genome | |
CCGAAGTAGGTGTGTGTGTTGCCGTTATGGCTAACAACACTACCAGCGCTCTTCACCCTCGTGGCTCTGGCCAGCGCTGT | |
GTCTATGACACAGTGCTCCGGTTTGGGGACCCCGATGCACGTCGCAGGGGTTTCCAATTGGACGAGGTGTCACATAATAA | |
GTTGTGTGACATTTTTGACAGCGGCCCGCTCCACTTCGCTTTTGGTGATCTTAAAGTGATGAAGGTGGCGGGTGGTGTGG | |
TCACACCGCATAAAACAGTTGTCAAAACAGTCTATGTCTCAGGTGTTCAAGAGGGTAACGATTATGTCACTTTTGCCTTC | |
ACGCCTGGACCTAACGAGTGGCGCGAAGTTGATCCCCGCATCGACAAGCGCACAGCACTCGTCGGTGTCCTTGTGCAAGA | |
ACATAAAAAATTGGACTCAGACCTTAAGGAGTCGCGCCGTGAGTTGTCCCAGCTCAAGTTGGAGCACTCACTGTTGAGAC | |
ATGACTATGAGCGCTTGGTCCGTGAAAAGCCTGGTCCTGCTATGAGAACTTTTAAATTCTCAGCTGTCATCTTTTATGCG | |
TTTTTCCTTGGTTTCCTGCTTATGTCTGCTGTCAAGGGTGAGGTGTATGGTCGCTGTCTTGACAGCGAGCTTAACCTCAA | |
TGGCAACCCTGAAGTGTGTTTGCATTGGGAAGAGGTTAAATCTTTTAGCCTCCAGGTTGCCCTTGCAGACTTCTGGAACA |
Newer Older