View sort-rmdup-bbl.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# NOTE: this is a hack, so it will probably break if you have BBL files that | |
# don't look like the natbib-generated ones I'm used to. It is also pretty | |
# unintelligent about *how* it sorts entries (it defers most of the work | |
# to python), so if you have cases where some of your references are by | |
# the same person or whatever then that might cause the output to not match | |
# your expectations. | |
import sys |
View gfa-to-fasta.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# Converts a GFA assembly graph to a FASTA file of all sequences | |
# within the graph. Notably, this ignores connections between sequences | |
# in the graph. | |
# | |
# Depends on Python 3.6 or later. | |
# | |
# Usage: | |
# $ ./gfa-to-fasta.py mygraph.gfa contigs.fasta |
View handle_duplicate_sample_ids.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
import os | |
from collections import Counter | |
from math import ceil | |
import re | |
from numpy import argmax | |
import pandas as pd | |
from qiime2 import Metadata | |
# "Parameters" of this script |
View find_missing_dates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
from dateutil.parser import parse | |
import pandas as pd | |
df = pd.read_csv("20191209_metadata.txt", sep="\t", index_col=0) | |
# Subset to a certain host subject ID, if desired | |
df = df[df["host_subject_id"] == "M03"] |
View negative_control_stats.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
""" | |
This is a small script that looks through the annotated taxonomies of all | |
features present in a dataset's negative control samples. It's handy for | |
checking that certain features are (for the most part) absent from these | |
samples. | |
This obviously isn't a very formal way of accounting for contamination, | |
but it is useful for quickly verifying that certain taxa are probably not | |
the product of contamination. (Better approaches include e.g. the decontam |
View convert_timestamp_to_ordinal_date.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
from qiime2 import Metadata | |
from dateutil.parser import parse | |
from dateutil.relativedelta import relativedelta | |
m = Metadata.load("metadata-with-age.tsv") | |
m_df = m.to_dataframe() | |
m_df["ordinal-timestamp"] = 0 |
View convert_timestamp_to_days_elapsed.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
from qiime2 import Metadata | |
from dateutil.parser import parse | |
m = Metadata.load("metadata-with-age-and-ordinal-timestamp.tsv") | |
m_df = m.to_dataframe() | |
# Compute earliest date | |
min_date = None | |
for sample_id in m_df.index: |
View gh_url_to_raw_gh_url.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Matches a github.com file ("blob") URL: the scheme, an optional "www.", then
# exactly two path segments (owner and repository) before the "blob" marker.
# Anchoring on owner/repo means only that one "/blob/" is ever removed.
_GITHUB_BLOB_URL = re.compile(
    r"^(https?://)(?:www\.)?github\.com/([^/]+/[^/]+)/blob/(.+)$"
)


def github_url_to_raw(url):
    """Convert a github.com file ("blob") URL to a raw.githubusercontent.com URL.

    Only the "/blob/" segment that directly follows "<owner>/<repo>" is
    dropped, so owners, repositories, branches or subfolders that happen to
    be named "blob" are left intact.

    Parameters:
        url: a link of the form
            https://github.com/<owner>/<repo>/blob/<ref>/<path>

    Returns:
        The corresponding raw-content link, using the same scheme as `url`.

    Raises:
        ValueError: if `url` does not have the form described above.
    """
    match = _GITHUB_BLOB_URL.match(url)
    if match is None:
        raise ValueError("not a github.com blob URL: {!r}".format(url))
    scheme, owner_and_repo, ref_and_path = match.groups()
    return "{}raw.githubusercontent.com/{}/{}".format(
        scheme, owner_and_repo, ref_and_path
    )


# your link goes here
link = "https://github.com/knightlab-analyses/qurro-mackerel-analysis/blob/master/AnalysisOutput/qurro-plot.qzv"
print(github_url_to_raw(link))
# example output link:
# https://raw.githubusercontent.com/knightlab-analyses/qurro-mackerel-analysis/master/AnalysisOutput/qurro-plot.qzv
View split_metadata_by_run.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: Assumes that there's a SAMPLE_METADATA environment variable declared pointing to a metadata file | |
# NOTE: Assumes that this metadata file contains BarcodeSequence and seq_run_ord columns | |
import pandas as pd | |
import os | |
# Load the metadata table named by the SAMPLE_METADATA environment variable
# (tab-separated; the first column is used as the index).
metadata_path = os.environ["SAMPLE_METADATA"]
md = pd.read_csv(metadata_path, sep="\t", index_col=0)

# Report how many distinct values the BarcodeSequence column holds.
unique_barcodes = md["BarcodeSequence"].unique()
print(
    "There are {} unique barcode sequences in this metadata file.".format(
        len(unique_barcodes)
    )
)

# Report the distinct values of the seq_run_ord column, kept as a tuple in
# `runs`.
runs = tuple(md["seq_run_ord"].unique())
run_summary = "Also, the {} runs listed in this metadata file are {}."
print(run_summary.format(len(runs), runs))
View add_age_column_to_metadata.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
from qiime2 import Metadata | |
from dateutil.parser import parse | |
from dateutil.relativedelta import relativedelta | |
subject_id = "HOST SUBJECT ID" | |
subject_birthday = "HOST BIRTHDAY" | |
subject_birthday_datetime = parse(subject_birthday) | |
age_col_name = "subject_age_years" |
NewerOlder