Skip to content

Instantly share code, notes, and snippets.

View sminot's full-sized avatar

Sam Minot sminot

View GitHub Profile
@sminot
sminot / download_results.py
Last active July 11, 2016 18:23
Download all results
#!/usr/bin/python
"""
One Codex CSV Download Script.
Simple single-dependency (requests) Python 2/3 script for downloading
One Codex analysis results and saving them to CSVs, as well as read-level results
"""
from __future__ import print_function
import os
import requests
#
# Copyright Reference Genomics, Inc. 2016
# Released under the MIT License
#
# Script for fetching analysis results from the One Codex API (v0)
# See https://docs.onecodex.com for full documentation on the REST API
#
# Script can be run from the command line with:
# `python download_results.py $ONE_CODEX_API_KEY $output_filepath`
#
@sminot
sminot / download_result_table.py
Created May 9, 2016 23:07
Download results from One Codex and combine into a single table (python) (note: requires pandas)
#!/usr/bin/python
"""Download results from One Codex and combine into a single table (note: requires pandas)."""
import requests
import os
import pandas as pd
# Expects the API key for One Codex to be stored in the ONE_CODEX_API_KEY environment variable
api_key = os.environ['ONE_CODEX_API_KEY']
@sminot
sminot / fetch_ocx_analyses.R
Last active August 22, 2016 23:55 — forked from boydgreenfield/fetch_ocx_analyses.R
Sample R script for generating a CSV with One Codex analysis results, one column per sample
#!/usr/local/bin/Rscript
# Copyright Reference Genomics, Inc. 2016
# Released under the MIT License
#
# Script for fetching analysis results from the One Codex API (v0)
# See https://docs.onecodex.com for full documentation on the REST API
#
# Script can be run from the command line with:
# `Rscript fetch_ocx_analyses.R <API_KEY> <FILE> [-d DB] [-b BEGINNING_DATE -e ENDING_DATE]`
@sminot
sminot / split_by_header.py
Created July 14, 2017 17:32
Split a FASTQ file by header
#!/usr/bin/python
"""Split up a FASTQ file based on the first field of the header."""
from collections import defaultdict
import gzip
import sys
import os
fp = sys.argv[1]
if not os.path.exists(fp):
@sminot
sminot / compare_proteins_blastp.py
Created September 8, 2017 20:52
Compare protein FASTAs with BLASTP and output XLSX
#!/usr/bin/python
"""Given a set of protein FASTA files, perform pairwise comparison via BLAST, outputting an Excel spreadsheet."""
import os
import sys
import json
import subprocess
import pandas as pd
from collections import defaultdict
from Bio.SeqIO.FastaIO import SimpleFastaParser
@sminot
sminot / ncbi_taxonomy.py
Last active January 7, 2024 09:37
Class for using the NCBI taxonomy, reading from taxdump files
import os
from functools import lru_cache
from collections import defaultdict
# Read in the taxonomy
class NCBITaxonomy():
def __init__(self, folder):
self.tax = defaultdict(dict)
# Read in the file of taxid information
names_fp = os.path.join(folder, 'names.dmp')
@sminot
sminot / test_sparse_dataframe_creation.ipy
Created October 26, 2017 17:52
Profiling sparse DataFrame creation
#!/usr/local/bin/ipython
import pandas as pd
from collections import defaultdict
from random import choice
alph = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
# Function to make some test data
def make_dat(nrows=1000, ncols=1000, nvals=1000):
@sminot
sminot / make_mothur_tax.py
Last active December 8, 2017 23:08
Make a taxonomy file compatible with mothur
#!/usr/bin/python
"""Make a taxonomy file compatible with mothur."""
import os
import sys
import pandas as pd
if len(sys.argv) != 4:
print("Please specify the seq_info.csv, tax_info.csv, and output.tsv files")
@sminot
sminot / read_from_s3.py
Created February 20, 2018 22:23
Read JSON directly from AWS S3
import io
import json
import gzip
import boto3
def read_gzipped_json_file_from_s3(bucket_name, key_name):
s3 = boto3.client('s3')
retr = s3.get_object(Bucket=bucket_name, Key=key_name)
bytestream = io.BytesIO(retr['Body'].read())