Sam Minot sminot

## download_results.py
#!/usr/bin/python
"""
One Codex CSV Download Script.

Simple Python 2/3 script with a single dependency (requests) for downloading
One Codex analysis results and saving them to CSVs, as well as downloading
read-level results.
"""
from __future__ import print_function
import os
import requests

## fetch_ocx_analyses.R
#
# Copyright Reference Genomics, Inc. 2016
# Released under the MIT License
#
# Script for fetching analysis results from the One Codex API (v0)
# See https://docs.onecodex.com for full documentation on the REST API
#
# Script can be run from the command line with:
# `Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath`
#

## download_result_table.py
#!/usr/bin/python
"""Download results from One Codex and combine into a single table (note: requires pandas)."""

import requests
import os
import pandas as pd

# Expects the API key for One Codex to be stored in the ONE_CODEX_API_KEY environment variable
api_key = os.environ['ONE_CODEX_API_KEY']

## fetch_ocx_analyses.R
#!/usr/local/bin/Rscript

# Copyright Reference Genomics, Inc. 2016
# Released under the MIT License
#
# Script for fetching analysis results from the One Codex API (v0)
# See https://docs.onecodex.com for full documentation on the REST API
#
# Script can be run from the command line with:
# `Rscript fetch_ocx_analyses.R <API_KEY> <FILE> [-d DB] [-b BEGINNING_DATE -e ENDING_DATE]`

## split_by_header.py
#!/usr/bin/python
"""Split up a FASTQ file based on the first field of the header."""

from collections import defaultdict
import gzip
import sys
import os

fp = sys.argv[1]
if not os.path.exists(fp):

## compare_proteins_blastp.py
#!/usr/bin/python
"""Given a set of protein FASTA files, perform pairwise comparison via BLAST, outputting an Excel spreadsheet."""

import os
import sys
import json
import subprocess
import pandas as pd
from collections import defaultdict
from Bio.SeqIO.FastaIO import SimpleFastaParser

## ncbi_taxonomy.py
import os
from functools import lru_cache
from collections import defaultdict

# Read in the taxonomy
class NCBITaxonomy():
    def __init__(self, folder):
        self.tax = defaultdict(dict)
        # Read in the file of taxid information
        names_fp = os.path.join(folder, 'names.dmp')

## test_sparse_dataframe_creation.ipy
#!/usr/local/bin/ipython

import pandas as pd
from collections import defaultdict
from random import choice

alph = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

# Function to make some test data
def make_dat(nrows=1000, ncols=1000, nvals=1000):

## make_mothur_tax.py
#!/usr/bin/python
"""Make a taxonomy file compatible with mothur."""

import os
import sys
import pandas as pd

if len(sys.argv) != 4:
    print("Please specify the seq_info.csv, tax_info.csv, and output.tsv files")

## read_from_s3.py
import io
import json
import gzip
import boto3

def read_gzipped_json_file_from_s3(bucket_name, key_name):
    s3 = boto3.client('s3')
    retr = s3.get_object(Bucket=bucket_name, Key=key_name)

    bytestream = io.BytesIO(retr['Body'].read())
	#!/usr/bin/python
	"""
	One Codex CSV Download Script.

	Simple Python 2/3 script with a single dependency (requests) for downloading
	One Codex analysis results and saving them to CSVs, as well as downloading
	read-level results.
	"""
	from __future__ import print_function
	import os
	import requests
	#
	# Copyright Reference Genomics, Inc. 2016
	# Released under the MIT License
	#
	# Script for fetching analysis results from the One Codex API (v0)
	# See https://docs.onecodex.com for full documentation on the REST API
	#
	# Script can be run from the command line with:
	# `Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath`
	#
	#!/usr/bin/python
	"""Download results from One Codex and combine into a single table (note: requires pandas)."""

	import requests
	import os
	import pandas as pd

	# Expects the API key for One Codex to be stored in the ONE_CODEX_API_KEY environment variable
	api_key = os.environ['ONE_CODEX_API_KEY']
	#!/usr/local/bin/Rscript

	# Copyright Reference Genomics, Inc. 2016
	# Released under the MIT License
	#
	# Script for fetching analysis results from the One Codex API (v0)
	# See https://docs.onecodex.com for full documentation on the REST API
	#
	# Script can be run from the command line with:
	# `Rscript fetch_ocx_analyses.R <API_KEY> <FILE> [-d DB] [-b BEGINNING_DATE -e ENDING_DATE]`
	#!/usr/bin/python
	"""Split up a FASTQ file based on the first field of the header."""

	from collections import defaultdict
	import gzip
	import sys
	import os

	fp = sys.argv[1]
	if not os.path.exists(fp):
	#!/usr/bin/python
	"""Given a set of protein FASTA files, perform pairwise comparison via BLAST, outputting an Excel spreadsheet."""

	import os
	import sys
	import json
	import subprocess
	import pandas as pd
	from collections import defaultdict
	from Bio.SeqIO.FastaIO import SimpleFastaParser
	import os
	from functools import lru_cache
	from collections import defaultdict

	# Read in the taxonomy
	class NCBITaxonomy():
	def __init__(self, folder):
	self.tax = defaultdict(dict)
	# Read in the file of taxid information
	names_fp = os.path.join(folder, 'names.dmp')
	#!/usr/local/bin/ipython

	import pandas as pd
	from collections import defaultdict
	from random import choice

	alph = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

	# Function to make some test data
	def make_dat(nrows=1000, ncols=1000, nvals=1000):
	#!/usr/bin/python
	"""Make a taxonomy file compatible with mothur."""

	import os
	import sys
	import pandas as pd

	if len(sys.argv) != 4:
	print("Please specify the seq_info.csv, tax_info.csv, and output.tsv files")
	import io
	import json
	import gzip
	import boto3

	def read_gzipped_json_file_from_s3(bucket_name, key_name):
	s3 = boto3.client('s3')
	retr = s3.get_object(Bucket=bucket_name, Key=key_name)

	bytestream = io.BytesIO(retr['Body'].read())