Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
from sys import argv
with open(argv[1], 'U') as input_f, open(argv[2], 'w') as output_f:
for line in input_f:
# Write comment/header line(s) to output file unmodified
if line.startswith('#'):
output_f.write(line)
continue
@adamrp
adamrp / add_taxonomy_prefixes.py
Created March 5, 2015 19:44
Adds taxonomy-level prefixes similar to Greengenes' to RDP output
#!/usr/bin/env python
from sys import argv
prefixes = ['k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__']
with open(argv[1], 'U') as input_f, open(argv[2], 'w') as output_f:
for line in input_f:
otu_id, tax, p = line.strip().split('\t')
tax = '; '.join([''.join(x) for x in zip(prefixes, tax.split(';'))])
@adamrp
adamrp / transfer_consent.py
Last active August 29, 2015 14:08
Transfers consent information from the old American Gut DB to the new structure
@adamrp
adamrp / map_survey.py
Last active August 29, 2015 14:08
Maps old AG surveys to new survey structure
#!/usr/bin/env python
from datetime import datetime
from data_access_connections import data_access_factory
from enums import ServerConfig, DataAccessType
from amgut import db_conn
data_access = data_access_factory(ServerConfig.data_access_type,
@adamrp
adamrp / get_study_configs.py
Last active August 29, 2015 14:08
Ports studies from the old database to Qiita
#!/usr/bin/env python
from datetime import datetime
from os.path import join
import click
from data_access_connections import data_access_factory
from enums import ServerConfig, DataAccessType
from qiita_db.study import StudyPerson
@adamrp
adamrp / generate_db_init.py
Last active August 29, 2015 14:07
File used to reverse-engineer Python structures into database tables
#!/usr/bin/env python
from amgut.lib.human_survey_supp import (
question_map, responses_map, key_map, question_group, group_order,
supplemental_map, question_type)
from amgut.lib.locale_data.american_gut import _HUMAN_SURVEY
from amgut.lib.data_access.sql_connection import SQLConnectionHandler
conn = SQLConnectionHandler()
@adamrp
adamrp / gc_content.py
Created June 6, 2014 17:33
Calculates the GC content across all sequences in an input fasta file
#!/usr/bin/env python
from sys import argv
from skbio.parse.sequences.fasta import parse_fasta
def calculate_gc_content(input_fasta):
gc = 0
total_length = 0
@adamrp
adamrp / filter_observations_by_sample.py
Last active December 11, 2019 10:32
Given a biom table, iterate over the SampleData. Set observations that represent less than a certain fraction of the sample's total abundance to zero.
#!/usr/bin/env python
from argparse import ArgumentParser
from numpy import array
from biom import load_table, Table
__author__ = "Adam Robbins-Pianka"
__copyright__ = "Copyright 2013"
__credits__ = ["Adam Robbins-Pianka"]