Last active
April 12, 2017 16:00
-
-
Save ctb/3ac8e48f03401d4af849cc5408e7b159 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
Do a multi-k Jaccard resemblance analysis between many genome | |
signatures and a single metagenome. | |
Usage: | |
cmp-multik-multisig.py metagenome.sig query-1.sig [ query-2.sig ... ] | |
Construct signatures with: | |
sourmash compute -k 21,31,41,51 --dna --scaled 10000 *.fa | |
""" | |
import sys | |
sys.path.insert(0, '/Users/t/dev/sourmash') | |
import sourmash_lib | |
from sourmash_lib.signature import load_signatures | |
from collections import defaultdict | |
import csv | |
KSIZES=[21,31,41,51] | |
def main(): | |
import argparse | |
p = argparse.ArgumentParser() | |
p.add_argument('against_sig') | |
p.add_argument('query_sigs', nargs='+') | |
args = p.parse_args() | |
minhashdict = defaultdict(dict) | |
signame_to_file = {} | |
for sigfile in args.query_sigs: | |
sigs = load_signatures(sigfile) | |
for sig in sigs: | |
ksize = sig.minhash.ksize | |
minhashdict[sig.name()][ksize] = sig.minhash | |
signame_to_file[sig.name()] = sigfile | |
against_minhashes = {} | |
against_name = None | |
for against_sig in load_signatures(args.against_sig): | |
ksize = against_sig.minhash.ksize | |
against_minhashes[ksize] = against_sig.minhash | |
against_name = against_sig.name() | |
csvout = csv.writer(sys.stdout) | |
for signame in minhashdict: | |
results = [] | |
for K in KSIZES: | |
against = against_minhashes[K] | |
query = minhashdict[signame][K] | |
match = query.jaccard(against) | |
results.append(match) | |
sigfile = signame_to_file[signame] | |
results.insert(0, sigfile) | |
results.insert(0, signame) | |
results.insert(0, against_name) | |
csvout.writerow(results) | |
if __name__ == '__main__': | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
podar-reads | Salinispora_tropica_CNB-440 | g47.fa.sig | 0.9858585858585859 | 0.9758064516129032 | 0.9602649006622517 | 0.9513184584178499 | |
---|---|---|---|---|---|---|---|
podar-reads | Sulfitobacter_NAS-14.1_scf_1099451320477_ | g61.fa.sig | 1.0 | 0.997093023255814 | 1.0 | 0.9943502824858758 | |
podar-reads | Zymomonas_mobilis_subsp._mobilis_ZM4 | g59.fa.sig | 0.9735449735449735 | 0.9639175257731959 | 0.9581151832460733 | 0.9540816326530612 | |
podar-reads | Enterococcus_faecalis_V583 | g20.fa.sig | 0.6798679867986799 | 0.5945945945945946 | 0.5946843853820598 | 0.5578231292517006 | |
podar-reads | Clostridium_thermocellum_ATCC_27405 | g16.fa.sig | 1.0 | 1.0 | 1.0 | 0.9973404255319149 | |
podar-reads | Bacteroides_thetaiotaomicron_VPI-5482 | g5.fa.sig | 1.0 | 0.9983739837398374 | 1.0 | 0.9966887417218543 | |
podar-reads | Persephonella_marina_EX-H1 | g55.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Geobacter_sulfurreducens_PCA | g23.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Nitrosomonas_europaea_ATCC_19718 | g35.fa.sig | 1.0 | 0.9959349593495935 | 0.9922480620155039 | 1.0 | |
podar-reads | Pyrococcus_furiosus_DSM_3638 | g42.fa.sig | 0.9947916666666666 | 1.0 | 0.9893048128342246 | 1.0 | |
podar-reads | Shewanella_baltica_OS223, complete genome | g64.fa.sig | 0.949685534591195 | 0.9352380952380952 | 0.8930693069306931 | 0.8210332103321033 | |
podar-reads | Chloroflexus_aurantiacus_J-10-fl | g15.fa.sig | 1.0 | 1.0 | 0.9979166666666667 | 1.0 | |
podar-reads | Haloferax_volcanii_DS2 | g24.fa.sig | 0.9824561403508771 | 0.9637681159420289 | 0.9636363636363636 | 0.9640287769784173 | |
podar-reads | Pyrobaculum_aerophilum_str._IM2 | g39.fa.sig | 1.0 | 1.0 | 1.0 | 0.9919354838709677 | |
podar-reads | Sulfitobacter_sp._EE-36_scf_1099451318008_ | g60.fa.sig | 0.9967213114754099 | 1.0 | 1.0 | 0.9967105263157895 | |
podar-reads | Archaeoglobus_fulgidus_DSM_4304 | g4.fa.sig | 1.0 | 1.0 | 1.0 | 0.9954954954954955 | |
podar-reads | Treponema_denticola_ATCC_35405 | g57.fa.sig | 1.0 | 1.0 | 1.0 | 0.9965397923875432 | |
podar-reads | Burkholderia1_xenovorans_LB400_chromosome_1,_complete_sequence | g8.fa.sig | 0.916135881104034 | 0.8467391304347827 | 0.7883995703544576 | 0.7740585774058577 | |
podar-reads | Methanococcus_maripaludis_strain_S2,_complete_sequence | g31.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Salinispora_arenicola_CNS-205 | g46.fa.sig | 0.9761904761904762 | 0.965034965034965 | 0.9526515151515151 | 0.9547038327526133 | |
podar-reads | Thermus_thermophilus_HB27 | g56.fa.sig | 0.7965116279069767 | 0.772972972972973 | 0.7128205128205128 | 0.6502242152466368 | |
podar-reads | Chlorobiumtepidum_TLS | g14.fa.sig | 1.0 | 1.0 | 1.0 | 0.9949748743718593 | |
podar-reads | Methanopyrus_kandleri_AV19 | g32.fa.sig | 1.0 | 1.0 | 0.9940119760479041 | 1.0 | |
podar-reads | Sulfurihydrogenibium_yellowstonense_SS-5 | g49.fa.sig | 0.9872611464968153 | 0.9726775956284153 | 0.9647887323943662 | 0.96 | |
podar-reads | Acidobacterium_capsulatum_ATCC_51196 | g1.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Methanocaldococcus_jannaschii_DSM_2661 | g29.fa.sig | 1.0 | 1.0 | 0.9941176470588236 | 1.0 | |
podar-reads | Thermotoga_sp._RQ2 | g54.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Pyrobaculum_calidifontis_JCM_11548 | g41.fa.sig | 1.0 | 0.9947368421052631 | 1.0 | 1.0 | |
podar-reads | Sulfolobus_tokodaii str. 7 DNA, complete genome | g63.fa.sig | 1.0 | 1.0 | 0.9963898916967509 | 1.0 | |
podar-reads | Bacteroides_vulgatus_ATCC_8482 | g6.fa.sig | 0.9978021978021978 | 1.0 | 1.0 | 0.997872340425532 | |
podar-reads | Nostoc_sp._PCC_7120_DNA | g36.fa.sig | 0.998371335504886 | 1.0 | 0.9983792544570502 | 1.0 | |
podar-reads | Wolinella_succinogenes_DSM_1740 | g58.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Hydrogenobaculum_sp._Y04AAS1 | g26.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Chlorobiumlimicola_DSM_245 | g11.fa.sig | 0.9961832061068703 | 1.0 | 0.9960629921259843 | 0.9965870307167235 | |
podar-reads | Dictyoglomus_turgidum_DSM_6724 | g19.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Chlorobiumphaeovibrioides_DSM_265 | g13.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Fusobacterium_nucleatum_subsp._nucleatum_ATCC_25586 | g21.fa.sig | 0.17757009345794392 | 0.10476190476190476 | 0.08196721311475409 | 0.04700854700854701 | |
podar-reads | Caldibescii_DSM_6725 | g10.fa.sig | 1.0 | 0.9966216216216216 | 0.9964664310954063 | 1.0 | |
podar-reads | Desulfovibrio_vulgaris_DP4 | g18.fa.sig | 0.7911764705882353 | 0.7566765578635015 | 0.7238372093023255 | 0.6676136363636364 | |
podar-reads | Caldisaccharolyticus_DSM_8903 | g9.fa.sig | 0.9963369963369964 | 1.0 | 0.9966442953020134 | 0.9963503649635036 | |
podar-reads | SulfuriYO3AOP1 | g50.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Methanosarcina_acetivorans_str._C2A | g33.fa.sig | 0.9962264150943396 | 0.996219281663516 | 0.9927007299270073 | 0.9907407407407407 | |
podar-reads | Leptothrix_cholodnii_SP-6 | g28.fa.sig | 0.972972972972973 | 0.9681908548707754 | 0.9632034632034632 | 0.9515789473684211 | |
podar-reads | Akkermansia_muciniphila_ATCC_BAA-835 | g3.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Thermotoga_petrophila_RKU-1 | g53.fa.sig | 0.9947368421052631 | 0.9945945945945946 | 0.9751552795031055 | 0.9788359788359788 | |
podar-reads | Deinococcus_radiodurans_R1_chromosome_1,_complete_sequence | g17.fa.sig | 0.9688473520249221 | 0.9516129032258065 | 0.9519230769230769 | 0.9146757679180887 | |
podar-reads | Ignicoccus_hospitalis_KIN4/I | g27.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Bordetella_bronchiseptica_strain_RB50 | g7.fa.sig | 0.9274509803921569 | 0.9032846715328468 | 0.8853974121996303 | 0.8665413533834586 | |
podar-reads | Gemmatimonas_aurantiaca_T-27_DNA | g22.fa.sig | 1.0 | 1.0 | 1.0 | 0.997737556561086 | |
podar-reads | Rhodopirellula_baltica_SH_1_complete_genome | g44.fa.sig | 1.0 | 1.0 | 0.9985401459854014 | 1.0 | |
podar-reads | Chlorobiumphaeobacteroides_DSM_266 | g12.fa.sig | 0.9963369963369964 | 1.0 | 1.0 | 0.9966101694915255 | |
podar-reads | Shewanella_baltica_OS185 | g48.fa.sig | 0.9869848156182213 | 0.9783889980353635 | 0.9577981651376147 | 0.9314814814814815 | |
podar-reads | Pelodictyon_phaeoclathratiforme_BU-1 | g37.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Porphyromonas_gingivalis_ATCC_33277_DNA | g38.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Pyrobaculum_arsenaticum_DSM_13514 | g40.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Pyrococcus_horikoshii_OT3_DNA | g43.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Ruegeria_pomeroyi_DSS-3 | g45.fa.sig | 0.9628712871287128 | 0.9327146171693735 | 0.8989071038251366 | 0.8634020618556701 | |
podar-reads | Herpetosiphon_aurantiacus_ATCC_23779 | g25.fa.sig | 0.996875 | 0.9879725085910653 | 0.9854014598540146 | 0.9938556067588326 | |
podar-reads | Desulfovibrio_piger_ATCC_29098 | g62.fa.sig | 1.0 | 0.9962962962962963 | 0.9957983193277311 | 0.9924528301886792 | |
podar-reads | Nanoarchaeum_equitans_Kin4-M | g34.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Aciduliprofundum_boonei_T469 | g2.fa.sig | 1.0 | 1.0 | 0.9939393939393939 | 1.0 | |
podar-reads | Methanococcus_maripaludis_C5 | g30.fa.sig | 1.0 | 1.0 | 1.0 | 1.0 | |
podar-reads | Thermoanaerobacter_pseudethanolicus_ATCC_33223 | g51.fa.sig | 1.0 | 1.0 | 0.9956331877729258 | 1.0 | |
podar-reads | Thermotoga_neapolitana_DSM_4359 | g52.fa.sig | 1.0 | 1.0 | 0.9951923076923077 | 0.9884393063583815 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment