Mercurial > repos > devteam > lca_wrapper
changeset 0:e1dea768b4c1 draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Thu, 23 Jan 2014 12:30:52 -0500 |
parents | |
children | |
files | lca.py lca.xml test-data/lca_input.taxonomy test-data/lca_input2.taxonomy test-data/lca_input3.taxonomy test-data/lca_output.taxonomy test-data/lca_output2.taxonomy test-data/lca_output3.taxonomy tool_dependencies.xml |
diffstat | 9 files changed, 450 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# Guruprasad Ananda
"""
Least Common Ancestor tool.

Usage: lca.py infile outfile rank_bound

For every read (column 0) of a tab-separated taxonomy dataset, report the
taxonomic ranks on which ALL rows of that read agree.  Every rank from the
first disagreement downwards is written as 'n'.  Reads whose ranks from
``rank_bound`` downwards are all 'n' are dropped (``rank_bound`` 0 keeps
every read).

Mapping of ranks to 0-based columns:
    root        : 2
    superkingdom: 3
    kingdom     : 4
    subkingdom  : 5
    superphylum : 6
    phylum      : 7
    subphylum   : 8
    superclass  : 9
    class       : 10
    subclass    : 11
    superorder  : 12
    order       : 13
    suborder    : 14
    superfamily : 15
    family      : 16
    subfamily   : 17
    tribe       : 18
    subtribe    : 19
    genus       : 20
    subgenus    : 21
    species     : 22
    subspecies  : 23

The code is written to run unchanged on Python 2.6+ and Python 3.
"""
import subprocess
import sys
import tempfile

# Read id, taxon id, root and the 21 ranks superkingdom..subspecies.
NUM_TAXONOMY_COLS = 24
# Columns 1 and 2 are copied from a read's first row; agreement is only
# evaluated from superkingdom onwards.
FIRST_RANK_COL = 3
# Column holding the read id the rows are grouped on.
GROUP_COL = 0


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    sys.exit(1)


def _probe_width(path):
    """Validate the head of *path* and return the dataset's column count.

    The first 32 lines must each have at least 24 tab-separated columns,
    otherwise the program stops with an error.  The width is taken from the
    last of the first 31 lines.  Returns None when the file is empty.
    """
    width = None
    with open(path, 'r') as fin:
        for j, line in enumerate(fin):
            elems = line.strip().split('\t')
            if len(elems) < NUM_TAXONOMY_COLS:
                stop_err("The format of the input dataset is incorrect. Taxonomy datatype should contain at least 24 columns.")
            if j > 30:
                break
            width = len(elems)
    return width


def _sort_by_read(inputfile, sorted_path):
    """Sort *inputfile* on the read id column into *sorted_path*.

    The -k option of the POSIX sort command is 1-based (-k POS1[,POS2]), so
    1 is added to GROUP_COL.  POS2 is set equal to POS1 because newer
    versions of sort otherwise extend the key to the end of the line.

    The command is passed as an argument list (no shell), so file names
    containing spaces or shell metacharacters are handled safely.
    """
    key = "%d,%d" % (GROUP_COL + 1, GROUP_COL + 1)
    argv = ["sort", "-f", "-k", key, "-o", sorted_path, "--", inputfile]
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        output = proc.communicate()[0]
    except OSError as exc:
        stop_err('Initialization error -> %s' % str(exc))
    if proc.returncode != 0:
        stop_err("Sorting input dataset resulted in error: %s: %s" % (proc.returncode, output.strip()))


def _format_group(read_id, first_vals, agree_end, rank_bound):
    """Build the output line for one read, or return None if it is filtered.

    read_id    -- raw value of the grouping column
    first_vals -- stripped column values of the read's first row
    agree_end  -- first column on which the read's rows disagree
                  (NUM_TAXONOMY_COLS when they agree on every rank)
    rank_bound -- 0 to keep every read, otherwise the column from which at
                  least one rank must differ from 'n'
    """
    out_list = [read_id] + first_vals[1:agree_end]
    out_list += ['n'] * (NUM_TAXONOMY_COLS - agree_end)
    # Columns after the ranks (e.g. a GI number) come from the first row.
    out_list += first_vals[NUM_TAXONOMY_COLS:]
    if rank_bound != 0:
        if not any(val != 'n' for val in out_list[rank_bound:NUM_TAXONOMY_COLS]):
            return None
    return '\t'.join(out_list).strip()


def main():
    """Command-line entry point: lca.py infile outfile rank_bound."""
    try:
        inputfile = sys.argv[1]
        outfile = sys.argv[2]
        rank_bound = int(sys.argv[3])
    except (IndexError, ValueError):
        stop_err("Syntax error: Use correct syntax: program infile outfile rank_bound")

    width = _probe_width(inputfile)
    if width is None:
        # Empty input: produce an empty output instead of failing later.
        open(outfile, "w").close()
        return

    skipped_lines = 0
    current_id = None      # None until the first valid row has been seen
    first_vals = None
    agree_end = NUM_TAXONOMY_COLS

    with tempfile.NamedTemporaryFile() as tmpfile:
        _sort_by_read(inputfile, tmpfile.name)
        with open(outfile, "w") as fout:
            with open(tmpfile.name, 'r') as sorted_in:
                for line in sorted_in:
                    raw = line.rstrip('\r\n').split('\t')
                    # Taxonomy datatype should have at least 24 columns
                    if line.startswith('#') or len(raw) < NUM_TAXONOMY_COLS:
                        skipped_lines += 1
                        continue

                    read_id = raw[GROUP_COL]
                    if read_id == current_id:
                        # Every row of the read is compared, so a later row
                        # can still move the disagreement to a higher rank.
                        for col in range(FIRST_RANK_COL, agree_end):
                            if raw[col].strip() != first_vals[col]:
                                agree_end = col
                                break
                        continue

                    # A new read id: the input is sorted on it, so the
                    # previous read is complete and can be written out.
                    if current_id is not None:
                        out_line = _format_group(current_id, first_vals,
                                                 agree_end, rank_bound)
                        if out_line is not None:
                            fout.write(out_line + "\n")
                    current_id = read_id
                    first_vals = [val.strip() for val in raw[:width]]
                    agree_end = NUM_TAXONOMY_COLS

            # Handle the last grouped value
            if current_id is not None:
                out_line = _format_group(current_id, first_vals,
                                         agree_end, rank_bound)
                if out_line is not None:
                    fout.write(out_line + "\n")

    if skipped_lines > 0:
        sys.stdout.write("Skipped %d invalid lines.\n" % skipped_lines)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lca.xml Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,100 @@ +<tool id="lca1" name="Find lowest diagnostic rank" version="1.0.1"> + <description></description> + <requirements> + <requirement type="package" version="1.0.0">taxonomy</requirement> + </requirements> + <command interpreter="python"> + lca.py $input1 $out_file1 $rank_bound + </command> + <inputs> + <param format="taxonomy" name="input1" type="data" label="for taxonomy dataset"/> + <param name="rank_bound" label="require the lowest rank to be at least" type="select"> + <option value="0">No restriction</option> + <option value="3">Superkingdom</option> + <option value="4">Kingdom</option> + <option value="5">Subkingdom</option> + <option value="6">Superphylum</option> + <option value="7">Phylum</option> + <option value="8">Subphylum</option> + <option value="9">Superclass</option> + <option value="10">Class</option> + <option value="11">Subclass</option> + <option value="12">Superorder</option> + <option value="13">Order</option> + <option value="14">Suborder</option> + <option value="15">Superfamily</option> + <option value="16">Family</option> + <option value="17">Subfamily</option> + <option value="18">Tribe</option> + <option value="19">Subtribe</option> + <option value="20">Genus</option> + <option value="21">Subgenus</option> + <option value="22">Species</option> + <option value="23">Subspecies</option> + </param> + </inputs> + <outputs> + <data format="taxonomy" name="out_file1" metadata_source="input1" /> + </outputs> + <tests> + <test> + <param name="input1" value="lca_input.taxonomy" ftype="taxonomy"/> + <param name="rank_bound" value="0" /> + <output name="out_file1" file="lca_output.taxonomy" ftype="taxonomy"/> + </test> + <test> + <param name="input1" value="lca_input2.taxonomy" ftype="taxonomy"/> + <param name="rank_bound" value="7" /> + <output name="out_file1" file="lca_output2.taxonomy" ftype="taxonomy"/> + </test> + + <!--Test case with 
invalid lines --> + <test> + <param name="input1" value="lca_input3.taxonomy" ftype="taxonomy"/> + <param name="rank_bound" value="10" /> + <output name="out_file1" file="lca_output3.taxonomy" ftype="taxonomy"/> + </test> + </tests> + + <help> + +**What it does** + +This tool identifies the lowest taxonomic rank for which a metagenomic sequencing read is diagnostic. It takes datasets produced by *Fetch Taxonomic Ranks* tool (aka Taxonomy format) as the input. + +------- + +**Example** + +Suppose you have two reads, **read_1** and **read_2**, with the following taxonomic profiles (scroll sideways to see the entire dataset):: + + read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1 + read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 subgenus2 species2 subspecies2 + read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum3 subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3 + read_2 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4 + +For **read_1** taxonomic labels are consistent until the genus level, where the taxonomy splits into two branches, one ending with *subspecies1* and the other with *subspecies2*. This implies **that the lowest taxonomic rank read_1 can identify is SUBTRIBE**. Similarly, read_2 is diagnostic up until the **superphylum** level. 
As a result the output of this tool will be:: + + read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n + read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 n n n n n n n n n n n n n n n n n + +where **n** means *EMPTY*. + +-------- + +**What's up with the drop down?** + +Why do we need the *require the lowest rank to be at least* dropdown? Let's look at the above example again. Suppose you need to find only those reads that are diagnostic on at least phylum level. To do this you need to set the *require the lowest rank to be at least* to **phylum**. As a result your output will look like this:: + + read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n + +.. class:: infomark + +Note that **read_2** is now omitted as it matches two phyla (**phylum3** and **phylum4**) and therefore is not diagnostic (but rather cosmopolitan) on *phylum* level. + + + + + +</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_input.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,4 @@ +read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1 1 +read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 subgenus2 species2 subspecies2 1 +read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum3 subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3 1 +read_2 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_input2.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,4 @@ +read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus1 subgenus1 species1 subspecies1 1 2 3 4 +read_1 2 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 genus2 subgenus2 species2 subspecies2 1 2 3 4 +read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum3 subphylum3 superclass3 class3 subclass3 superorder3 order3 suborder3 superfamily3 family3 subfamily3 tribe3 subtribe3 genus3 subgenus3 species3 subspecies3 1 X Y Z +read_2 4 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum4 subphylum4 superclass4 class4 subclass4 superorder4 order4 suborder4 superfamily4 family4 subfamily4 tribe4 subtribe4 genus4 subgenus4 species4 subspecies4 1 X Y Z
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_input3.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,100 @@ +IA_1-79371 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604065 +IA_1-84488 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604065 +IA_1-270826 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-285361 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-93958 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-99821 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-144417 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-278966 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-314709 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-324951 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-27817 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n 
Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604153 +IA_1-95255 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604181 +IA_1-104173 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-135979 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-139090 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-139090 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-139090 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-144996 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 
+IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-161439 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires 
Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root 
Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n 
Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-216231 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n 
Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-237681 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-250166 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-254274 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n 
Rattus n Rattus norvegicus n 281604186 +IA_1-254274 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-27817 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-29000 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-291427 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-291427 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-293054 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-293054 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-296315 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-296315 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus +IA_1-310974 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-310974 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n 
Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-311282 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-311282 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-322295 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n +IA_1-42600 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-45102 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-45102 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-48105 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-48105 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-57254 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-61975 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-61975 10116 root Eukaryota Metazoa n n 
Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-66943 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-68288 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-82334 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-95526 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_output.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,2 @@ +read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n 1 +read_2 3 root superkingdom1 kingdom1 subkingdom1 superphylum1 n n n n n n n n n n n n n n n n n 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_output2.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,1 @@ +read_1 1 root superkingdom1 kingdom1 subkingdom1 superphylum1 phylum1 subphylum1 superclass1 class1 subclass1 superorder1 order1 suborder1 superfamily1 family1 subfamily1 tribe1 subtribe1 n n n n 1 2 3 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lca_output3.taxonomy Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,40 @@ +IA_1-104173 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-135979 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-139090 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-144417 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-144996 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-160446 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-161439 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-190855 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-205154 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-216231 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus 
norvegicus n 281604186 +IA_1-236286 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-237681 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-250166 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-254274 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-270826 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-27817 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604153 +IA_1-278966 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-285361 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-29000 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-291427 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-293054 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia 
Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-296315 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-310974 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-311282 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-314709 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-324951 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604077 +IA_1-42600 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-45102 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-48105 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-57254 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-61975 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-66943 10116 root Eukaryota Metazoa 
n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-68288 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-79371 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604065 +IA_1-82334 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-84488 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604065 +IA_1-93958 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070 +IA_1-95255 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604181 +IA_1-95526 10116 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n Euarchontoglires Rodentia Sciurognathi n Muridae Murinae n n Rattus n Rattus norvegicus n 281604186 +IA_1-99821 591020 root Bacteria n n n Proteobacteria n n Gammaproteobacteria n n Enterobacteriales n n Enterobacteriaceae n n n Shigella n Shigella flexneri n 281604070
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jan 23 12:30:52 2014 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="taxonomy" version="1.0.0"> + <repository changeset_revision="00dc297ecd07" name="package_taxonomy_1_0_0" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>