Mercurial > repos > abims-sbr > orthogroups_tool
changeset 4:427dd144d915 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit 17acd02c547bd1f7661a846661aa99de9743efe9
| author | abims-sbr |
|---|---|
| date | Tue, 27 Feb 2018 08:42:05 -0500 |
| parents | 82cc91f8cacb |
| children | 6f00f3801441 |
| files | adaptsearch_picture_helps.png orthogroups_tool.xml scripts/filter_orthofinder.py static/images/adaptsearch_picture_helps.png test-data/2_orthogroups_tool.output test-data/orthogroups_tool.output |
| diffstat | 6 files changed, 70 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/orthogroups_tool.xml Fri Jan 19 09:51:12 2018 -0500 +++ b/orthogroups_tool.xml Tue Feb 27 08:42:05 2018 -0500 @@ -18,7 +18,7 @@ <![CDATA[ #set $infiles = "" #for $input in $inputs_fasta - ln -s '$input' '$input.element_identifier'; + ln -s '$input' '$input.element_identifier'.fasta; #set $infiles = $infiles + $input.element_identifier + "," #end for #set $infiles = $infiles[:-1] @@ -26,10 +26,10 @@ $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' tmp_orthogroups.txt && python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt $nbspec $nbseq - #if '$verbosity': + #if $verbosity=="T": -v #end if - #if '$paralogs': + #if $paralogs=="T": -p #end if @@ -42,8 +42,8 @@ <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/> <param name="nbspec" type="integer" value="0" label="Number of studied species" help="Specifies the TOTAL (no less) number of species of the analysis"/> <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" /> - <param name="verbosity" type="boolean" label="verbosity" help="Turn on this paramater to display an additional summary table (computed before the pralogous filtering)" /> - <param name="paralogs" type="boolean" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" /> + <param name="verbosity" type="boolean" checked="True" truevalue="T" falsevalue="F" label="verbosity" help="Turn on this paramater to display an additional summary table (computed before the pralogous filtering)" /> + <param name="paralogs" type="boolean" checked="False" truevalue="T" falsevalue="F" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" /> </inputs> <outputs> @@ -85,6 +85,24 @@ 
<element name="orthogroup_7_2_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_7_2_sequences_withParalogs.fasta" /> </output_collection> </test> + <test> + <param name="orthogroups" value="Orthogroups.txt"/> + <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/> + <param name="nbspec" value="4"/> + <param name="nbseq" value="2"/> + <param name="verbosity" value="True"/> + <param name="paralogs" value="False"/> + <output name="output" value="2_orthogroups_tool.output" lines_diff="2"/> + <output_collection name="orthogroups_fasta" type="list"> + <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" /> + <element name="orthogroup_2_4_sequences" value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" /> + <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" /> + <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" /> + <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" /> + <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" /> + <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" /> + </output_collection> + </test> </tests> <help>
--- a/scripts/filter_orthofinder.py Fri Jan 19 09:51:12 2018 -0500 +++ b/scripts/filter_orthofinder.py Tue Feb 27 08:42:05 2018 -0500 @@ -21,8 +21,9 @@ with open(file, "r") as origin: for line1,line2 in itertools.izip_longest(*[origin]*2): gene=line1.strip("\r\n ") - sequence=line2.strip(" \r\n ") - hashTable[gene] = sequence + sequence=line2.strip("\r\n ") + hashTable[gene] = sequence + return hashTable ## PART 2 : Create orthogroups file (one file per orthogroup) @@ -76,7 +77,7 @@ name = "orthogroup_{}_{}_sequences.fasta".format(i, length) result = open(name, "w") with result: - for locus in orthogroup: + for locus in orthogroup: result.write("{}\n".format(locus)) # write geneID. ">%s\n" before result.write("{}\n".format(hashTable[locus])) # write sequence @@ -127,12 +128,12 @@ # STEP 3 - Print summaries ---------------------------------------------------- if verbose: print " Summary before paralogous filtering : \n" - frame1 = asFrame(countings(list_orthogroups_withpara, nbspecs)) - print frame1 + df1 = asFrame(countings(list_orthogroups_withpara, nbspecs)) + print df1.loc[df1.ne(0).any(1),df1.ne(0).any()] #print " Summary before paralogous filtering : \n",countings(list_orthogroups_withpara, nbspecs),"\n" - print " Summary after paralogous filtering : \n" - frame2= asFrame(countings(list_orthogroups_format, nbspecs)) - print frame2 + print "\n Summary after paralogous filtering : \n" + df2 = asFrame(countings(list_orthogroups_format, nbspecs)) + print df2.loc[df2.ne(0).any(1),df2.ne(0).any()] return len(list_orthogroups_format) #list_orthogroups_no_para @@ -156,7 +157,7 @@ hashTable = hashSequences(path) # Open txt file with orthogroups - print " Reading Orthogroups.txt and wrting orthogroups to separated files..." + print " Reading Orthogroups.txt and writing orthogroups to separated files..." 
print " (Dropping orthogroups of less than {} loci.)\n".format(args.minspec) list_orthogroups = formatAndFilter(args.files, args.minspec, args.nbspec, hashTable, args.verbose, args.paralogs) print "\n{} filtered orthogroups have been written in separated files".format(list_orthogroups)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_orthogroups_tool.output Tue Feb 27 08:42:05 2018 -0500 @@ -0,0 +1,26 @@ + +-This script works on the 'Orthogroups' file output of Orthofinder to split each orthogroup in a single fasta file. +-It also gets rid of orthogroups with less sequences than the number specified by the user. + Building hashTable IDs/sequences ... + + Reading Orthogroups.txt and writing orthogroups to separated files... + (Dropping orthogroups of less than 2 loci.) + + Summary before paralogous filtering : + + 2 sps 3 sps 4 sps +2 seqs 1 0 0 +3 seqs 1 3 0 +5 seqs 0 0 1 +8 seqs 0 1 0 + + Summary after paralogous filtering : + + 2 sps 3 sps 4 sps +2 seqs 2 0 0 +3 seqs 0 4 0 +4 seqs 0 0 1 + +7 filtered orthogroups have been written in separated files + +Filtered orthogroups are written in the directory 'filtered_orthogroups'
--- a/test-data/orthogroups_tool.output Fri Jan 19 09:51:12 2018 -0500 +++ b/test-data/orthogroups_tool.output Tue Feb 27 08:42:05 2018 -0500 @@ -3,27 +3,23 @@ -It also gets rid of orthogroups with less sequences than the number specified by the user. Building hashTable IDs/sequences ... - Reading Orthogroups.txt and wrting orthogroups to separated files... + Reading Orthogroups.txt and writing orthogroups to separated files... (Dropping orthogroups of less than 2 loci.) Summary before paralogous filtering : - 1 sps 2 sps 3 sps 4 sps -1 seqs 0 0 0 0 -2 seqs 0 1 0 0 -3 seqs 0 1 3 0 -4 seqs 0 0 0 0 -5 seqs 0 0 0 1 -6 seqs 0 0 0 0 -7 seqs 0 0 0 0 -8 seqs 0 0 1 0 + 2 sps 3 sps 4 sps +2 seqs 1 0 0 +3 seqs 1 3 0 +5 seqs 0 0 1 +8 seqs 0 1 0 + Summary after paralogous filtering : - 1 sps 2 sps 3 sps 4 sps -1 seqs 0 0 0 0 -2 seqs 0 2 0 0 -3 seqs 0 0 4 0 -4 seqs 0 0 0 1 + 2 sps 3 sps 4 sps +2 seqs 2 0 0 +3 seqs 0 4 0 +4 seqs 0 0 1 7 filtered orthogroups have been written in separated files
