Mercurial > repos > abims-sbr > orthogroups_tool

Binary file adaptsearch_picture_helps.png has changed
--- a/orthogroups_tool.xml	Fri Jan 19 09:51:12 2018 -0500
+++ b/orthogroups_tool.xml	Tue Feb 27 08:42:05 2018 -0500
@@ -18,7 +18,7 @@
     <![CDATA[
         #set $infiles = ""
         #for $input in $inputs_fasta
-            ln -s '$input' '$input.element_identifier';
+            ln -s '$input' '$input.element_identifier'.fasta;
             #set $infiles = $infiles + $input.element_identifier + ","
         #end for
         #set $infiles = $infiles[:-1]
@@ -26,10 +26,10 @@
         $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' tmp_orthogroups.txt
         &&
         python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt $nbspec $nbseq
-        #if '$verbosity':
+        #if $verbosity=="T":
             -v
         #end if
-        #if '$paralogs':
+        #if $paralogs=="T":
             -p
         #end if

@@ -42,8 +42,8 @@
         <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/>
         <param name="nbspec" type="integer" value="0" label="Number of studied species" help="Specifies the TOTAL (no less) number of species of the analysis"/>
         <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" />
-        <param name="verbosity" type="boolean" label="verbosity" help="Turn on this paramater to display an additional summary table (computed before the pralogous filtering)" />
-        <param name="paralogs" type="boolean" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" />
+        <param name="verbosity" type="boolean" checked="True" truevalue="T" falsevalue="F" label="verbosity" help="Turn on this parameter to display an additional summary table (computed before the paralogous filtering)" />
+        <param name="paralogs" type="boolean" checked="False" truevalue="T" falsevalue="F" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" />
     </inputs>

     <outputs>
@@ -85,6 +85,24 @@
                 <element name="orthogroup_7_2_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_7_2_sequences_withParalogs.fasta" />
             </output_collection>
         </test>
+        <test>
+            <param name="orthogroups" value="Orthogroups.txt"/>
+            <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
+            <param name="nbspec" value="4"/>
+            <param name="nbseq" value="2"/>
+            <param name="verbosity" value="True"/>
+            <param name="paralogs" value="False"/>
+            <output name="output" value="2_orthogroups_tool.output" lines_diff="2"/>
+            <output_collection name="orthogroups_fasta" type="list">
+                <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" />
+                <element name="orthogroup_2_4_sequences" value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" />
+                <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" />
+                <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" />
+                <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" />
+                <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" />
+                <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" />
+            </output_collection>
+        </test>
     </tests>

     <help>
--- a/scripts/filter_orthofinder.py	Fri Jan 19 09:51:12 2018 -0500
+++ b/scripts/filter_orthofinder.py	Tue Feb 27 08:42:05 2018 -0500
@@ -21,8 +21,9 @@
         with open(file, "r") as origin:
             for line1,line2 in itertools.izip_longest(*[origin]*2):
                 gene=line1.strip("\r\n ")
-                sequence=line2.strip(" \r\n ")
-                hashTable[gene] = sequence
+                sequence=line2.strip("\r\n ")
+                hashTable[gene] = sequence
+
     return hashTable

 ## PART 2 : Create orthogroups file (one file per orthogroup)
@@ -76,7 +77,7 @@
             name = "orthogroup_{}_{}_sequences.fasta".format(i, length)
         result = open(name, "w")
         with result:
-            for locus in orthogroup:
+            for locus in orthogroup:
                 result.write("{}\n".format(locus)) # write geneID. ">%s\n" before
                 result.write("{}\n".format(hashTable[locus])) # write sequence

@@ -127,12 +128,12 @@
     # STEP 3 - Print summaries ----------------------------------------------------
     if verbose:
         print "  Summary before paralogous filtering : \n"
-        frame1 = asFrame(countings(list_orthogroups_withpara, nbspecs))
-        print frame1
+        df1 = asFrame(countings(list_orthogroups_withpara, nbspecs))
+        print df1.loc[df1.ne(0).any(1),df1.ne(0).any()]
         #print "  Summary before paralogous filtering : \n",countings(list_orthogroups_withpara, nbspecs),"\n"
-    print "  Summary after paralogous filtering : \n"
-    frame2= asFrame(countings(list_orthogroups_format, nbspecs))
-    print frame2
+    print "\n  Summary after paralogous filtering : \n"
+    df2 = asFrame(countings(list_orthogroups_format, nbspecs))
+    print df2.loc[df2.ne(0).any(1),df2.ne(0).any()]

     return len(list_orthogroups_format) #list_orthogroups_no_para

@@ -156,7 +157,7 @@
     hashTable = hashSequences(path)

     # Open txt file with orthogroups
-    print "  Reading Orthogroups.txt and wrting orthogroups to separated files..."
+    print "  Reading Orthogroups.txt and writing orthogroups to separated files..."
     print "    (Dropping orthogroups of less than {} loci.)\n".format(args.minspec)
     list_orthogroups = formatAndFilter(args.files, args.minspec, args.nbspec, hashTable, args.verbose, args.paralogs)
     print "\n{} filtered orthogroups have been written in separated files".format(list_orthogroups)
Binary file static/images/adaptsearch_picture_helps.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2_orthogroups_tool.output	Tue Feb 27 08:42:05 2018 -0500
@@ -0,0 +1,26 @@
+
+-This script works on the 'Orthogroups' file output of Orthofinder to split each orthogroup in a single fasta file.
+-It also gets rid of orthogroups with less sequences than the number specified by the user.
+  Building hashTable IDs/sequences ...
+
+  Reading Orthogroups.txt and writing orthogroups to separated files...
+    (Dropping orthogroups of less than 2 loci.)
+
+  Summary before paralogous filtering :
+
+        2 sps  3 sps  4 sps
+2 seqs      1      0      0
+3 seqs      1      3      0
+5 seqs      0      0      1
+8 seqs      0      1      0
+
+  Summary after paralogous filtering :
+
+        2 sps  3 sps  4 sps
+2 seqs      2      0      0
+3 seqs      0      4      0
+4 seqs      0      0      1
+
+7 filtered orthogroups have been written in separated files
+
+Filtered orthogroups are written in the directory 'filtered_orthogroups'
--- a/test-data/orthogroups_tool.output	Fri Jan 19 09:51:12 2018 -0500
+++ b/test-data/orthogroups_tool.output	Tue Feb 27 08:42:05 2018 -0500
@@ -3,27 +3,23 @@
 -It also gets rid of orthogroups with less sequences than the number specified by the user.
   Building hashTable IDs/sequences ...

-  Reading Orthogroups.txt and wrting orthogroups to separated files...
+  Reading Orthogroups.txt and writing orthogroups to separated files...
     (Dropping orthogroups of less than 2 loci.)

   Summary before paralogous filtering :

-        1 sps  2 sps  3 sps  4 sps
-1 seqs      0      0      0      0
-2 seqs      0      1      0      0
-3 seqs      0      1      3      0
-4 seqs      0      0      0      0
-5 seqs      0      0      0      1
-6 seqs      0      0      0      0
-7 seqs      0      0      0      0
-8 seqs      0      0      1      0
+        2 sps  3 sps  4 sps
+2 seqs      1      0      0
+3 seqs      1      3      0
+5 seqs      0      0      1
+8 seqs      0      1      0
+
   Summary after paralogous filtering :

-        1 sps  2 sps  3 sps  4 sps
-1 seqs      0      0      0      0
-2 seqs      0      2      0      0
-3 seqs      0      0      4      0
-4 seqs      0      0      0      1
+        2 sps  3 sps  4 sps
+2 seqs      2      0      0
+3 seqs      0      4      0
+4 seqs      0      0      1

 7 filtered orthogroups have been written in separated files