Mercurial > repos > abims-sbr > orthogroups_tool

--- a/orthogroups_tool.xml	Wed Feb 28 06:04:35 2018 -0500
+++ b/orthogroups_tool.xml	Fri Mar 16 05:08:57 2018 -0400
@@ -18,14 +18,14 @@
     <![CDATA[
         #set $infiles = ""
         #for $input in $inputs_fasta
-            ln -s '$input' '$input.element_identifier'.fasta;
+            ln -s '$input' '$input.element_identifier';
             #set $infiles = $infiles + $input.element_identifier + ","
         #end for
         #set $infiles = $infiles[:-1]

         $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' tmp_orthogroups.txt
         &&
-        python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt $nbspec $nbseq
+        python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt '$infiles' $nbseq
         #if $verbosity=="T":
             -v
         #end if
@@ -40,7 +40,6 @@
     <inputs>
         <param name="orthogroups" type="data" format="txt" label="Select a file of Orthogroups" help="The output from OrthoFinder, 'Orthogroups.txt'."/>
         <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/>
-        <param name="nbspec" type="integer" value="0" label="Number of studied species" help="Specifies the TOTAL (no less) number of species of the analysis"/>
         <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" />
         <param name="verbosity" type="boolean" checked="True" truevalue="T" falsevalue="F" label="verbosity" help="Turn on this paramater to display an additional summary table (computed before the pralogous filtering)" />
         <param name="paralogs" type="boolean" checked="False" truevalue="T" falsevalue="F" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" />
@@ -61,7 +60,6 @@
         <test>
             <param name="orthogroups" value="Orthogroups.txt"/>
             <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
-            <param name="nbspec" value="4"/>
             <param name="nbseq" value="2"/>
             <param name="verbosity" value="True"/>
             <param name="paralogs" value="True"/>
@@ -88,7 +86,6 @@
         <test>
             <param name="orthogroups" value="Orthogroups.txt"/>
             <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
-            <param name="nbspec" value="4"/>
             <param name="nbseq" value="2"/>
             <param name="verbosity" value="True"/>
             <param name="paralogs" value="False"/>
@@ -141,7 +138,6 @@

 - File of Orthogroups : the orthogroups.txt file from OrthoFinder.
 - Output from Filter_Assemblies (either as multiple datasets or dataset collection)
-- Total number of studied species on which the orthogroups have been built.
 - Minimal number of sequences : The orthogroups with less than the specified number won't be recorded.
 - Verbose : If 'Yes', a supplementary table will be displayed in the outputs (coutings of species and sequences in orthogroups before the removal of paralogs).
 - Paralogs : if 'Yes', there will be a supplementary output of orthogroups file, before the removal of paralogs.
--- a/scripts/filter_orthofinder.py	Wed Feb 28 06:04:35 2018 -0500
+++ b/scripts/filter_orthofinder.py	Fri Mar 16 05:08:57 2018 -0400
@@ -142,7 +142,7 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("files", help="Orthogroups.txt file from OrthoFinder")
-    parser.add_argument("nbspec", type=int, help="Number of studied species")
+    parser.add_argument("infiles", help="fasta files used for OrthoFinder (after or before TransDecoder), separated by commas")
     parser.add_argument("minspec", type=int, help="Minimal number of species to keep per group")
     parser.add_argument("-v", "--verbose", action="store_true", help="Add another summary table : countings before paralogous genes filtering")
     parser.add_argument("-p", "--paralogs", action="store_true", help="Proceeds to write orthogroups also before paralogous filtering")
@@ -153,13 +153,15 @@

     # Build hashtable
     print "  Building hashTable IDs/sequences ...\n"
-    path = glob.glob('*.fasta')
-    hashTable = hashSequences(path)
+    infiles_good = str.split(args.infiles, ",")
+    nbspec = len(infiles_good)
+    #path = glob.glob('*.fasta')
+    hashTable = hashSequences(infiles_good)

     # Open txt file with orthogroups
     print "  Reading Orthogroups.txt and writing orthogroups to separated files..."
     print "    (Dropping orthogroups of less than {} loci.)\n".format(args.minspec)
-    list_orthogroups = formatAndFilter(args.files, args.minspec, args.nbspec, hashTable, args.verbose, args.paralogs)
+    list_orthogroups = formatAndFilter(args.files, args.minspec, nbspec, hashTable, args.verbose, args.paralogs)
     print "\n{} filtered orthogroups have been written in separated files".format(list_orthogroups)

     # Move output files in a new directory