diff POGs.xml @ 6:b19ed7395dcc draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:54:30 -0500
parents dad7053ba20e
children 04422117fcd7
line wrap: on
line diff
--- a/POGs.xml	Wed Sep 27 10:02:20 2017 -0400
+++ b/POGs.xml	Wed Jan 17 08:54:30 2018 -0500
@@ -10,17 +10,12 @@
 
 	<requirements>
 		<expand macro="python_required" />
+        <requirement type="package" version="0.20.0">pandas</requirement>
+        <requirement type="package" version="1.12.0">numpy</requirement>
 	</requirements>
 
   	<command>
 	<![CDATA[
-        #set $infiles_from_filter_assembly = ""
-        #for $input_from_filter_assembly in $inputs_from_filter_assembly
-            ln -s '$input_from_filter_assembly' '$input_from_filter_assembly.element_identifier';
-            #set $infiles_from_filter_assembly = $infiles_from_filter_assembly + $input_from_filter_assembly.element_identifier + ","
-        #end for
-        #set $infiles_from_filter_assembly = $infiles_from_filter_assembly[:-1]
-
         #set $infiles_from_paiwise_prot = ""
         #for $input_from_paiwise_prot in $inputs_from_paiwise_prot
             ln -s '$input_from_paiwise_prot' '$input_from_paiwise_prot.element_identifier';
@@ -28,165 +23,127 @@
         #end for
         #set $infiles_from_paiwise_prot = $infiles_from_paiwise_prot[:-1]
 
-		python '$__tool_directory__/scripts/S01_get_locus_orthologs_part1.py' '$infiles_from_paiwise_prot'
-		> ${log} &&
+		python '$__tool_directory__/scripts/pogs.py' '$infiles_from_paiwise_prot' '$minspec' 
+        
+        ## Test the boolean parameters themselves: the quoted form '$verbose' renders to "T" or "F", both non-empty strings, so it was always true.
+        #if $verbose
+            -v
+        #end if
+        #if $paralogs
+            -p
+        #end if
 
-		python '$__tool_directory__/scripts/S02_get_locus_orthologs_part2.py' '$infiles_from_filter_assembly' $minseq $paralogs
-		>> ${log};
+        > ${log}
+
 	]]>
   	</command>
 
  	<inputs>
-        <param name="inputs_from_filter_assembly" type="data" format="fasta" multiple="true" label="Input files from Filter assemblies" />
         <param name="inputs_from_paiwise_prot" type="data" format="fasta" multiple="true" label="Input files from Pairwise Prot" />
-		<param name="minseq" type="integer" value="3" label="Drop orthogroups with less than n species" />
-		<param name="paralogs" type="select" label="Paralogs savage removal" help="Yes : orthogroups with paralogs will be fully removed. No : paralogs sequences will be (naively) filtered to keep only one sequence." >
-			<option value="yes">Yes</option>
-			<option value="no">No</option>
-		</param>
+		<param name="minspec" type="integer" value="3" label="Drop orthogroups with less than n species" />	
+        <param name="verbose" type="boolean" checked="True" truevalue="T" falsevalue="F" label="Verbose" help="Display a summary table of orthogroups before paralogs filtering"/>
+        <param name="paralogs" type="boolean" checked="False" truevalue="T" falsevalue="F" label="Paralogs" help="Return also orthogroups without paralogs filtering"/>
 	</inputs>
 
 	<outputs>
-		<data format="txt" name="log" label="POGs" />
-        <collection name="output" type="list" label="POGs locus orthologs unaligned">
+		<data format="txt" name="log" label="pogs.output" />
+        <collection name="output" type="list" label="POGs_unaligned">
             <discover_datasets pattern="__name_and_ext__" directory="outputs" />
         </collection>
+        <collection name="outputPara" type="list" label="POGs_withParalogs_unaligned">
+            <discover_datasets pattern="__name_and_ext__" directory="outputs_withParalogs" />
+            <filter>paralogs == True</filter>
+        </collection>
 	</outputs>
 
 	<tests>
 		<test>
-            <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" />
-            <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" />
-			<param name="minseq" value="2" />
-			<param name="paralogs" value="no" />
+            <param name="inputs_from_paiwise_prot" ftype="fasta" value="output_pairwise/RBH_AmAmphi_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_AcAcaud.fasta,output_pairwise/RBH_TeTerlap_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_ApApomp.fasta"/>
+			<param name="minspec" value="3"/>
+            <param name="verbose" value="True"/>
+            <param name="paralogs" value="True"/>
 			<output name="log" value="pogs.output" />
             <output_collection name="output" type="list">
-                <element name="locus1_sp2" value="outputs_pogs_no/locus1_sp2.fasta" />
-                <element name="locus1_sp3" value="outputs_pogs_no/locus1_sp3.fasta" />
-                <element name="locus2_sp2" value="outputs_pogs_no/locus2_sp2.fasta" />
-                <element name="locus3_sp2" value="outputs_pogs_no/locus3_sp2.fasta" />
-                <element name="locus4_sp2" value="outputs_pogs_no/locus4_sp2.fasta" />
-                <element name="locus5_sp2" value="outputs_pogs_no/locus5_sp2.fasta" />
-                <element name="locus6_sp2" value="outputs_pogs_no/locus6_sp2.fasta" />
-                <element name="locus7_sp2" value="outputs_pogs_no/locus7_sp2.fasta" />
-                <element name="locus8_sp2" value="outputs_pogs_no/locus8_sp2.fasta" />
-                <element name="locus9_sp2" value="outputs_pogs_no/locus9_sp2.fasta" />
-                <element name="locus10_sp2" value="outputs_pogs_no/locus10_sp2.fasta" />
+                <element name="orthogroup_1_with_4_sequences" value="outputs/orthogroup_1_with_4_sequences.fasta" />
+                <element name="orthogroup_2_with_4_sequences" value="outputs/orthogroup_2_with_4_sequences.fasta" />
+                <element name="orthogroup_3_with_3_sequences" value="outputs/orthogroup_3_with_3_sequences.fasta" />
+                <element name="orthogroup_4_with_4_sequences" value="outputs/orthogroup_4_with_4_sequences.fasta" />
+                <element name="orthogroup_5_with_3_sequences" value="outputs/orthogroup_5_with_3_sequences.fasta" />
+                <element name="orthogroup_6_with_4_sequences" value="outputs/orthogroup_6_with_4_sequences.fasta" />
+                <element name="orthogroup_7_with_3_sequences" value="outputs/orthogroup_7_with_3_sequences.fasta" />
+                <element name="orthogroup_8_with_3_sequences" value="outputs/orthogroup_8_with_3_sequences.fasta" />
+                <element name="orthogroup_9_with_3_sequences" value="outputs/orthogroup_9_with_3_sequences.fasta" />
+                <element name="orthogroup_10_with_3_sequences" value="outputs/orthogroup_10_with_3_sequences.fasta" />
+            </output_collection>
+            <output_collection name="outputPara" type="list">
+                <element name="orthogroup_1_with_13_sequences_withParalogs" value="outputs_withParalogs/orthogroup_1_with_13_sequences_withParalogs.fasta" />
+                <element name="orthogroup_2_with_7_sequences_withParalogs" value="outputs_withParalogs/orthogroup_2_with_7_sequences_withParalogs.fasta" />
+                <element name="orthogroup_3_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_3_with_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_4_with_7_sequences_withParalogs" value="outputs_withParalogs/orthogroup_4_with_7_sequences_withParalogs.fasta" />
+                <element name="orthogroup_5_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_5_with_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_6_with_4_sequences_withParalogs" value="outputs_withParalogs/orthogroup_6_with_4_sequences_withParalogs.fasta" />
+                <element name="orthogroup_7_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_7_with_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_8_with_5_sequences_withParalogs" value="outputs_withParalogs/orthogroup_8_with_5_sequences_withParalogs.fasta" />
+                <element name="orthogroup_9_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_9_with_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_10_with_4_sequences_withParalogs" value="outputs_withParalogs/orthogroup_10_with_4_sequences_withParalogs.fasta" />
             </output_collection>
 		</test>
-		<test>
-            <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" />
-            <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" />
-			<param name="minseq" value="2" />
-			<param name="paralogs" value="yes" />
-			<output name="log" value="pogs_para.output" />
-            <output_collection name="output" type="list">
-                <element name="locus1_sp2" value="outputs_pogs_yes/locus1_sp2.fasta" />
-                <element name="locus1_sp3" value="outputs_pogs_yes/locus1_sp3.fasta" />
-                <element name="locus2_sp2" value="outputs_pogs_yes/locus2_sp2.fasta" />
-                <element name="locus3_sp2" value="outputs_pogs_yes/locus3_sp2.fasta" />
-                <element name="locus4_sp2" value="outputs_pogs_yes/locus4_sp2.fasta" />
-                <element name="locus5_sp2" value="outputs_pogs_yes/locus5_sp2.fasta" />
-                <element name="locus6_sp2" value="outputs_pogs_yes/locus6_sp2.fasta" />
-                <element name="locus7_sp2" value="outputs_pogs_yes/locus7_sp2.fasta" />
-                <element name="locus8_sp2" value="outputs_pogs_yes/locus8_sp2.fasta" />
-                <element name="locus9_sp2" value="outputs_pogs_yes/locus9_sp2.fasta" />
-                <element name="locus10_sp2" value="outputs_pogs_yes/locus10_sp2.fasta" />
-            </output_collection>
-		</test>		
 	</tests>	
 
 	<help>
 
-@HELP_AUTHORS@
+@HELP_AUTHORS@    
+
+<![CDATA[
 
-============
-What it does
-============
+**Last Version** : Victor Mataigne and Gildas Le Corguillé
 
-| This tool parses homologous sequences obtained by pairwise and gather sequences into groups of orthologous sequences.
-| There are 2 outputs.
+**Description**
+
+This tool parses the homologous sequences obtained by pairwise comparisons and gathers them into groups of orthologous sequences.
 
 --------
 
-======
-Inputs
-======
-| A dataset collection with the the pairwise with the homologous sequences (obtained from the tool "Pairwise").
-| A dataset collection with the fasta sequences of each species (obtained from the tool "Filter_Assemblies")
+**Inputs**
+
+The output (nucleic format) of the tool 'Pairwise' (AdaptSearch suite), which consists of lists of homologous sequences between pairs of species obtained by reciprocal best hit (RBH) comparisons.
 
 --------
 
-=======
-Outputs
-=======
+**Parameters**
+
+- **Species minimum** : orthogroups containing fewer than n species are dropped and will not be recorded.
+- **verbose** : check 'Yes' to get a supplementary table with the counts of orthogroups before the removal of paralogous sequences.
+- **paralogs** : check 'Yes' to also retrieve the orthogroups as they are before the removal of paralogous sequences.
+
+.. class:: warningmark
+
+The minimum number of species per group only applies to the main output files. With the 'paralogs' option, this parameter acts instead as the minimum number of sequences to keep per group.
+
+--------
+
+**Outputs**
 
 This tool, produces the following files :
 
-**POGs** :
-
-| is the general output. It gives the number of sequences at each filtering step and then count
-| the number of groups.
-
-**POGs_locus_orthologs_unaligned** :
-
-| is the output which contains the groups of orthologous sequences,
-| one file corresponding to one group.
-
-the sequences of each group are in nucleic format.
-
+- pogs.output : the general output, displaying count tables of orthogroups.
 
-===============
-Working Example
-===============
-
----------------
-The input files
----------------
+- POGs_unaligned : a dataset collection which contains the groups of orthologous sequences (one group per file) in nucleic format, with paralogous sequences removed (only one sequence is kept per species and per group)
 
-| 4 simulated files with a few nucleic sequences each : Ac.fasta Am.fasta Ap.fasta Pf.fasta.
-| 6 files containing the homologous sequences of each pairwise.
-
-----------------
-The output files
-----------------
-
-**POGs**
+- POGs_withParalogs_unaligned : a similar dataset collection in which paralogous sequences are kept. This output is available if you check 'Yes' for the 'paralogs' option.
 
-
-| Number of locus before removeRedondancy = 11
-
-
-| Number of locus = 11
-
-
-| NUMBER OF REMAINING LOCUS AFTER INTRA LOCUS TREATMENT [REMOVE GROUPS WITH PARALOGS] = 11
-| NUMBER OF REMAINING LOCUS AFTER 2ND TREATMENT [INTER LOCUS] = 11
-
-
-| REMOVAL OF LOCUS WITH LESS THAN 2 SEQUENCES
+---------
 
-| Number of species in the locus : 4
-    Number of locus : 0
-
-| Number of species in the locus : 3
-    Number of locus : 1
-
-| Number of species in the locus : 2
-    Number of locus : 10
+**The AdaptSearch Pipeline**
 
-**POGs_locus_orthologs_unaligned**
-
-| For example the file locus1_sp2.fasta :
-| &gt;Ac7_1/1_1.000_160
-| GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGGCATGTGGTACCATTGGGAACCGTCAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGCCCGTGTTTGACCTTGAGCACCAAG
-| &gt;Am3_1/1_1.000_160
-  GCACCTAGAATTACCCGAAGCTTCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGGTGGAAGAAGGCATGTGGTACCATTGGGGTGGTACAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGGAGCAGTTTGACCTTGAGCACCAAG
----------------------------------------------------
+.. image:: ../../adaptsearch_picture_helps.png
+   :height: 593
+   :width: 852
 
 Changelog
 ---------
 
+**Version 2.1 - 04/01/2018**
+
+  - Rewritten with improved results and better code
+  - Added options verbose and paralogs
+
 **Version 2.0 - 11/07/2017**
 
  - Replace the zip between tools by Dataset Collection
@@ -201,6 +158,8 @@
  - TEST: Add funtional test with planemo
  - IMPROVEMENT: Use conda dependencies for python
 
+    ]]>
+
 	</help>
 
 	<expand macro="citations" />