Mercurial > repos > abims-sbr > pogs

diff POGs.xml @ 5:dad7053ba20e draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author: abims-sbr
date: Wed, 27 Sep 2017 10:02:20 -0400
parents: b7bd93fc0ac9
children: b19ed7395dcc
--- a/POGs.xml	Mon May 29 07:45:07 2017 -0400
+++ b/POGs.xml	Wed Sep 27 10:02:20 2017 -0400
@@ -1,30 +1,44 @@
-<tool name="POGs" id="POGs" version="1.0">
+<tool name="POGs" id="POGs" version="2.0">
 
 	<description>
 		Find orthologous groups
 	</description>
-	
+
 	<macros>
 		<import>macros.xml</import>
 	</macros>
 
 	<requirements>
-		<expand macro="python_required" />		
+		<expand macro="python_required" />
 	</requirements>
-	
+
   	<command>
 	<![CDATA[
-		python $__tool_directory__/scripts/S01_get_locus_orthologs_part1_v2.py ${zip_file}
-		> ${output} &&
+        #set $infiles_from_filter_assembly = ""
+        #for $input_from_filter_assembly in $inputs_from_filter_assembly
+            ln -s '$input_from_filter_assembly' '$input_from_filter_assembly.element_identifier';
+            #set $infiles_from_filter_assembly = $infiles_from_filter_assembly + $input_from_filter_assembly.element_identifier + ","
+        #end for
+        #set $infiles_from_filter_assembly = $infiles_from_filter_assembly[:-1]
 
-		python $__tool_directory__/scripts/S02_get_locus_orthologs_part2_v2.py ${zip} ${minseq} ${paralogs}
-		>> ${output};
+        #set $infiles_from_paiwise_prot = ""
+        #for $input_from_paiwise_prot in $inputs_from_paiwise_prot
+            ln -s '$input_from_paiwise_prot' '$input_from_paiwise_prot.element_identifier';
+            #set $infiles_from_paiwise_prot = $infiles_from_paiwise_prot + $input_from_paiwise_prot.element_identifier + ","
+        #end for
+        #set $infiles_from_paiwise_prot = $infiles_from_paiwise_prot[:-1]
+
+		python '$__tool_directory__/scripts/S01_get_locus_orthologs_part1.py' '$infiles_from_paiwise_prot'
+		> ${log} &&
+
+		python '$__tool_directory__/scripts/S02_get_locus_orthologs_part2.py' '$infiles_from_filter_assembly' $minseq $paralogs
+		>> ${log};
 	]]>
   	</command>
 
  	<inputs>
-		<param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" />
-		<param name="zip_file" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file " help="Contains homologous sequences in PROTEIC format, ONLY A ZIP FILE" />
+        <param name="inputs_from_filter_assembly" type="data" format="fasta" multiple="true" label="Input files from Filter assemblies" />
+        <param name="inputs_from_paiwise_prot" type="data" format="fasta" multiple="true" label="Input files from Pairwise Prot" />
 		<param name="minseq" type="integer" value="3" label="Drop orthogroups with less than n species" />
 		<param name="paralogs" type="select" label="Paralogs savage removal" help="Yes : orthogroups with paralogs will be fully removed. No : paralogs sequences will be (naively) filtered to keep only one sequence." >
 			<option value="yes">Yes</option>
@@ -33,51 +47,73 @@
 	</inputs>
 
 	<outputs>
-		<data format="txt" name="output" label="POGs" />
-		<data format="no_unzip.zip" name="output_zip" label="POGs_locus_orthologs_unaligned" from_work_dir="POGs_locus_orthologs_unaligned.zip" />
+		<data format="txt" name="log" label="POGs" />
+        <collection name="output" type="list" label="POGs locus orthologs unaligned">
+            <discover_datasets pattern="__name_and_ext__" directory="outputs" />
+        </collection>
 	</outputs>
 
 	<tests>
 		<test>
-			<param name="zip" ftype="zip" value="outputfilter.zip" />
-			<param name="zip_file" ftype="zip" value="outputpairwise.zip" />
+            <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" />
+            <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" />
 			<param name="minseq" value="2" />
 			<param name="paralogs" value="no" />
-			<output name="output" value="test_03.out" />
+			<output name="log" value="pogs.output" />
+            <output_collection name="output" type="list">
+                <element name="locus1_sp2" value="outputs_pogs_no/locus1_sp2.fasta" />
+                <element name="locus1_sp3" value="outputs_pogs_no/locus1_sp3.fasta" />
+                <element name="locus2_sp2" value="outputs_pogs_no/locus2_sp2.fasta" />
+                <element name="locus3_sp2" value="outputs_pogs_no/locus3_sp2.fasta" />
+                <element name="locus4_sp2" value="outputs_pogs_no/locus4_sp2.fasta" />
+                <element name="locus5_sp2" value="outputs_pogs_no/locus5_sp2.fasta" />
+                <element name="locus6_sp2" value="outputs_pogs_no/locus6_sp2.fasta" />
+                <element name="locus7_sp2" value="outputs_pogs_no/locus7_sp2.fasta" />
+                <element name="locus8_sp2" value="outputs_pogs_no/locus8_sp2.fasta" />
+                <element name="locus9_sp2" value="outputs_pogs_no/locus9_sp2.fasta" />
+                <element name="locus10_sp2" value="outputs_pogs_no/locus10_sp2.fasta" />
+            </output_collection>
 		</test>
 		<test>
-			<param name="zip" ftype="zip" value="outputfilter.zip" />
-			<param name="zip_file" ftype="zip" value="outputpairwise.zip" />
+            <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" />
+            <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" />
 			<param name="minseq" value="2" />
 			<param name="paralogs" value="yes" />
-			<output name="output" value="test_03b.out" />
-		</test>
+			<output name="log" value="pogs_para.output" />
+            <output_collection name="output" type="list">
+                <element name="locus1_sp2" value="outputs_pogs_yes/locus1_sp2.fasta" />
+                <element name="locus1_sp3" value="outputs_pogs_yes/locus1_sp3.fasta" />
+                <element name="locus2_sp2" value="outputs_pogs_yes/locus2_sp2.fasta" />
+                <element name="locus3_sp2" value="outputs_pogs_yes/locus3_sp2.fasta" />
+                <element name="locus4_sp2" value="outputs_pogs_yes/locus4_sp2.fasta" />
+                <element name="locus5_sp2" value="outputs_pogs_yes/locus5_sp2.fasta" />
+                <element name="locus6_sp2" value="outputs_pogs_yes/locus6_sp2.fasta" />
+                <element name="locus7_sp2" value="outputs_pogs_yes/locus7_sp2.fasta" />
+                <element name="locus8_sp2" value="outputs_pogs_yes/locus8_sp2.fasta" />
+                <element name="locus9_sp2" value="outputs_pogs_yes/locus9_sp2.fasta" />
+                <element name="locus10_sp2" value="outputs_pogs_yes/locus10_sp2.fasta" />
+            </output_collection>
+		</test>		
 	</tests>	
 
 	<help>
+
+@HELP_AUTHORS@
+
 ============
 What it does
 ============
 
-| This tool takes a zip archive containing nucleic fasta sequence files and a zip archive containing a file per pairwise with the homologous sequences (in proteic format). It searches for orthologous sequences.
+| This tool parses homologous sequences obtained from the "Pairwise" tool and gathers them into groups of orthologous sequences.
 | There are 2 outputs.
-| 
-| The script was written by **Eric Fontanillas**.
-| The wrapper was written by **Julie Baffard**.
-| Last improvments (paralogous filtering and code factoring) made by Victor Mataigne.
-
 
 --------
 
 ======
 Inputs
 ======
-
-option **Select a zip file containing the input files** :
-
-| the input zip file must have the extension .ort.zip
-| At the beginning, when you upload your input, you have to change the extension .zip to .ort.zip
-
+| A dataset collection with the pairwise files containing the homologous sequences (obtained from the tool "Pairwise").
+| A dataset collection with the fasta sequences of each species (obtained from the tool "Filter_Assemblies").
 
 --------
 
@@ -89,22 +125,81 @@
 
 **POGs** :
 
-| is the general output. It gives different informations : number of locus, each locus, ...
-| it gives for example, all the locus containing 2 species :
-| 2_sp : [[name_of_sequence,name_of_squence],...,[name_of_sequence, name_of_sequence]]
-| it's the same for locus containing 3 species, 4 species, ... 
-| 
+| is the general output. It gives the number of sequences at each filtering step and then counts
+| the number of groups.
 
 **POGs_locus_orthologs_unaligned** :
 
-| is the output (in zip format) which contains each locus.
-| one file corresponding to one locus.
+| is the output which contains the groups of orthologous sequences,
+| one file corresponding to one group.
+
+The sequences of each group are in nucleic format.
+
+
+===============
+Working Example
+===============
+
+---------------
+The input files
+---------------
+
+| 4 simulated files with a few nucleic sequences each : Ac.fasta Am.fasta Ap.fasta Pf.fasta.
+| 6 files containing the homologous sequences of each pairwise.
+
+----------------
+The output files
+----------------
+
+**POGs**
+
+
+| Number of locus before removeRedondancy = 11
+
+
+| Number of locus = 11
+
+
+| NUMBER OF REMAINING LOCUS AFTER INTRA LOCUS TREATMENT [REMOVE GROUPS WITH PARALOGS] = 11
+| NUMBER OF REMAINING LOCUS AFTER 2ND TREATMENT [INTER LOCUS] = 11
+
 
-the sequences of each locus are in nucleic format.
+| REMOVAL OF LOCUS WITH LESS THAN 2 SEQUENCES
+
+| Number of species in the locus : 4
+    Number of locus : 0
+
+| Number of species in the locus : 3
+    Number of locus : 1
+
+| Number of species in the locus : 2
+    Number of locus : 10
+
+**POGs_locus_orthologs_unaligned**
 
-.. class:: warningmark
+| For example the file locus1_sp2.fasta :
+| &gt;Ac7_1/1_1.000_160
+| GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGGCATGTGGTACCATTGGGAACCGTCAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGCCCGTGTTTGACCTTGAGCACCAAG
+| &gt;Am3_1/1_1.000_160
+| GCACCTAGAATTACCCGAAGCTTCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGGTGGAAGAAGGCATGTGGTACCATTGGGGTGGTACAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGGAGCAGTTTGACCTTGAGCACCAAG
+---------------------------------------------------
+
+Changelog
+---------
+
+**Version 2.0 - 11/07/2017**
 
-The zip output has to be downloaded (and extracts the files with a file archiver software), you cannot visualize them with the "eye icon" through the interface.
+ - Replace the zip between tools by Dataset Collection
+ - Add option : paralogs filtering
+ - Added a second functional test (to test the new option)
+ - Code factoring
+ - Removed zip format of input and output files ; replaced by dataset collections
+
+
+**Version 1.0 - 13/04/2017**
+
+ - TEST: Add functional test with planemo
+ - IMPROVEMENT: Use conda dependencies for python
 
 	</help>
author	abims-sbr
date	Wed, 27 Sep 2017 10:02:20 -0400
parents	b7bd93fc0ac9
children	b19ed7395dcc