Mercurial > repos > abims-sbr > filter_assemblies

diff filter_assembly.xml @ 2:1daa43b4729c draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author: abims-sbr
date: Wed, 27 Sep 2017 10:06:45 -0400
parents: 13a9ae9ef940
children: ac2fcb9c706a
--- a/filter_assembly.xml	Thu Apr 13 09:36:16 2017 -0400
+++ b/filter_assembly.xml	Wed Sep 27 10:06:45 2017 -0400
@@ -1,6 +1,4 @@
-<?xml version="1.0"?>
-
-<tool name="Filter assemblies" id="filter_assemblies" version="1.0">	
+<tool name="Filter assemblies" id="filter_assemblies" version="2.0">
 
 	<description>
 		Filter the outputs of Velvet or Trinity assemblies
@@ -13,89 +11,87 @@
 	<requirements>
 		<expand macro="python_required" />
 		<requirement type="package" version="0.0.14">fastx_toolkit</requirement>
-		<requirement type="package">cap3</requirement>
+		<requirement type="package" version="10.2011">cap3</requirement>
 	</requirements>
 
   	<command>
 	<![CDATA[
-		ln -s $__tool_directory__/scripts/S02a_remove_redondancy_from_velvet_oases.py .
-		&&
-		ln -s $__tool_directory__/scripts/S02b_format_fasta_name_trinity.py .
-		&&
-		ln -s $__tool_directory__/scripts/S03_choose_one_variants_per_locus_trinity.py .
-		&&
-		ln -s $__tool_directory__/scripts/S04_find_orf.py .
-		&&
-		ln -s $__tool_directory__/scripts/S05_filter.py .
-		&&
-		python $__tool_directory__/scripts/S01_script_to_choose.py
+        #set $infiles = ""
+        #for $input in $inputs
+            ln -s '$input' '$input.element_identifier' &&
+            #set $infiles = $infiles + $input.element_identifier + ","
+        #end for
+        #set $infiles = $infiles[:-1]
 
-		#if $input.input_type_choice == "velvet"
-			velvet ${input.zip_velvet} 
-		#else
-			trinity ${input.zip_trinity} 
-		#end if
-		$length_seq_max $percent_identity $overlap_length
-		> ${output}
+		ln -s '$__tool_directory__/scripts/S02a_remove_redondancy_from_velvet_oases.py' . &&
+		ln -s '$__tool_directory__/scripts/S02b_format_fasta_name_trinity.py' . &&
+		ln -s '$__tool_directory__/scripts/S03_choose_one_variants_per_locus_trinity.py' . &&
+		ln -s '$__tool_directory__/scripts/S04_find_orf.py' . &&
+		ln -s '$__tool_directory__/scripts/S05_filter.py' . &&
+
+		python '$__tool_directory__/scripts/S01_script_to_choose.py'
+
+		'$infiles'
+		$assembler
+		$length_seq_max
+		$percent_identity
+		$overlap_length
+		> '$log'
 	]]>
   	</command>
 
  	<inputs>
-		<conditional name="input">
-			<param name="input_type_choice" type="select" label="What is the type of your inputs? ">
-				<option value="velvet">Velvet Oases</option>
-				<option value="trinity">Trinity</option>
-			</param>
-			<when value="velvet">
-				<param name="zip_velvet" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="A zip file containing all your files" />
-			</when>
-			<when value="trinity">
-				<param name="zip_trinity" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="A zip file containing all your files" />
-			</when>
-		</conditional>
+        <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" />
+		<param name="assembler" type="select" label="Which assembler was used?">
+			<option value="velvet">Velvet Oases</option>
+			<option value="trinity">Trinity</option>
+		</param>
 		<param name="percent_identity" type="integer" value="100" label="Maximum sequence length" help="Cap3 parameter -p  N  specify overlap percent identity cutoff N &gt; 65 " />
 		<param name="overlap_length" type="integer" value="60" label="Maximum sequence length" help="Cap3 parameter -o  N  specify overlap length cutoff &gt; 15 " />
 		<param name="length_seq_max" type="integer" value="100" label="Minimum sequence length" help="Keep sequences which length is higher than the minimum sequence length  " />
 	</inputs>
 
 	<outputs>
-		<data format="no_unzip.zip" name="output_zip" label="Filter_oase_zip" from_work_dir="sequences_filtered.zip" />
-		<data format="txt" name="output" label="Filter_Assemblies_Summary"/>
+        <collection name="output_fasta" type="list" label="Filter Assemblies outputs">
+            <discover_datasets pattern="__name_and_ext__" directory="outputs" />
+        </collection>
+		<data format="txt" name="log" label="Filter Assemblies Summary"/>
 	</outputs>
 
 	<tests>
 		<test>
-			<param name="input_type_choice" value="trinity" />
-			<param name="input|zip_trinity" ftype="zip" value="test_01_input_Trinity_filter_oase.zip" />
-			<param name="percent_identity" value="100" />
+            <param name="inputs" ftype="fasta" value="trinity/Pfiji_trinity.fasta,trinity/Apomp_trinity.fasta,trinity/Amphi_trinity.fasta,trinity/Acaud_trinity.fasta" />
+            <param name="assembler" value="trinity" />
+            <param name="percent_identity" value="100" />
 			<param name="overlap_length" value="60" />
 			<param name="length_seq_max" value="100" />
-			<output name="output" value="test_06.out" />
+			<output name="log" value="trinity.output" />
+            <output_collection name="output_fasta" type="list">
+                <element name="AcAcaud_trinity" value="trinity_out/AcAcaud_trinity.fasta" />
+                <element name="AmAmphi_trinity" value="trinity_out/AmAmphi_trinity.fasta" />
+                <element name="ApApomp_trinity" value="trinity_out/ApApomp_trinity.fasta" />
+                <element name="PfPfiji_trinity" value="trinity_out/PfPfiji_trinity.fasta" />
+            </output_collection>
 		</test>
 		<test>
-			<param name="input_type_choice" value="velvet" />
-			<param name="input|zip_velvet" ftype="zip" value="test_01_input_Velvet_filter_oase.zip" />
+            <param name="inputs" ftype="fasta" value="velvet/Pg_transcriptome_90109.fasta,velvet/Ap_transcriptome_35099.fasta,velvet/Ac_transcriptome_25591.fasta" />
+			<param name="assembler" value="velvet" />
 			<param name="percent_identity" value="100" />
 			<param name="overlap_length" value="60" />
 			<param name="length_seq_max" value="100" />
-			<output name="output" value="test_06bis.out" />
+			<output name="log" value="velvet.output" />
+            <output_collection name="output_fasta" type="list">
+                <element name="AcAc_transcriptome_25591" value="velvet_out/AcAc_transcriptome_25591.fasta" />
+                <element name="ApAp_transcriptome_35099" value="velvet_out/ApAp_transcriptome_35099.fasta" />
+                <element name="PgPg_transcriptome_90109" value="velvet_out/PgPg_transcriptome_90109.fasta" />
+            </output_collection>
 		</test>
-		
-	</tests>	
+
+	</tests>
 
 	<help>
 
-.. class:: infomark
-
-**Authors**  Eric Fontanillas creates the scripts of this pipeline.
-
-.. class:: infomark
-
-**Galaxy integration** Julie Baffard and ABIMS TEAM
-
- | Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool.
-
----------------------------------------------------
+@HELP_AUTHORS@
 
 ===========
 Filter Oase
@@ -108,17 +104,8 @@
 
 This script reformats Velvet Oases or Trinity assemblies for the AdaptSearch galaxy suite and selects only one variant per gene according to its length and quality check.
 
-
-
-.. class:: infomark
-
-
-**Important part of this tool (the inputs format)**
-
 --------
 
-
-
 ============
 Input format
 ============
@@ -136,7 +123,7 @@
 	| AAAGAGAGAVCACATGTCAGTAGC
 	| &gt;seqname
 	| AAAGAGAGAVCACATGTCAGTAGC
-		
+
 (2)The name of each transcript must start by a species abbreviation of two letters, unique for each assembly. The file name of the assembly file must also begin with these same two letters.
 
 (3)The syntax of the sequence name must be constructed as: &gt;fh1_2/3_4_5 where are the two chosen letters the number of the locus, the transcript variant among all versions of the transcript,the confidence value and the length of the transcript. In this case, the file name should thus begin with fs.
@@ -153,7 +140,7 @@
 	| AAAGAGAGAVCACATGTCAGTAGC
 	| &gt;seqname
 	| AAAGAGAGAVCACATGTCAGTAGC
-		
+
 (2)The name of each transcript must start by &gt;comp, e.g. &gt;comp10020 instead of &gt;c10020 or &gt;transcript, etc...
 
 (3)The syntax of the sequence name should avoid spaces and must contain _len:, e.g. &gt;comp1_g1_it1_len:1000_path=[.....]
@@ -161,7 +148,25 @@
 (4)A suffix of two letters abbreviating the species name must be chosen, e.g. Homo sapiens -&gt; Hs. The file name of the assembly file must begin with this suffix. Each suffix should be unique among the n analyzed transcriptomes.
 e.g. if the file name is Hs_trinity_22000_transcripts.fasta with sequences, the suffix abbreviation must be Hs and all sequences must be : &gt;Hs.
 Alternatively if the name is: Homosapiens_trinity_22000_transcripts.fasta, the suffix must be thus Ho.
-	</help>		
+
+---------------------------------------------------
+
+Changelog
+---------
+
+**Version 2.0 - 14/04/2017**
+
+ - NEW: Replace the zip archives passed between tools with Dataset Collections
+
+
+**Version 1.0 - 13/04/2017**
+
+ - TEST: Add functional test with planemo
+
+ - IMPROVEMENT: Use conda dependencies for cap3, fastaformatter and python
+
+
+	</help>
 
     <expand macro="citations" />
author	abims-sbr
date	Wed, 27 Sep 2017 10:06:45 -0400
parents	13a9ae9ef940
children	ac2fcb9c706a