diff filter_assembly.xml @ 11:36084a2949bf draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 68979144b9949c27bcc3340a9e8375de1391526c
author abims-sbr
date Mon, 03 Feb 2025 14:37:14 +0000
parents e418021a8c69
children 79f75b3faca5
line wrap: on
line diff
--- a/filter_assembly.xml	Mon Sep 24 03:56:11 2018 -0400
+++ b/filter_assembly.xml	Mon Feb 03 14:37:14 2025 +0000
@@ -1,4 +1,4 @@
-<tool name="Filter assemblies" id="filter_assemblies" version="2.0.3">
+<tool name="Filter assemblies" id="filter_assemblies" version="2.0.4">
 
     <description>
         Filter the outputs of Velvet or Trinity assemblies
@@ -9,8 +9,7 @@
     </macros>
 
     <requirements>
-        <expand macro="python_required" />
-        <requirement type="package" version="0.0.14">fastx_toolkit</requirement>
+        <expand macro="python3_required" />
         <requirement type="package" version="10.2011">cap3</requirement>
     </requirements>
 
@@ -23,19 +22,13 @@
         #end for
         #set $infiles = $infiles[:-1]
 
-        ln -s '$__tool_directory__/scripts/S02a_remove_redondancy_from_velvet_oases.py' . &&
-        ln -s '$__tool_directory__/scripts/S02b_format_fasta_name_trinity.py' . &&
-        ln -s '$__tool_directory__/scripts/S03_choose_one_variants_per_locus_trinity.py' . &&
-        ln -s '$__tool_directory__/scripts/S04_find_orf.py' . &&
-        ln -s '$__tool_directory__/scripts/S05_filter.py' . &&
-
         python '$__tool_directory__/scripts/S01_script_to_choose.py'
 
         '$infiles'
         $length_seq_max
         $percent_identity
         $overlap_length
-        > ${log}
+        > '${log}'
     ]]>
     </command>
 
@@ -106,13 +99,13 @@
 
 **Description**
 
-This tool reformats Velvet Oases or Trinity assemblies for the AdaptSearch galaxy suite and selects only one variant per gene according to its length and quality check.
+This tool runs the CAP3 software on assembly FASTA data, merges singlets and contigs, and then reformats the headers so that assemblies from any assembly tool can be used.
 
 ---------
 
 **Input format**
 
-(1) Sequences are in the sequential format:
+Sequences are in the FASTA format:
 
 | >seqname1
 | AAAGAGAGACCACATGTCAGTAGC -on one or several lines -
@@ -121,18 +114,6 @@
 | etc ...
 |
 
-2) The file name should begin with a two letter abbreviation of the species name (for isntance, 'Ap' if the species is Alvinella pompejana).
-
-**For Velvet Oases assemblies input**
-            
-    The headers must be as follow : *>Locus_i_Transcript_i/j_Confidence_x.xxx_Length_N* where i is the locus number, j the transcript variant among all versions of the transcript, x.xxx the confidence value and N the length.
-
-**For Trinity assemblies inputs**   
-            
-    The headers must be as follow : *>cj_gj_ij Len=j path=[j:0-j]* where all the j are integers (locus number, transcript variant, length, position...)
-
-**The tool handles the case if input files come from both assemblers (there is no need for input files to be exclusively from one or another assembler).**
-
 ---------
 
 **Parameters**
@@ -150,11 +131,9 @@
 **Steps**:
     
 The tool:
-    1) Modifies the sequence name to add the species abbreviation using the 2 first letters of the name of the transcriptome file : note that each species abbreviation must be unique
-    2) Selects one allelic sequence from each transcript (c or locus) using the length of the sequence and its level of confidence
-    3) Selects the best ORF from the sequence between two stop codons
-    4) Performs a CAP3 from the full set of ORFs to minimize redundancy
-    5) Retrieves the initial transcript sequences from the remaining set of proceeded ORF sequences
+    1) Runs CAP3 on the full set of input sequences to minimize redundancy
+    2) Merges singlets and contigs identified by CAP3
+    3) Reformats headers of the FASTA records by adding a specified prefix (defined from the original filename) and ensures that sequences are on a single line
 
 **Outputs**
 
@@ -172,6 +151,11 @@
 Changelog
 ---------
 
+		
+**Version 2.2 - 07/10/2024**
+
+    - Input files can come from any assembly tool
+		
 **Version 2.1 - 15/01/2018**
 
     - Input files can be a mix from files coming either from Trinity or Velvet Oases assemblers