Mercurial > repos > artbio > sr_bowtie_dataset_annotation

diff sr_bowtie_dataset_annotation.xml @ 0:bfe92ceffe57 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit 60340e9e0d2795b88e23fd57e1ccb190918bf337"
author: artbio
date: Mon, 07 Oct 2019 08:40:15 -0400
children: 19ba2e38e8ec
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sr_bowtie_dataset_annotation.xml	Mon Oct 07 08:40:15 2019 -0400
@@ -0,0 +1,178 @@
+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.4.0">
+  <description>by iterative alignments with sRbowtie</description>
+  <!-- Packages the job environment must provide: bowtie supplies the bowtie and
+       bowtie-build executables called in the command; the R packages are
+       presumably the ones loaded by barplot.r (script not visible here). -->
+  <requirements>
+    <requirement type="package" version="1.1.2">bowtie</requirement>
+    <requirement type="package" version="1.6.0">r-optparse</requirement>
+    <requirement type="package" version="3.1.0">r-ggplot2</requirement>
+    <requirement type="package" version="0.8.0">r-ggrepel</requirement>
+  </requirements>
+  <command detect_errors="exit_code"><![CDATA[
+        ## Cascade annotation of small RNA reads.
+        ## For every input dataset: align to the genome, then realign the
+        ## genome-matched reads to each additional reference in turn, appending
+        ## one row per reference (plus a "Not classified" row) to the output
+        ## table. Finally draw the barplot from that table.
+
+        ## Genome index: built on the fly from a history fasta, or taken from
+        ## the bowtie_indexes data table.
+        #if $refGenomeSource1.genomeSource == "history":
+            bowtie-build -f '$refGenomeSource1.ownFile' genome 1>/dev/null &&
+            #set index_path = 'genome'
+        #else:
+            #set index_path = $refGenomeSource1.index.fields.path
+        #end if
+
+        ## One bowtie index per additional reference, named after the dataset.
+        ## Quoted because dataset names may contain spaces.
+        #for $i in $AdditionalQueries:
+            bowtie-build -f '$i.ownFile' '$i.ownFile.name' 1>/dev/null &&
+        #end for
+
+        #set method_prefix = "-v %s -k 1 --best" % str($mismatches)
+        ## bowtie writes the --al/--un read files in the same format as its
+        ## input, so a read spans 2 lines for fasta and 4 lines for fastq.
+        ## lines_per_read converts "wc -l" line counts into read counts.
+        #if $input[0].is_of_type('fasta'):
+            #set format = "-f"
+            #set lines_per_read = 2
+        #elif $input[0].is_of_type('fastq'):
+            #set format = "-q"
+            #set lines_per_read = 4
+        #end if
+
+        #for $file in $input:
+            #set sample = $file.element_identifier
+            ## Start each sample from empty read files. The class steps below
+            ## already touch their --al/--un files before running bowtie,
+            ## which suggests bowtie does not create them when no read
+            ## qualifies; without this reset a sample with no genome hit would
+            ## fail on a missing matched.fa or reuse the previous sample's reads.
+            rm -f matched.fa unmatched.fa &&
+            touch matched.fa unmatched.fa &&
+            bowtie -p \${GALAXY_SLOTS:-4}
+                   $method_prefix
+                   --al matched.fa
+                   --un unmatched.fa
+                   --suppress 6,7,8
+                   '$index_path' $format '$file' > tabular_bowtie_output.tab &&
+            genome_aligned=\$(( \$(wc -l < matched.fa) / ${lines_per_read} )) &&
+            #set counter = 0
+            #for $i in $AdditionalQueries:
+                #set $counter += 1
+                ## The first class step realigns the genome-matched reads;
+                ## every later step takes the reads the previous class step
+                ## left unmatched.
+                #if $counter != 1:
+                    #set to_align = "class_unmatched.fa"
+                #else:
+                    #set to_align = "matched.fa"
+                #end if
+                touch tmp_class_matched.fa tmp_class_unmatched.fa &&
+                bowtie -p \${GALAXY_SLOTS:-4}
+                    $method_prefix
+                    --al tmp_class_matched.fa
+                    --un tmp_class_unmatched.fa
+                    --suppress 6,7,8
+                    '$i.ownFile.name' $format '$to_align' > tabular_bowtie_output.tab &&
+                class_aligned=\$(( \$(wc -l < tmp_class_matched.fa) / ${lines_per_read} )) &&
+                echo -e "$sample\t$i.ownFile.name\t\$class_aligned\t\${genome_aligned}" >> '$output' &&
+                mv tmp_class_unmatched.fa class_unmatched.fa &&
+                rm tmp_class_matched.fa &&
+            #end for
+            ## Reads that matched the genome but none of the additional references.
+            remaining=\$(( \$(wc -l < class_unmatched.fa) / ${lines_per_read} )) &&
+            echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> '$output' &&
+        #end for
+
+        Rscript '$__tool_directory__/barplot.r' --input '$output' --barplot '$barplot'
+        ]]></command>
+  <inputs>
+    <!-- Read datasets to annotate; the command uses the type of the first one
+         (fasta or fastq) to pick the bowtie input flag for all of them. -->
+    <param name="input" type="data" multiple="true" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>
+    <!-- Passed to bowtie as the -v value. -->
+    <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments">
+        <option value="0">0</option>
+        <option value="1" selected="true">1</option>
+        <option value="2">2</option>
+        <option value="3">3</option>
+    </param>
+    <!-- First bowtie index selection: a built-in index or a fasta from the history -->
+    <conditional name="refGenomeSource1">
+      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Bowtie Built-ins were indexed using default options">
+        <option value="indexed">Use a built-in index</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact instance administrator">
+          <options from_data_table="bowtie_indexes"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+      </when>
+    </conditional>
+    <!-- End of first bowtie index selection -->
+    <!-- Other bowtie index selections from fasta in history. At least one is
+         mandatory: the command counts the "Not classified" reads from a file
+         that only exists after an additional alignment step has run, hence
+         min="1". -->
+    <repeat name="AdditionalQueries" title="Additional Alignment Step" min="1">
+        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+    </repeat>
+    <!-- End of other bowtie index selections -->
+  </inputs>
+   <outputs>
+       <!-- Table appended to by the command: one row per (sample, reference)
+            pair plus one "Not classified" row per sample. The column names
+            carry no stray leading space. -->
+       <data format="tabular" name="output" label="Cascade Annotation Analysis">
+           <actions>
+               <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads,Total reads" />
+           </actions>
+       </data>
+       <!-- PDF passed to barplot.r through its barplot option. -->
+       <data name="barplot" format="pdf" label="barplot from ${on_string}" />
+   </outputs>
+    <tests>
+        <!-- Single fasta sample; genome index built from a history fasta; two additional references -->
+        <test>
+            <param name="input" value="sample1.fa" ftype="fasta"/>
+            <param name="genomeSource" value="history"/>
+            <param name="ownFile" value="2L-tail.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta"/>
+            <output name="output" ftype="tabular" file="sample1_output.tab"/>
+            <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/>
+        </test>
+        <!-- Single fastq sample, same references -->
+        <test>
+            <param name="input" value="sample.fastq" ftype="fastq"/>
+            <param name="genomeSource" value="history"/>
+            <param name="ownFile" value="2L-tail.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta"/>
+            <output name="output" ftype="tabular" file="sample_output.tab"/>
+            <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/>
+        </test>
+        <!-- Five fasta samples processed in one job, same references -->
+        <test>
+            <param name="input" value="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta"/>
+            <param name="genomeSource" value="history"/>
+            <param name="ownFile" value="2L-tail.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta"/>
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta"/>
+            <output name="output" ftype="tabular" file="multisample5_output.tab"/>
+            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500"/>
+        </test>
+    </tests>
+  <help>
+
+**Introduction**
+
+Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient.
+A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.
+
+Here, the sRbowtie wrapper works specifically with short-read FASTA or FASTQ inputs
+(bowtie -v mode, with -k 1), which must be clipped of their adapters before alignment.
+
+.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
+
+
+------
+
+**What it does**
+
+.. class:: infomark
+
+This script uses the sRbowtie wrapper to iteratively match reads to a series of reference indexes.
+Reads that aligned to the first reference are realigned to the second reference.
+From this point on, unaligned reads are taken as input for alignment to the third reference, etc.
+
+
+Reads are matched to DNA references (both strands) as fast as possible, without taking care of mapping issues.
+
+*-v [0,1,2,3] -k 1 --best -p [GALAXY_SLOTS, default 4] --suppress 6,7,8*
+
+From the second reference onwards, reads left unaligned at step N are used as input for sRbowtie at step N+1.
+
+-----
+
+**Input formats**
+
+.. class:: warningmark
+
+*Reads must be clipped from their adapter and provided in a FASTA or FASTQ format*
+
+-----
+
+**OUTPUTS**
+
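+**Annotation table in a tabular format** (columns: Sample, Reference Index, Number of reads, Total reads)
+
+**Barplot in PDF format**, drawn from the annotation table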
+
+  </help>
+</tool>
author	artbio
date	Mon, 07 Oct 2019 08:40:15 -0400
parents
children	19ba2e38e8ec