Mercurial > repos > artbio > sr_bowtie_dataset_annotation
diff sr_bowtie_dataset_annotation.xml @ 0:bfe92ceffe57 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit 60340e9e0d2795b88e23fd57e1ccb190918bf337"
| author | artbio |
|---|---|
| date | Mon, 07 Oct 2019 08:40:15 -0400 |
| parents | |
| children | 19ba2e38e8ec |
line wrap: on
line diff
<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.4.0">
    <!--
        Galaxy wrapper that annotates small RNA read datasets by cascading bowtie alignments:
        reads are first aligned to a primary reference, the aligned reads are then aligned to
        the first additional reference, and the reads left unaligned at each additional step
        are passed on to the next one. Read counts per step are appended to a tabular output
        which is finally rendered as a barplot by barplot.r.
    -->
    <description>by iterative alignments with sRbowtie</description>
    <requirements>
        <requirement type="package" version="1.1.2">bowtie</requirement>
        <requirement type="package" version="1.6.0">r-optparse</requirement>
        <requirement type="package" version="3.1.0">r-ggplot2</requirement>
        <requirement type="package" version="0.8.0">r-ggrepel</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Primary reference: build an index from a history fasta, or use a built-in index.
        #if $refGenomeSource1.genomeSource == "history":
            bowtie-build -f '$refGenomeSource1.ownFile' genome 1>/dev/null &&
            #set index_path = 'genome'
        #else:
            #set index_path = $refGenomeSource1.index.fields.path
        #end if

        ## One index per additional alignment step. The index basename is derived from the
        ## position of the step and not from the dataset name, because history dataset names
        ## may contain spaces or slashes and may be duplicated.
        #for $step, $i in enumerate($AdditionalQueries):
            bowtie-build -f '$i.ownFile' class_index_${step} 1>/dev/null &&
        #end for

        #set method_prefix = "-v %s -k 1 --best" % str($mismatches)
        #if $input[0].is_of_type('fasta'):
            #set format = "-f"
        #elif $input[0].is_of_type('fastq'):
            #set format = "-q"
        #end if

        #for $file in $input:
            #set sample = $file.element_identifier
            ## Start every sample from empty read files so that counts can never be taken
            ## from the files left behind by the previous sample.
            rm -f matched.fa unmatched.fa &&
            touch matched.fa unmatched.fa &&
            bowtie -p \${GALAXY_SLOTS:-4}
                   $method_prefix
                   --al matched.fa
                   --un unmatched.fa
                   --suppress 6,7,8
                   '$index_path' $format '$file' > tabular_bowtie_output.tab &&
            ## NOTE(review): read counts are computed as line count / 2, which assumes two
            ## lines per read (single-line fasta). If bowtie writes the al and un files in
            ## the input format, fastq inputs (four lines per read) would be counted twice.
            ## TODO confirm against the expected test outputs before changing the divisor.
            genome_aligned=\$(( \$(wc -l < matched.fa)/2 )) &&
            #for $step, $i in enumerate($AdditionalQueries):
                ## The first additional step takes the reads aligned to the primary reference,
                ## every later step takes the reads left unaligned by the previous step.
                #if $step == 0:
                    #set to_align = "matched.fa"
                #else:
                    #set to_align = "class_unmatched.fa"
                #end if
                touch tmp_class_matched.fa tmp_class_unmatched.fa &&
                bowtie -p \${GALAXY_SLOTS:-4}
                       $method_prefix
                       --al tmp_class_matched.fa
                       --un tmp_class_unmatched.fa
                       --suppress 6,7,8
                       class_index_${step} $format '$to_align' > tabular_bowtie_output.tab &&
                class_aligned=\$(( \$(wc -l < tmp_class_matched.fa)/2 )) &&
                echo -e "$sample\t$i.ownFile.name\t\$class_aligned\t\${genome_aligned}" >> '$output' &&
                mv tmp_class_unmatched.fa class_unmatched.fa &&
                rm tmp_class_matched.fa &&
            #end for
            remaining=\$(( \$(wc -l < class_unmatched.fa)/2 )) &&
            echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> '$output' &&
        #end for

        Rscript '$__tool_directory__/barplot.r' --input '$output' --barplot '$barplot'
    ]]></command>
    <inputs>
        <param name="input" type="data" multiple="True" format="fasta,fastq"
               label="Input file: reads clipped from their adapter"
               help="Only with clipped, raw fasta or fastq files"/>
        <param name="mismatches" type="select" label="Number of mismatches allowed"
               help="specify the number of mismatches allowed during alignments">
            <option value="0">0</option>
            <option value="1" selected="true">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
        </param>
        <!-- First bowtie index selection -->
        <conditional name="refGenomeSource1">
            <param name="genomeSource" type="select"
                   label="Will you select a reference genome from your history or use a built-in index?"
                   help="Bowtie Built-ins were indexed using default options">
                <option value="indexed">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="indexed">
                <param name="index" type="select" label="Select a DNA reference index"
                       help="if your genome of interest is not listed - contact instance administrator">
                    <options from_data_table="bowtie_indexes"/>
                </param>
            </when>
            <when value="history">
                <param name="ownFile" type="data" format="fasta"
                       label="Select a fasta file, to serve as index reference" />
            </when>
        </conditional>
        <!-- End of first bowtie index selection -->
        <!-- Other bowtie index selections from fasta in history. At least one step is
             required: the command reads the unaligned reads of the last step. -->
        <repeat name="AdditionalQueries" title="Additional Alignment Step" min="1">
            <param name="ownFile" type="data" format="fasta"
                   label="Select a fasta file, to serve as index reference" />
        </repeat>
        <!-- End of other bowtie index selections -->
    </inputs>
    <outputs>
        <data format="tabular" name="output" label="Cascade Annotation Analysis">
            <actions>
                <action name="column_names" type="metadata"
                        default="Sample,Reference Index,Number of reads,Total reads" />
            </actions>
        </data>
        <data name="barplot" format="pdf" label="barplot from ${on_string}" />
    </outputs>
    <tests>
        <test>
            <param name="input" value="sample1.fa" ftype="fasta" />
            <param name="refGenomeSource1|genomeSource" value="history" />
            <param name="refGenomeSource1|ownFile" value="2L-tail.fa" ftype="fasta" />
            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
            <output name="output" ftype="tabular" file="sample1_output.tab" />
            <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500" />
        </test>
        <test>
            <param name="input" value="sample.fastq" ftype="fastq" />
            <param name="refGenomeSource1|genomeSource" value="history" />
            <param name="refGenomeSource1|ownFile" value="2L-tail.fa" ftype="fasta" />
            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
            <output name="output" ftype="tabular" file="sample_output.tab" />
            <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500" />
        </test>
        <test>
            <param name="input" value="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
            <param name="refGenomeSource1|genomeSource" value="history" />
            <param name="refGenomeSource1|ownFile" value="2L-tail.fa" ftype="fasta" />
            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
            <output name="output" ftype="tabular" file="multisample5_output.tab" />
            <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
        </test>
    </tests>
    <help><![CDATA[

**Introduction**

Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient.
A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.

Here, the sRbowtie wrapper works specifically with short reads in FASTA or FASTQ format
(bowtie -v mode, with -k 1), which have to be clipped from their adapter before alignment.

.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml


------

**What it does**

.. class:: infomark

This tool uses the sRbowtie wrapper to iteratively match reads on a series of reference indexes.
Reads that aligned to the first reference are realigned to the second reference.
From this point, unaligned reads are taken as input for alignment to the third reference, etc.


Reads are matched on DNA references (both strands) as fast as possible, without taking care of mapping issues

*-v [0,1,2,3] -k 1 --best --suppress 6,7,8*

The number of bowtie threads (-p) is taken from the Galaxy job configuration (GALAXY_SLOTS, 4 by default).

Unaligned reads at step N are used as input for sRbowtie at step N+1

-----

**Input formats**

.. class:: warningmark

*Reads must be clipped from their adapter and provided in a FASTA or FASTQ format*

-----

**OUTPUTS**

**Annotation table in a tabular format**

One line per sample and per additional reference, plus one "Not classified" line per sample,
with the columns: Sample, Reference Index, Number of reads, Total reads.

**Barplot in PDF format**

Graphical representation of the annotation table.

    ]]></help>
</tool>
