Mercurial > repos > artbio > sr_bowtie_dataset_annotation
comparison sr_bowtie_dataset_annotation.xml @ 0:bfe92ceffe57 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit 60340e9e0d2795b88e23fd57e1ccb190918bf337"
| author | artbio |
|---|---|
| date | Mon, 07 Oct 2019 08:40:15 -0400 |
| parents | |
| children | 19ba2e38e8ec |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bfe92ceffe57 |
|---|---|
| 1 <tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.4.0"> | |
| 2 <description>by iterative alignments with sRbowtie</description> | |
| 3 <requirements> <!-- bowtie supplies the bowtie and bowtie-build commands run in the command section; the R packages are presumably needed by barplot.r (TODO confirm) --> | |
| 4 <requirement type="package" version="1.1.2">bowtie</requirement> | |
| 5 <requirement type="package" version="1.6.0">r-optparse</requirement> | |
| 6 <requirement type="package" version="3.1.0">r-ggplot2</requirement> | |
| 7 <requirement type="package" version="0.8.0">r-ggrepel</requirement> | |
| 8 </requirements> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 ## Obtain the bowtie index of the reference genome: built from a history FASTA, or a built-in index. | |
| 11 #if $refGenomeSource1.genomeSource == "history": | |
| 12 bowtie-build -f '$refGenomeSource1.ownFile' genome 1>/dev/null && | |
| 13 #set index_path = 'genome' | |
| 14 #else: | |
| 15 #set index_path = $refGenomeSource1.index.fields.path | |
| 16 #end if | |
| 17 ## Build one bowtie index per additional reference; each index is named after its dataset. | |
| 18 #for $i in $AdditionalQueries: | |
| 19 bowtie-build -f '$i.ownFile' '$i.ownFile.name' 1>/dev/null && | |
| 20 #end for | |
| 21 #set method_prefix = "-v %s -k 1 --best" % str($mismatches) | |
| 22 ## bowtie writes --al/--un reads in the input format: 2 lines per FASTA read, 4 per FASTQ read. | |
| 23 #if $input[0].is_of_type('fasta'): | |
| 24 #set format = "-f" | |
| 25 #set lines_per_read = 2 | |
| 26 #elif $input[0].is_of_type('fastq'): | |
| 27 #set format = "-q" | |
| 28 #set lines_per_read = 4 | |
| 29 #end if | |
| 30 #for $file in $input: | |
| 31 #set sample=$file.element_identifier | |
| 32 ## Reset the read files: bowtie only creates --al/--un files when it has reads to write, | |
| 33 ## so a missing or stale matched.fa would otherwise break or corrupt the counts of this sample. | |
| 34 rm -f matched.fa unmatched.fa && touch matched.fa unmatched.fa && | |
| 35 bowtie -p \${GALAXY_SLOTS:-4} $method_prefix --al matched.fa --un unmatched.fa --suppress 6,7,8 | |
| 36 '$index_path' $format '$file' > tabular_bowtie_output.tab && | |
| 37 genome_aligned=\$(( \$(wc -l < matched.fa)/$lines_per_read )) && | |
| 38 ## Until an additional reference has been processed, every genome-matched read is unclassified. | |
| 39 cp matched.fa class_unmatched.fa && | |
| 40 #set counter = 0 | |
| 41 #for $i in $AdditionalQueries: | |
| 42 #set $counter += 1 | |
| 43 ## The first reference receives the genome-matched reads, the next ones the reads still unmatched. | |
| 44 #if $counter != 1: | |
| 45 #set to_align = "class_unmatched.fa" | |
| 46 #else: | |
| 47 #set to_align = "matched.fa" | |
| 48 #end if | |
| 49 touch tmp_class_matched.fa tmp_class_unmatched.fa && | |
| 50 bowtie -p \${GALAXY_SLOTS:-4} $method_prefix --al tmp_class_matched.fa --un tmp_class_unmatched.fa --suppress 6,7,8 | |
| 51 '$i.ownFile.name' $format '$to_align' > tabular_bowtie_output.tab && | |
| 52 class_aligned=\$(( \$(wc -l < tmp_class_matched.fa)/$lines_per_read )) && | |
| 53 class_unaligned=\$(( \$(wc -l < tmp_class_unmatched.fa)/$lines_per_read )) && | |
| 54 echo -e "$sample\t$i.ownFile.name\t\$class_aligned\t\${genome_aligned}" >> '$output' && | |
| 55 mv tmp_class_unmatched.fa class_unmatched.fa && | |
| 56 rm tmp_class_matched.fa && | |
| 57 #end for | |
| 58 ## Reads matched to the genome but to none of the additional references. | |
| 59 remaining=\$(( \$(wc -l < class_unmatched.fa)/$lines_per_read )) && | |
| 60 echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> '$output' && | |
| 61 #end for | |
| 62 | |
| 63 ## Draw the barplot from the annotation table. | |
| 64 Rscript '$__tool_directory__/barplot.r' --input '$output' --barplot '$barplot' | |
| 65 ]]></command> | |
| 66 <inputs> | |
| 67 <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/> | |
| 68 <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments"> | |
| 69 <option value="0">0</option> | |
| 70 <option value="1" selected="true">1</option> | |
| 71 <option value="2">2</option> | |
| 72 <option value="3">3</option> | |
| 73 </param> | |
| 74 <!-- Reference genome for the first alignment: a built-in bowtie index or a FASTA dataset from the history --> | |
| 75 <conditional name="refGenomeSource1"> | |
| 76 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Bowtie Built-ins were indexed using default options"> | |
| 77 <option value="indexed">Use a built-in index</option> | |
| 78 <option value="history">Use one from the history</option> | |
| 79 </param> | |
| 80 <when value="indexed"> | |
| 81 <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact instance administrator"> | |
| 82 <options from_data_table="bowtie_indexes"/> | |
| 83 </param> | |
| 84 </when> | |
| 85 <when value="history"> | |
| 86 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" /> | |
| 87 </when> | |
| 88 </conditional> | |
| 89 <!-- End of reference genome selection --> | |
| 90 <!-- Additional references (FASTA datasets from the history), aligned in the order given; at least one is required, enforced by min="1" --> | |
| 91 <repeat name="AdditionalQueries" title="Additional Alignment Step" min="1"> | |
| 92 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" /> | |
| 93 </repeat> | |
| 94 <!-- End of additional reference selections --> | |
| 95 </inputs> | |
| 96 <outputs> <!-- output: tabular annotation table appended to by the command section; barplot: PDF drawn from that table by barplot.r --> | |
| 97 <data format="tabular" name="output" label="Cascade Annotation Analysis"> | |
| 98 <actions> | |
| 99 <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads,Total reads" /> | |
| 100 </actions> | |
| 101 </data> | |
| 102 <data name="barplot" format="pdf" label="barplot from ${on_string}" /> | |
| 103 </outputs> | |
| 104 <tests> | |
| 105 <test> <!-- one FASTA sample; genome and two additional references taken from the history --> | |
| 106 <param name="input" ftype="fasta" value="sample1.fa"/> | |
| 107 <param name="genomeSource" value="history"/> | |
| 108 <param name="ownFile" ftype="fasta" value="2L-tail.fa"/> | |
| 109 <param name="AdditionalQueries_0|ownFile" ftype="fasta" value="dme_miR21_hairpin.fa"/> | |
| 110 <param name="AdditionalQueries_1|ownFile" ftype="fasta" value="Ensembl_transposon_set.fa"/> | |
| 111 <output name="output" file="sample1_output.tab" ftype="tabular"/> | |
| 112 <output name="barplot" file="sample1_output.pdf" ftype="pdf" compare="sim_size" delta="500"/> | |
| 113 </test> | |
| 114 <test> <!-- one FASTQ sample; same references as the first test --> | |
| 115 <param name="input" ftype="fastq" value="sample.fastq"/> | |
| 116 <param name="genomeSource" value="history"/> | |
| 117 <param name="ownFile" ftype="fasta" value="2L-tail.fa"/> | |
| 118 <param name="AdditionalQueries_0|ownFile" ftype="fasta" value="dme_miR21_hairpin.fa"/> | |
| 119 <param name="AdditionalQueries_1|ownFile" ftype="fasta" value="Ensembl_transposon_set.fa"/> | |
| 120 <output name="output" file="sample_output.tab" ftype="tabular"/> | |
| 121 <output name="barplot" file="sample_output.pdf" ftype="pdf" compare="sim_size" delta="500"/> | |
| 122 </test> | |
| 123 <test> <!-- five FASTA samples processed in a single run; same references as the first test --> | |
| 124 <param name="input" ftype="fasta" value="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa"/> | |
| 125 <param name="genomeSource" value="history"/> | |
| 126 <param name="ownFile" ftype="fasta" value="2L-tail.fa"/> | |
| 127 <param name="AdditionalQueries_0|ownFile" ftype="fasta" value="dme_miR21_hairpin.fa"/> | |
| 128 <param name="AdditionalQueries_1|ownFile" ftype="fasta" value="Ensembl_transposon_set.fa"/> | |
| 129 <output name="output" file="multisample5_output.tab" ftype="tabular"/> | |
| 130 <output name="barplot" file="multisample5_output.pdf" ftype="pdf" compare="sim_size" delta="500"/> | |
| 131 </test> | |
| 132 </tests> | |
| 133 <help> | |
| 134 | |
| 135 **Introduction** | |
| 136 | |
| 137 Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. | |
| 138 A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution. | |
| 139 | |
| 140 Here, the sRbowtie wrapper specifically works with short-read FASTA or FASTQ inputs | |
| 141 (-v bowtie mode, with -k 1), which have to be clipped from their adapter before alignment. | |
| 142 | |
| 143 .. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml | |
| 144 | |
| 145 | |
| 146 ------ | |
| 147 | |
| 148 **What it does** | |
| 149 | |
| 150 .. class:: infomark | |
| 151 | |
| 152 This script uses the sRbowtie wrapper to iteratively match reads against a series of reference indexes. | |
| 153 Reads that aligned to the first reference are realigned to the second reference. | |
| 154 From this point, unaligned reads are taken as input for alignment to the third reference, etc. | |
| 155 | |
| 156 | |
| 157 Reads are matched on DNA references (both strands) as fast as possible, without taking care of mapping issues. | |
| 158 | |
| 159 *-v [0,1,2,3] -k 1 --best -p [GALAXY_SLOTS, default 4] --suppress 6,7,8* | |
| 160 | |
| 161 Unaligned reads at step N are used as input for sRbowtie at step N+1. | |
| 162 | |
| 163 ----- | |
| 164 | |
| 165 **Input formats** | |
| 166 | |
| 167 .. class:: warningmark | |
| 168 | |
| 169 *Reads must be clipped from their adapter and provided in a FASTA or FASTQ format* | |
| 170 | |
| 171 ----- | |
| 172 | |
| 173 **OUTPUTS** | |
| 174 | |
| 175 **Annotation table in a tabular format** | |
| 176 | |
| 177 </help> | |
| 178 </tool> |
