Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.xml @ 1:615fa2171a34 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit c3d728b98db4987821feae40952d9797c97eaf5a"
| author | artbio |
|---|---|
| date | Fri, 04 Oct 2019 04:33:08 -0400 |
| parents | 0a06985c0894 |
| children | 59d93aa7cc20 |
line wrap: on
line diff
--- a/small_rna_maps.xml Tue Aug 22 12:06:58 2017 -0400 +++ b/small_rna_maps.xml Fri Oct 04 04:33:08 2019 -0400 @@ -1,99 +1,368 @@ -<tool id="small_rna_maps" name="small_rna_maps" version="1.0.1"> +<tool id="small_rna_maps" name="small_rna_maps" version="2.15.0"> <description></description> <requirements> <requirement type="package" version="1.11.2=py27_0">numpy</requirement> - <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement> - <requirement type="package" version="1.3.2=r3.3.1_0">r-optparse</requirement> - <requirement type="package" version="0.6_28=r3.3.1_0">r-latticeextra</requirement> - <requirement type="package" version="2.2.1=r3.3.1_0">r-gridextra</requirement> + <requirement type="package" version="0.15.3=py27hda2845c_1">pysam</requirement> + <requirement type="package" version="1.6.4=r36h6115d3f_0">r-optparse</requirement> + <requirement type="package" version="0.6_28=r36h6115d3f_1002">r-latticeextra</requirement> + <requirement type="package" version="2.3=r36h6115d3f_1002">r-gridextra</requirement> + <requirement type="package" version="1.4.3=r36h29659fb_0">r-reshape2</requirement> + <requirement type="package" version="0.6.6">sambamba</requirement> + <requirement type="package" version="1.9=h10a08f8_12">samtools</requirement> + <requirement type="package" version="64.2=he1b5a44_1">icu</requirement> </requirements> <stdio> <exit_code range="1:" level="fatal" description="Tool exception" /> </stdio> <command detect_errors="exit_code"><![CDATA[ - #for $file in $inputs - samtools index '$file' && - #end for - python '$__tool_directory__'/small_rna_maps.py - --inputs - #for $file in $inputs - '$file' - #end for - --sample_names - #for $sample in $inputs - '$sample.name' - #end for - --plot_methods Counts '$extra_plot' - --outputs '$output_tab' '$extra_output_tab' && - Rscript '$__tool_directory__'/small_rna_maps.r - --first_dataframe '$output_tab' - --extra_dataframe '$extra_output_tab' - --extra_plot_method '$extra_plot' - 
--output_pdf '$output_pdf' + #import json + #import os + #for $file in $inputs + sambamba view -t \${GALAXY_SLOTS} -F "not unmapped and sequence_length >= ${minsize} and sequence_length <= ${maxsize}" -f bam '$file' -o '$file.element_identifier' && + samtools index '$file.element_identifier' && + #end for + python '$__tool_directory__'/small_rna_maps.py + --inputs ${ ' '.join(['"%s"' % x.element_identifier for x in $inputs]) } + #set $labels = list() + #for $file in $inputs: + $labels.append(str($file.element_identifier)) + #end for + --sample_names ${ ' '.join(['"%s"' % x for x in $labels]) } + --minsize $minsize + --maxsize $maxsize + #if str($plots_options.plots_options_selector ) == "two_plot": + --plot_methods '${plots_options.first_plot}' '${plots_options.extra_plot}' + --outputs '$output_tab' '$extra_output_tab' && + #elif str($plots_options.plots_options_selector ) == "global": + --plot_methods 'Size' + --outputs '$output_tab' && + #elif str($plots_options.plots_options_selector ) == "cluster": + --plot_methods 'Counts' + --outputs '$output_tab' + --cluster ${plots_options.cluster} + --bed '$output_bed' + --bed_skipsize ${plots_options.skip_size} + --bed_skipcounts ${plots_options.skip_counts} + --bed_skipdensity ${plots_options.skip_density} + ${plots_options.strandness} && + #else: + --plot_methods '${plots_options.first_plot}' + --outputs '$output_tab' && + #end if + + + Rscript '$__tool_directory__'/small_rna_maps.r + --first_dataframe '$output_tab' + --extra_dataframe '$extra_output_tab' + #if len(str($normalization)) != 1: + --normalization "${ ' '.join( [factor for factor in $normalization.split()]) }" + #else: + --normalization "${ ' '.join( ["1" for factor in $inputs] )}" + #end if + #if $ylimits_cond.ylimits == "no": + --ymin '' --ymax '' + #else: + --ymin '${ylimits_cond.ymin}' --ymax '${ylimits_cond.ymax}' + #end if + #if str($plots_options.plots_options_selector ) == "two_plot": + --first_plot_method '${plots_options.first_plot}' + 
--extra_plot_method '${plots_options.extra_plot}' + #elif str($plots_options.plots_options_selector ) == "global": + --first_plot_method 'Size' + --extra_plot_method '' + --global '${plots_options.mergestrands}' + #else: + --first_plot_method '${plots_options.first_plot}' + --extra_plot_method '' + #end if + --output_pdf '$output_pdf' ]]></command> <inputs> - <param name="inputs" type="data" format="bam" label="Select multiple alignments to parse" multiple="True"/> - <param name="extra_plot" type="select" label="select the type of extra plot in addition to read map"> - <option value="Coverage">Coverage</option> - <option value="Mean">Mean Sizes</option> - <option value="Median">Median Sizes</option> - <option value="Size">Size Distributions</option> - </param> + <param name="inputs" type="data" format="bam" label="Select a alignment files to parse" multiple="true" + help="maps from these bam inputs will be collected in a single pdf output" /> + <param name="normalization" type="text" label="Enter a size/normalization factor." 
+ help="Enter normalisation factors separated by space eg [0.75 1.23 1.1], no normalization if no values, + ignored if a single sample" + value="1"/> + <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis" + value="19" help="default value: 19" /> + <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis" + value="29" help="default value: 29" /> + <conditional name="plots_options"> + <param name="plots_options_selector" type="select" display="radio" label="Plot Options"> + <option value="one_plot">Just one plot per chromosome</option> + <option value="two_plot" selected="True">Two plots per chromosome</option> + <option value="global">Global read size distributions of aligned reads</option> + <option value="cluster">Map read clusters</option> + </param> + <when value="two_plot"> + <param name="first_plot" type="select" display="radio" label="Select the type of the top plot"> + <option value="Counts">Counts</option> + <option value="Coverage">Coverage</option> + <option value="Mean">Mean Sizes</option> + <option value="Median">Median Sizes</option> + <option value="Size">Size Distributions</option> + </param> + <param name="extra_plot" type="select" display="radio" label="Select the type of the bottom plot"> + <option value="Counts">Counts</option> + <option value="Coverage">Coverage</option> + <option value="Mean">Mean Sizes</option> + <option value="Median">Median Sizes</option> + <option value="Size">Size Distributions</option> + </param> + </when> + <when value="one_plot"> + <param name="first_plot" type="select" display="radio" label="select the type of plot"> + <option value="Counts">Counts</option> + <option value="Coverage">Coverage</option> + <option value="Mean">Mean Sizes</option> + <option value="Median">Median Sizes</option> + <option value="Size">Size Distributions</option> + </param> + </when> + <when value="global"> + <param name="first_plot" type="hidden" value="Size"/> + 
<param name="mergestrands" type="select" display="radio" label="Whether forward and reverse aligned reads should be merged or not in the histogram"> + <option value="nomerge">Do not merge</option> + <option value="merge">Merge forward and reverse reads</option> + </param> + </when> + <when value="cluster"> + <param name="first_plot" type="hidden" value="Counts"/> + <param name="cluster" type="integer" label="Clustering distance in nucleotides" value="1" + help="Sets the distance (in nt) below which reads are clustered to a single median position" /> + <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false" + label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/> + <param name="skip_size" type="integer" label="do not report clusters whose size is less than the specified value" value="1" + help="Cluster size threshod (in nucleotides) for reporting. Set to 1 (default) reports all clusters, including singlets" /> + <param name="skip_counts" type="integer" label="do not report cluster with a number of reads lower than the specified value" value="1" + help="Number-of-reads threshod (in nucleotides) for cluster reporting. Set to 1 (default) reports all clusters, irrespective of their counts" /> + <param name="skip_density" type="float" label="do not report cluster with density equal or less than the specified value" value="0" + help="Density threshod (in reads per nucleotides) for reporting. Set to 0 (default) reports all cluster densities" /> + </when> + </conditional> + <conditional name="ylimits_cond"> + <param name="ylimits" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Do you wish to set an y axis limit to the plots?" + help="This limit won't be applied to size distribution plots"/> + <when value="yes"> + <param name="ymin" type="float" label="Enter minimum value" value="0.0" help="e.g. 
'-5.0'"/> + <param name="ymax" type="float" label="Enter maximum value" value="0.0" help="e.g. '5.0'"/> + </when> + <when value="no"> + </when> + </conditional> </inputs> <outputs> - <data format="tabular" name="output_tab" label="Read Count dataframe" /> - <data format="tabular" name="extra_output_tab" label="$extra_plot dataframe" /> - <data format="pdf" name="output_pdf" label="PDF file" /> - + <data format="tabular" name="output_tab" label="$plots_options.first_plot dataframe" /> + <data format="bed" name="output_bed" label="bed file for clusters" > + <filter>plots_options['plots_options_selector'] == 'cluster'</filter> + </data> + <data format="tabular" name="extra_output_tab" label="$plots_options.extra_plot dataframe"> + <filter>plots_options['plots_options_selector'] == 'two_plot'</filter> + </data> + <data format="pdf" name="output_pdf" label="small RNA maps" /> </outputs> <tests> - <test> - <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> - <param name="extra_plot" value="Mean" /> - <output file="readmap.tab" name="output_tab" /> - <output file="mean.tab" name="extra_output_tab" /> - <output file="mean.pdf" name="output_pdf" /> + <test> <!-- 0 --> + <param name="inputs" value="input1.bam,input_new2.bam" ftype="bam" /> + <param name="normalization" value="1 2" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Counts" /> + <output file="input1_input2new_norm_1_2_counts.tab" name="output_tab" /> + <output file="input1_input2new_norm_1_2_single_plot_counts.pdf" name="output_pdf" /> + </test> + <test> <!-- 1 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="ylimits" value="yes" /> + <param name="ymin" value="-5" /> + <param name="ymax" value="5" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Counts" /> + <output file="input1_counts_yminneg5_5.tab" name="output_tab" /> + <output 
file="input1_yminneg5_5_single_plot_counts.pdf" name="output_pdf" /> + </test> + <test> <!-- 2 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="skip_size" value="1" /> + <param name="strandness" value="false" /> + <output file="clustering.tab" name="output_tab" /> + <output file="clustering.pdf" name="output_pdf" /> + <output file="bed1.bed" name="output_bed" /> </test> - <test> - <param name="inputs" value="input1.bam,input1.bam" ftype="bam"/> - <param name="extra_plot" value="Mean" /> - <output file="doubled_readmap.tab" name="output_tab" /> - <output file="double_mean.tab" name="extra_output_tab" /> - <output file="doubled_mean.pdf" name="output_pdf" /> + <test> <!-- 3 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="skip_size" value="1" /> + <param name="strandness" value="true" /> + <output file="clustering_unstranded.tab" name="output_tab" /> + <output file="clustering_unstranded.pdf" name="output_pdf" /> + <output file="bed2.bed" name="output_bed" /> + </test> + <test> <!-- 4 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="skip_size" value="2" /> + <param name="strandness" value="false" /> + <output file="clustering.tab" name="output_tab" /> + <output file="clustering.pdf" name="output_pdf" /> + <output file="bed3.bed" name="output_bed" /> + </test> + <test> <!-- 5 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param 
name="normalization" value="1.0" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="skip_size" value="2" /> + <param name="skip_counts" value="3" /> + <param name="skip_density" value="1.0" /> + <param name="strandness" value="false" /> + <output file="clustering.tab" name="output_tab" /> + <output file="clustering.pdf" name="output_pdf" /> + <output file="bed4.bed" name="output_bed" /> </test> - <test> - <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> - <param name="extra_plot" value="Median" /> - <output file="readmap.tab" name="output_tab" /> - <output file="median.tab" name="extra_output_tab" /> - <output file="median.pdf" name="output_pdf" /> + <test> <!-- 6 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="skip_size" value="2" /> + <param name="skip_counts" value="2" /> + <param name="skip_density" value="0.4" /> + <param name="strandness" value="true" /> + <output file="clustering_unstranded.tab" name="output_tab" /> + <output file="clustering_unstranded.pdf" name="output_pdf" /> + <output file="bed5.bed" name="output_bed" /> + </test> + <test> <!-- 7 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Size" /> + <output file="input1_min20_max30_size.tab" name="output_tab" /> + <output file="input1_min20_max30_single_plot_size.pdf" name="output_pdf" /> + </test> + <test> <!-- 8 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Mean" /> + 
<output file="input1_mean.tab" name="output_tab" /> + <output file="input1__single_plot_mean.pdf" name="output_pdf" /> + </test> + <test> <!-- 9 --> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Median" /> + <output file="input1_median.tab" name="output_tab" /> + <output file="input1_single_plot_median.pdf" name="output_pdf" /> </test> - <test> - <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> - <param name="extra_plot" value="Coverage" /> - <output file="readmap.tab" name="output_tab" /> - <output file="coverage.tab" name="extra_output_tab" /> - <output file="coverage.pdf" name="output_pdf" /> + <test> <!-- 10 --> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> + <param name="normalization" value="1.0 2.0" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Counts" /> + <output file="input1_input2_norm_1_2_counts.tab" name="output_tab" /> + <output file="input1_input2_norm_1_2_single_plot_counts.pdf" name="output_pdf" /> + </test> + <test> <!-- 11 --> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> + <param name="normalization" value="1.0 1.0" /> + <param name="ylimits" value="yes" /> + <param name="ymin" value="-5" /> + <param name="ymax" value="5" /> + <param name="plots_options_selector" value="two_plot" /> + <param name="first_plot" value="Counts" /> + <param name="extra_plot" value="Size" /> + <output file="input1_input2_counts.tab" name="output_tab" /> + <output file="input1_input2_size.tab" name="extra_output_tab" /> + <output file="input1_input2_double_plot_counts_size_ylimneg5_5.pdf" name="output_pdf" /> </test> - <test> - <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> - <param name="extra_plot" value="Size" /> - <output file="readmap.tab" name="output_tab" /> - <output file="size.tab" 
name="extra_output_tab" /> - <output file="sizes.pdf" name="output_pdf" /> + <test> <!-- 12 --> + <param name="inputs" value="input_single_chr.bam,input_single_chr.bam,input_single_chr.bam,input_single_chr.bam,input_single_chr.bam,input_single_chr.bam" ftype="bam" /> + <param name="normalization" value="1.0 1.0 1.0 1.0 1.0 1.0" /> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Coverage" /> + <output file="input_single_chr_x_6_single_plot_coverage.tab" name="output_tab" /> + <output file="input_single_chr_x_6_single_plot_coverage.pdf" name="output_pdf" /> + </test> + <test> <!-- 13 --> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> + <param name="normalization" value="1.0 1.0" /> + <param name="plots_options_selector" value="global" /> + <param name="mergestrands" value="nomerge" /> + <param name="first_plot" value="Size" /> + <output file="size.tab" name="output_tab" /> + <output file="global_nomerge.pdf" name="output_pdf" /> + </test> + <test> <!-- 14 --> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> + <param name="normalization" value="1.0 1.0" /> + <param name="plots_options_selector" value="global" /> + <param name="mergestrands" value="merge" /> + <param name="first_plot" value="Size" /> + <output file="size.tab" name="output_tab" /> + <output file="global_merge.pdf" name="output_pdf" /> </test> </tests> - - <help> **What it does** -Generate read count maps from alignment BAM files, using pysam and lattice. +Plots mapping statistics of read alignments along reference chromosomes or genes or arbitrary regions : + + - counts + - mean sizes + - median sizes + - coverage depth + - size distribution + +Read counts, mean sizes and median sizes are computed by counting the number of 5' end of reads +in each position of a chromosome reference. +Coverage depths are computed from the input bam alignment files using the python pysam module. 
+ +The metrics mentioned above can be plotted either separately: + +.. image:: one_plot.png + +Or in all possible pairwise combinations: + +.. image:: two_plot.png -In addition to the read counts (lower graphs), median size, mean size and coverage depth of reads(lower graphs) mapping at a given position are plotted. +For comparison purposes, values from bam alignment files can be normalized by a size factor +before plotting (Normalisation field). + +*Cluster mode* + +Clusters of read alignments are aggregated along regions of *variable* lengths. The clustering +algorithm works as follows: + +A read is clustered with the following read on the genomic reference if the two reads are +separated by at most the clustering distance (set in nucleotides). If clustered, the step is +repeated with the following read until clustering fails. A new cluster is then searched for. + +For the clustering procedure, one has the possibility to consider the polarity of reads (only forward +reads or reverse reads can be clustered separately), or to ignore this polarity. + +Clustered reads are plotted as for single reads, their coordinate being the median of the extreme coordinates of the cluster. + +In addition, clusters are reported in a bed file, where clusters can be filtered out based on various parameters: +cluster size, cluster read number or cluster read density (number of reads divided by the length of the cluster). **Inputs** @@ -101,13 +370,18 @@ - single-read - sorted - - mapping to the same reference + - mapped to the same reference + +.. class:: warningmark + +This tool follows a "map-reduce" procedure: multiple inputs, which can be arranged as a data collection, +are visualised side by side in a single pdf file. + + **Output** -A pdf file generated by the R package lattice - -One or two dataframes used to plot data +A pdf file generated by the R package lattice and one or two dataframes used to plot the data. </help> @@ -124,4 +398,3 @@ }</citation> </citations> </tool> -
