Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam
annotate samtools-parallel-mpileup.xml @ 47:4ae3d1a2acf7 draft
Uploaded
| author | yhoogstrate |
|---|---|
| date | Wed, 05 Mar 2014 08:00:55 -0500 |
| parents | c800d09b8cc3 |
| children | e3eb1ca50b8d |
| rev | line source |
|---|---|
| 37 | 1 <?xml version="1.0" encoding="UTF-8"?> |
| 2 <tool id="samtools_parallel_mpileup" name="Samtools parallel mpileup"> | |
| 38 | 3 <description>Samtools mpileup (supporting parallelization)</description> |
| 37 | 4 <requirements> |
| 5 <requirement type="package" version="0.1.19">samtools-parallel-mpileup</requirement> | |
| 6 <requirement type="package" version="0.1.19">samtools</requirement> | |
| 7 </requirements> | |
| 8 <command> | |
| 9 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 | |
| 10 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2 | |
| 11 #else | |
| 12 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | |
| 13 samtools-parallel-mpileup mpileup | |
| 14 -t $mpileup_parallelization.samtools_threads | |
| 15 #else | |
| 16 samtools mpileup | |
| 17 #end if | |
| 18 -f | |
| 19 #if $reference_genome_source.source_select == "indexed_filtered" | |
| 20 "$reference_genome_source.reference_genome" | |
| 21 #else if $reference_genome_source.source_select == "indexed_all" | |
| 22 "$reference_genome_source.reference_genome" | |
| 23 #else if $reference_genome_source.source_select == "history" | |
| 24 "$reference_genome_source.reference_genome" | |
| 25 #else | |
| 26 <!-- | |
| 27 This is a workaround to obtain the "genome.fa" file that | |
| 28 corresponds to the dbkey of the alignments. | |
| 29 Because this file is "calculated" during run-time, it can | |
| 30 be used in a workflow. | |
| 31 --> | |
| 32 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" | |
| 33 #end if | |
| 34 | |
| <!-- Optional region restriction. The value is read from the branch of the 'extended_parameters_regions' conditional that the user selected: 'samtools_r' is a region string passed to -r, 'samtools_l' is a positions/BED dataset passed to -l. Nothing is added for 'entire_genome'. | |
| The parameters are addressed as attributes of the conditional (no second '$' after the dot); a second '$' would start a separate top-level placeholder lookup. --> | |
| 35 #if $extended_parameters_regions.samtools_regions == "region" | |
| 36 -r $extended_parameters_regions.samtools_r | |
| 37 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" | |
| 38 -l $extended_parameters_regions.samtools_l | |
| 39 #end if | |
| 40 | |
| 41 #if $extended_parameters.parameters == "extended" | |
| 42 $extended_parameters.samtools_6 | |
| 43 $extended_parameters.samtools_A | |
| 44 $extended_parameters.samtools_B | |
| 45 -C $extended_parameters.samtools_C | |
| 46 -d $extended_parameters.samtools_d | |
| 47 $extended_parameters.samtools_E | |
| 48 -M $extended_parameters.samtools_M | |
| 49 $extended_parameters.samtools_R | |
| 50 -q $extended_parameters.samtools_q | |
| 51 -Q $extended_parameters.samtools_Q | |
| 52 | |
| 53 -e $extended_parameters.samtools_e | |
| 54 -F $extended_parameters.samtools_F | |
| 55 -h $extended_parameters.samtools_h | |
| 56 $extended_parameters.samtools_I | |
| 57 -L $extended_parameters.samtools_L | |
| 58 -m $extended_parameters.samtools_m | |
| 59 -o $extended_parameters.samtools_o | |
| 60 $extended_parameters.samtools_p | |
| 61 -P $extended_parameters.samtools_P | |
| 62 #end if | |
| 63 | |
| 64 #for $alignment in $alignments | |
| 65 ${alignment} | |
| 66 #end for | |
| 67 | |
| 68 2> stderr_1.txt | |
| 38 | 69 #if $sort_mpileup == "true" |
| 70 | sort -k 1,1 -k 2,2 | |
| 71 #end if | |
| 37 | 72 > $output ; |
| 73 cat stderr_1.txt | |
| 74 #end if | |
| 75 </command> | |
| 76 | |
| 77 <inputs> | |
| 78 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/> | |
| 79 | |
| 80 <!-- Find out how to access the reference genome from the BAM file(s) --> | |
| 81 <conditional name="reference_genome_source"> | |
| 82 <param name="source_select" type="select" label="Fasta Source"> | |
| 83 <option value="indexed_filtered">Use a built-in index (which fits your reference)</option> | |
| 84 <option value="history">Use reference from the history</option> | |
| 85 <option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option> | |
| 86 <option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option> | |
| 87 </param> | |
| 88 <when value="indexed_filtered"> | |
| 89 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" > | |
| 38 | 90 <options from_data_table="all_fasta"> |
| 91 <column name="name" index="2"/> | |
| 37 | 92 <column name="dbkey" index="1"/> |
| 38 | 93 <column name="value" index="3"/><!-- Value is the path of the fasta file --> |
| 37 | 94 <filter type="data_meta" ref="alignments" multiple="false" key="dbkey" column="1" /> |
| 95 <validator type="no_options" message="No indexes are available for the selected input dataset" /> | |
| 96 </options> | |
| 97 </param> | |
| 98 </when> | |
| 99 <when value="history"> | |
| 100 <param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." /> | |
| 101 </when> | |
| 102 <when value="indexed_all"> | |
| 103 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" > | |
| 38 | 104 <options from_data_table="all_fasta"> |
| 105 <column name="name" index="2"/> | |
| 37 | 106 <column name="dbkey" index="1"/> |
| 38 | 107 <column name="value" index="3"/><!-- Value is the path of the fasta file --> |
| 37 | 108 <validator type="no_options" message="No indexes are available for the selected input dataset" /> |
| 109 </options> | |
| 110 </param> | |
| 111 </when> | |
| 112 <when value="attribute" /> | |
| 113 </conditional> | |
| 114 | |
| 115 <conditional name="extended_parameters_regions"> | |
| 116 <param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations."> | |
| 117 <option value="entire_genome">Entire genome</option> | |
| 118 <option value="region">Specific region</option> | |
| 119 <option value="regions_file_pos">Specific positions (file); list of positions</option> | |
| 120 <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> | |
| 121 </param> | |
| 122 <when value="entire_genome"> | |
| 123 </when> | |
| 124 <when value="region"> | |
| 125 <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" /> | |
| 126 </when> | |
| 127 <when value="regions_file_pos"> | |
| 128 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> | |
| 129 </when> | |
| 130 <when value="regions_file_bed"> | |
| 38 | 131 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> |
| 37 | 132 </when> |
| 133 </conditional> | |
| 134 | |
| 135 <conditional name="mpileup_parallelization"> | |
| 136 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance."> | |
| 137 <option value="false" >False - uses classical samtools</option> | |
| 138 <option value="true">True - uses (experimental) samtools mpileup-parallel</option> | |
| 139 </param> | |
| 140 <when value="false" /> | |
| 141 <when value="true"> | |
| 142 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" /> | |
| 143 </when> | |
| 144 </conditional> | |
| 145 | |
| 45 | 146 <param name="sort_mpileup" type="select" display="checkboxes" multiple="false" checked="false" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the outputs order, sorting can be conveniet for e.g. testing. Notice that this function has only use in a limited number of situations but costs CPU usage. Only use it if its really neccesairy." /> |
| 38 | 147 |
| 37 | 148 <conditional name="extended_parameters"> |
| 149 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> | |
| 150 <option value="default">Default settings</option> | |
| 151 <option value="extended">Extended settings</option> | |
| 152 </param> | |
| 38 | 153 <when value="default" /> |
| 37 | 154 <when value="extended"> |
| 155 <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> | |
| 156 <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> | |
| 157 <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> | |
| 38 | 158 <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> |
| 159 <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> | |
| 37 | 160 <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> |
| 38 | 161 <param type="integer" name="samtools_M" value="60" label="cap mapping quality at INT [60]" /> |
| 37 | 162 <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> |
| 38 | 163 <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> |
| 164 <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> | |
| 37 | 165 |
| 38 | 166 <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> |
| 167 <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> | |
| 168 <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> | |
| 37 | 169 <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> |
| 38 | 170 <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> |
| 171 <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> | |
| 172 <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> | |
| 37 | 173 <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> |
| 38 | 174 <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> |
| 37 | 175 </when> |
| 176 </conditional> | |
| 177 </inputs> | |
| 178 | |
| 179 <outputs> | |
| 180 <data format="mpileup" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" /> | |
| 181 </outputs> | |
| 182 | |
| 38 | 183 <tests> |
| 184 <test><!-- Use classical samtools --> | |
|
42
bf9c5a61b200
overwrite readonly files
yhoogstrate <y.hoogstrate@erasmusmc.nl>
parents:
38
diff
changeset
|
185 <param name="alignments" value="hg19_mutant.bam.txt" dbkey="hg19" /> |
| 38 | 186 <param name="reference_genome_source.source_select" value="attribute" /> |
| 187 <param name="extended_parameters_regions.samtools_regions" value="entire_genome" /> | |
| 188 | |
| 189 <param name="mpileup_parallelization.mpileup_parallelization_select" value="false" /> | |
| 190 <param name="sort_mpileup" value="true" /> | |
| 191 | |
| 192 <param name="extended_parameters.parameters" value="default" /> | |
| 193 | |
| 194 | |
| 195 <output name="output" file="hg19_mutant.mpileup" /> | |
| 196 </test> | |
| 197 <test><!-- Use parallelized samtools --> | |
|
42
bf9c5a61b200
overwrite readonly files
yhoogstrate <y.hoogstrate@erasmusmc.nl>
parents:
38
diff
changeset
|
198 <param name="alignments" value="hg19_mutant.bam.txt" dbkey="hg19" /> |
| 38 | 199 <param name="reference_genome_source.source_select" value="attribute" /> |
| 200 <param name="extended_parameters_regions.samtools_regions" value="entire_genome" /> | |
| 201 | |
| 202 <param name="mpileup_parallelization.mpileup_parallelization_select" value="true" /> | |
| 203 <param name="mpileup_parallelization.samtools_threads" value="2" /> | |
| 204 <param name="sort_mpileup" value="true" /> | |
| 205 | |
| 206 <param name="extended_parameters.parameters" value="default" /> | |
| 207 | |
| 208 | |
| 209 <output name="output" file="hg19_mutant.mpileup" /> | |
| 210 </test> | |
| 211 </tests> | |
| 212 | |
| 37 | 213 <help> |
| 38 | 214 **Samtools mpileup (supporting parallelization)** |
| 37 | 215 |
| 38 | 216 SAM (Sequence Alignment/Map) format is a generic format for storing large nucleotide sequence alignments. SAM aims to be a format that: |
| 217 | |
| 218 Is flexible enough to store all the alignment information generated by various alignment programs; | |
| 219 Is simple enough to be easily generated by alignment programs or converted from existing alignment formats; | |
| 220 Is compact in file size; | |
| 221 Allows most of operations on the alignment to work on a stream without loading the whole alignment into memory; | |
| 222 Allows the file to be indexed by genomic position to efficiently retrieve all reads aligning to a locus. | |
| 223 SAM Tools provide various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format. | |
| 37 | 224 |
| 38 | 225 SAMtools is hosted by SourceForge.net. The project page is http://samtools.sourceforge.net/. The source code releases are available from the download page. You can check out the most recent source code from the github project page with: |
| 226 git clone git://github.com/samtools/samtools.git | |
| 227 https://github.com/mydatascience/parallel-mpileup/ | |
| 37 | 228 |
| 38 | 229 Because samtools does not support parallelization of the mpileup command, the project was forked to include parallelization support (https://github.com/mydatascience/parallel-mpileup/): |
| 230 | |
| 231 | |
| 232 However, since that fork seems to lack support and contains fatal bugs, its development was continued at: | |
| 233 https://github.com/yhoogstrate/parallel-mpileup/ | |
| 234 | |
| 37 | 235 |
| 236 **Input formats** | |
| 237 | |
| 38 | 238 Samtools accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. |
| 37 | 239 |
| 240 **Installation** | |
| 241 | |
| 38 | 242 The installation is fully automatic. |
| 37 | 243 |
| 244 **License** | |
| 245 | |
| 38 | 246 * parallel-mpileup: MIT License (https://github.com/yhoogstrate/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) |
| 37 | 247 * samtools: MIT License |
| 248 | |
| 249 | |
| 250 **Contact** | |
| 251 | |
| 252 The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project: | |
| 253 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch | |
| 254 | |
| 255 More tools by the Translational Research IT (TraIT) project can be found in the following repository: | |
| 256 http://toolshed.dtls.nl/ | |
| 257 </help> | |
| 38 | 258 </tool> |
