comparison bamCorrelate.xml @ 10:a68a771625d2 draft

Uploaded
author bgruening
date Tue, 29 Oct 2013 17:26:28 -0400
parents 73761f33f198
children b0d64a9930d6
comparison
equal deleted inserted replaced
9:73761f33f198 10:a68a771625d2
1 <tool id="deeptools_bamCorrelate" name="bamCorrelate" version="1.0.1"> 1 <tool id="deeptools_bamCorrelate" name="bamCorrelate" version="1.0.1">
2 <description>correlates pairs of BAM files</description> 2 <description>correlates pairs of BAM files</description>
3 <requirements> 3 <expand macro="requirements" />
4 <requirement type="package" version="1.5.1_3e13687c89e951476776b15afb4bbbc3b906f761">deepTools</requirement> 4 <macros>
5 <requirement type="package" >deepTools</requirement> 5 <import>deepTools_macros.xml</import>
6 </requirements> 6 </macros>
7 <command> 7 <command>
8 #import tempfile 8 #import tempfile
9 #set $temp_dir = os.path.abspath(tempfile.mkdtemp()) 9 #set $temp_dir = os.path.abspath(tempfile.mkdtemp())
10 10
11 #set files=[] 11 #set files=[]
12 #set labels=[] 12 #set labels=[]
13 #for $i in $inputs 13 #for $i in $inputs
14 14
15 #set $temp_input_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) 15 #set $temp_input_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
16 #set $temp_input_path = $temp_input_handle.name 16 #set $temp_input_path = $temp_input_handle.name
17 #silent $temp_input_handle.close() 17 #silent $temp_input_handle.close()
18 #silent os.system("ln -s %s %s.bam" % (str($i.bamfile), $temp_input_path)) 18 #silent os.system("ln -s %s %s.bam" % (str($i.bamfile), $temp_input_path))
19 #silent os.system("ln -s %s %s.bam.bai" % (str($i.bamfile.metadata.bam_index), $temp_input_path)) 19 #silent os.system("ln -s %s %s.bam.bai" % (str($i.bamfile.metadata.bam_index), $temp_input_path))
20 #silent $files.append('%s.bam' % $temp_input_path) 20 #silent $files.append('%s.bam' % $temp_input_path)
21 21
22 ##set $files += [str($i.bamfile)]
23 #if str($i.label.value) != "":
24 #set $labels += ["\"%s\"" % ($i.label.value)]
25 #else
26 #set $labels += ["\"%s\"" % ($i.bamfile.name)]
27 #end if
28 #end for
22 29
23 ##set $files += [str($i.bamfile)] 30 bamCorrelate
24 #if str($i.label.value) != "": 31
25 #set $labels += ["\"%s\"" % ($i.label.value)] 32 @THREADS@
26 #else 33
27 #set $labels += ["\"%s\"" % ($i.bamfile.name)] 34 --bamfiles #echo " ".join($files)
35 --labels #echo " ".join($labels)
36
37 --fragmentLength $fragmentLength
38 --corMethod $corMethod
39
40 #set newoutFileName=str($outFileName)+".png"
41 --plotFile $newoutFileName
42
43 #if $outputOpt.showOutputOpt == "yes"
44 --outRawCounts '$outFileRawCounts'
45 --outFileCorMatrix '$outFileCorMatrix'
28 #end if 46 #end if
29 #end for
30 bamCorrelate
31 47
32 ##ToDo 48 #if $mode.modeOpt == "bins":
33 --numberOfProcessors 4 49 --binSize '$mode.binSize'
50 --numberOfSamples '$mode.numberOfSamples'
51 #else:
52 --BED $mode.region_file
53 #end if
34 54
35 --bamfiles #echo " ".join($files) 55 ## options available in both modes
36 --labels #echo " ".join($labels) 56 #if $mode.advancedOpt.showAdvancedOpt == "yes":
37
38 --fragmentLength $fragmentLength
39 --corMethod $corMethod
40
41 #set newoutFileName=str($outFileName)+".png"
42 --plotFile $newoutFileName
43
44 #if $outputOpt.showOutputOpt == "yes"
45 --outRawCounts '$outFileRawCounts'
46 --outFileCorMatrix '$outFileCorMatrix'
47 #end if
48
49 #if $advancedOpt.showAdvancedOpt == "yes":
50
51 #if str($advancedOpt.region.value) != '':
52 --region '$advancedOpt.region'
53 #end if
54
55 --binSize '$advancedOpt.binSize'
56 --numberOfSamples '$advancedOpt.numberOfSamples'
57
58 $advancedOpt.doNotExtendPairedEnds
59 $advancedOpt.ignoreDuplicates
60 $advancedOpt.includeZeros
61
62 #if $advancedOpt.minMappingQuality:
63 --minMappingQuality '$advancedOpt.minMappingQuality'
64 #end if
65 #end if
66 ; mv $newoutFileName $outFileName
67 ; rm $temp_dir -rf
68 </command>
69 57
70 <inputs> 58 #if str($mode.advancedOpt.region.value) != '':
71 <repeat name="inputs" title="Input files" min="2"> 59 --region '$mode.advancedOpt.region'
72 <param name="bamfile" type="data" format="bam" 60 #end if
73 label="Bam file"
74 help="The BAM file must be sorted."/>
75 <param name="label" type="text" size="30" optional="true" value=""
76 label="Label"
77 help="Label to use in the output. If not given the dataset name will be used instead."/>
78 </repeat>
79 61
80 <param name="fragmentLength" type="integer" value="300" min="1" 62 $mode.advancedOpt.doNotExtendPairedEnds
81 label="Length of the average fragment size" 63 $mode.advancedOpt.ignoreDuplicates
82 help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> 64 $mode.advancedOpt.includeZeros
83 65
84 <param name="corMethod" type="select" label="Correlation method"> 66 #if $mode.advancedOpt.minMappingQuality:
85 <option value="pearson">Pearson</option> 67 --minMappingQuality '$mode.advancedOpt.minMappingQuality'
86 <option value="spearman">Spearman</option> 68 #end if
87 </param>
88 69
89 <conditional name="advancedOpt"> 70 #if $mode.advancedOpt.zMin:
90 <param name="showAdvancedOpt" type="select" label="Show advanced options" > 71 --zMin $mode.advancedOpt.zMin
91 <option value="no" selected="true">no</option> 72 #end if
92 <option value="yes">yes</option> 73 #if $mode.advancedOpt.zMax:
74 --zMax $mode.advancedOpt.zMax
75 #end if
76 --colorMap '$mode.advancedOpt.colorMap'
77
78 #end if
79
80 ; mv $newoutFileName $outFileName
81 ; rm $temp_dir -rf
82 </command>
83
84 <inputs>
85 <expand macro="multiple_input_bams" />
86
87 <param name="fragmentLength" type="integer" value="300" min="1"
88 label="Length of the average fragment size"
89 help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/>
90
91 <param name="corMethod" type="select" label="Correlation method">
92 <option value="pearson">Pearson</option>
93 <option value="spearman">Spearman</option>
93 </param> 94 </param>
94 <when value="no" />
95 <when value="yes">
96
97
98 <param name="region" type="text" value=""
99 label="Region of the genome to limit the operation to"
100 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
101 95
102 <param name="binSize" type="integer" value="10000" min="1" 96 <conditional name="mode">
103 label="Bin size in bp" 97 <param name="modeOpt" type="select" label="Choose computation mode"
104 help="Length in base pairs for a window used to sample the genome."/> 98 help="In the bins mode, the correlation is computed based on equal length bins. In the BED file mode, a list of genomic regions in BED format has to be given. For each region in the BED file the number of overlapping reads is counted in each of the BAM files. Then the correlation is computed.">
99 <option value="bins" selected="true">Bins</option>
100 <option value="BED-file">Limit correlation to certain regions (BED file)</option>
101 </param>
102 <when value="bins">
103 <param name="binSize" type="integer" value="10000" min="1"
104 label="Bin size in bp"
105 help="Length in base pairs for a window used to sample the genome."/>
105 106
106 <param name="numberOfSamples" type="integer" value="100000" min="1" 107 <param name="numberOfSamples" type="integer" value="100000" min="1"
107 label="Number of samples" 108 label="Number of samples"
108 help="Number of samples taken from the genome to compute the scaling factors"/> 109 help="Number of samples taken from the genome to compute the scaling factors"/>
110 <expand macro="bamCorrelate_mode_actions" />
111 </when>
112 <when value="BED-file">
113 <param name="region_file" type="data" format="bed" label="Region file in BED format" help="Correlation is computed for the number of reads that overlap such regions."/>
114 <expand macro="bamCorrelate_mode_actions" />
115 </when>
109 116
110 <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" 117 </conditional>
111 label="Do not extend paired ends"
112 help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/>
113 118
114 <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" 119 <conditional name="outputOpt">
115 label="Ignore duplicates" 120 <param name="showOutputOpt" type="select" label="Show additional output options" >
116 help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." /> 121 <option value="no" selected="true">no</option>
117 122 <option value="yes">yes</option>
118 <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" 123 </param>
119 label="Minimum mapping quality" 124 <when value="no" />
120 help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> 125 <when value="yes">
121 126 <param name="saveRawCounts" type="boolean" label="Save the bin counts"/>
122 <param name="includeZeros" type="boolean" truevalue="--includeZeros" falsevalue="" 127 <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/>
123 label ="Include zeros" 128 </when>
124 help ="If set, then regions with zero counts for *all* BAM files given are included. The default behavior is to ignore those cases." /> 129 </conditional>
125 130
126 </when> 131 </inputs>
127 </conditional> 132 <outputs>
128 133 <data format="png" name="outFileName" />
129 <conditional name="outputOpt"> 134 <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
130 <param name="showOutputOpt" type="select" label="Show additional output options" > 135 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter>
131 <option value="no" selected="true">no</option> 136 </data>
132 <option value="yes">yes</option> 137 <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix">
133 </param> 138 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter>
134 <when value="no" /> 139 </data>
135 <when value="yes"> 140 </outputs>
136 <param name="saveRawCounts" type="boolean" label="Save the bin counts"/> 141 <help>
137 <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/>
138 </when>
139 </conditional>
140
141 </inputs>
142 <outputs>
143 <data format="png" name="outFileName" />
144 <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
145 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter>
146 </data>
147 <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix">
148 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter>
149 </data>
150 </outputs>
151 <help>
152 142
153 **What it does** 143 **What it does**
154 144
155 This tool is useful to assess the overall similarity of different BAM files. A typical application 145 This tool is useful to assess the overall similarity of different BAM files. A typical application
156 is to check the correlation between replicates or published data sets. 146 is to check the correlation between replicates or published data sets.
157 147
158 The tool splits the genomes into bins of given length. For each bin, the number of reads 148 The tool splits the genomes into bins of given length. For each bin, the number of reads
159 found in each BAM file is counted and a correlation is computed for all 149 found in each BAM file is counted and a correlation is computed for all
160 pairs of BAM files. 150 pairs of BAM files.
161 151
152
153 .. image:: $PATH_TO_IMAGES/QC_bamCorrelate_humanSamples.png
154 :alt: Heatmap of RNA Polymerase II ChIP-seq
155
156
157 **Output files**:
158
159 - diagnostic plot: the plot produced by bamCorrelate is a clustered heatmap displaying the values for each pair-wise correlation, see the image above for an example
160 - data matrix (optional): if you want to plot the correlation values using a different program, e.g. R, this matrix can be used
161
162
163
164
162 ----- 165 -----
163 166
164 .. class:: infomark 167 .. class:: infomark
165 168
166 If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com 169 @REFERENCES@
167 170
168 This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. 171 </help>
169
170
171 .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
172 .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
173
174
175 </help>
176
177 </tool> 172 </tool>