Mercurial > repos > bgruening > deeptools
comparison bamCorrelate.xml @ 10:a68a771625d2 draft
Uploaded
author | bgruening |
---|---|
date | Tue, 29 Oct 2013 17:26:28 -0400 |
parents | 73761f33f198 |
children | b0d64a9930d6 |
comparison
equal
deleted
inserted
replaced
9:73761f33f198 | 10:a68a771625d2 |
---|---|
1 <tool id="deeptools_bamCorrelate" name="bamCorrelate" version="1.0.1"> | 1 <tool id="deeptools_bamCorrelate" name="bamCorrelate" version="1.0.1"> |
2 <description>correlates pairs of BAM files</description> | 2 <description>correlates pairs of BAM files</description> |
3 <requirements> | 3 <expand macro="requirements" /> |
4 <requirement type="package" version="1.5.1_3e13687c89e951476776b15afb4bbbc3b906f761">deepTools</requirement> | 4 <macros> |
5 <requirement type="package" >deepTools</requirement> | 5 <import>deepTools_macros.xml</import> |
6 </requirements> | 6 </macros> |
7 <command> | 7 <command> |
8 #import tempfile | 8 #import tempfile |
9 #set $temp_dir = os.path.abspath(tempfile.mkdtemp()) | 9 #set $temp_dir = os.path.abspath(tempfile.mkdtemp()) |
10 | 10 |
11 #set files=[] | 11 #set files=[] |
12 #set labels=[] | 12 #set labels=[] |
13 #for $i in $inputs | 13 #for $i in $inputs |
14 | 14 |
15 #set $temp_input_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) | 15 #set $temp_input_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) |
16 #set $temp_input_path = $temp_input_handle.name | 16 #set $temp_input_path = $temp_input_handle.name |
17 #silent $temp_input_handle.close() | 17 #silent $temp_input_handle.close() |
18 #silent os.system("ln -s %s %s.bam" % (str($i.bamfile), $temp_input_path)) | 18 #silent os.system("ln -s %s %s.bam" % (str($i.bamfile), $temp_input_path)) |
19 #silent os.system("ln -s %s %s.bam.bai" % (str($i.bamfile.metadata.bam_index), $temp_input_path)) | 19 #silent os.system("ln -s %s %s.bam.bai" % (str($i.bamfile.metadata.bam_index), $temp_input_path)) |
20 #silent $files.append('%s.bam' % $temp_input_path) | 20 #silent $files.append('%s.bam' % $temp_input_path) |
21 | 21 |
22 ##set $files += [str($i.bamfile)] | |
23 #if str($i.label.value) != "": | |
24 #set $labels += ["\"%s\"" % ($i.label.value)] | |
25 #else | |
26 #set $labels += ["\"%s\"" % ($i.bamfile.name)] | |
27 #end if | |
28 #end for | |
22 | 29 |
23 ##set $files += [str($i.bamfile)] | 30 bamCorrelate |
24 #if str($i.label.value) != "": | 31 |
25 #set $labels += ["\"%s\"" % ($i.label.value)] | 32 @THREADS@ |
26 #else | 33 |
27 #set $labels += ["\"%s\"" % ($i.bamfile.name)] | 34 --bamfiles #echo " ".join($files) |
35 --labels #echo " ".join($labels) | |
36 | |
37 --fragmentLength $fragmentLength | |
38 --corMethod $corMethod | |
39 | |
40 #set newoutFileName=str($outFileName)+".png" | |
41 --plotFile $newoutFileName | |
42 | |
43 #if $outputOpt.showOutputOpt == "yes" | |
44 --outRawCounts '$outFileRawCounts' | |
45 --outFileCorMatrix '$outFileCorMatrix' | |
28 #end if | 46 #end if |
29 #end for | |
30 bamCorrelate | |
31 | 47 |
32 ##ToDo | 48 #if $mode.modeOpt == "bins": |
33 --numberOfProcessors 4 | 49 --binSize '$mode.binSize' |
50 --numberOfSamples '$modenumberOfSamples' | |
51 #else: | |
52 --BED $mode.region_file | |
53 #end if | |
34 | 54 |
35 --bamfiles #echo " ".join($files) | 55 ## options available in both modes |
36 --labels #echo " ".join($labels) | 56 #if $mode.advancedOpt.showAdvancedOpt == "yes": |
37 | |
38 --fragmentLength $fragmentLength | |
39 --corMethod $corMethod | |
40 | |
41 #set newoutFileName=str($outFileName)+".png" | |
42 --plotFile $newoutFileName | |
43 | |
44 #if $outputOpt.showOutputOpt == "yes" | |
45 --outRawCounts '$outFileRawCounts' | |
46 --outFileCorMatrix '$outFileCorMatrix' | |
47 #end if | |
48 | |
49 #if $advancedOpt.showAdvancedOpt == "yes": | |
50 | |
51 #if str($advancedOpt.region.value) != '': | |
52 --region '$advancedOpt.region' | |
53 #end if | |
54 | |
55 --binSize '$advancedOpt.binSize' | |
56 --numberOfSamples '$advancedOpt.numberOfSamples' | |
57 | |
58 $advancedOpt.doNotExtendPairedEnds | |
59 $advancedOpt.ignoreDuplicates | |
60 $advancedOpt.includeZeros | |
61 | |
62 #if $advancedOpt.minMappingQuality: | |
63 --minMappingQuality '$advancedOpt.minMappingQuality' | |
64 #end if | |
65 #end if | |
66 ; mv $newoutFileName $outFileName | |
67 ; rm $temp_dir -rf | |
68 </command> | |
69 | 57 |
70 <inputs> | 58 #if str($mode.advancedOpt.region.value) != '': |
71 <repeat name="inputs" title="Input files" min="2"> | 59 --region '$mode.advancedOpt.region' |
72 <param name="bamfile" type="data" format="bam" | 60 #end if |
73 label="Bam file" | |
74 help="The BAM file must be sorted."/> | |
75 <param name="label" type="text" size="30" optional="true" value="" | |
76 label="Label" | |
77 help="Label to use in the output. If not given the dataset name will be used instead."/> | |
78 </repeat> | |
79 | 61 |
80 <param name="fragmentLength" type="integer" value="300" min="1" | 62 $mode.advancedOpt.doNotExtendPairedEnds |
81 label="Length of the average fragment size" | 63 $mode.advancedOpt.ignoreDuplicates |
82 help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> | 64 $mode.advancedOpt.includeZeros |
83 | 65 |
84 <param name="corMethod" type="select" label="Correlation method"> | 66 #if $mode.advancedOpt.minMappingQuality: |
85 <option value="pearson">Pearson</option> | 67 --minMappingQuality '$mode.advancedOpt.minMappingQuality' |
86 <option value="spearman">Spearman</option> | 68 #end if |
87 </param> | |
88 | 69 |
89 <conditional name="advancedOpt"> | 70 #if $mode.advancedOpt.zMin: |
90 <param name="showAdvancedOpt" type="select" label="Show advanced options" > | 71 --zMin $mode.advancedOpt.zMin |
91 <option value="no" selected="true">no</option> | 72 #end if |
92 <option value="yes">yes</option> | 73 #if $mode.advancedOpt.zMax: |
74 --zMax $mode.advancedOpt.zMax | |
75 #end if | |
76 --colorMap '$mode.advancedOpt.colorMap' | |
77 | |
78 #end if | |
79 | |
80 ; mv $newoutFileName $outFileName | |
81 ; rm $temp_dir -rf | |
82 </command> | |
83 | |
84 <inputs> | |
85 <expand macro="multiple_input_bams" /> | |
86 | |
87 <param name="fragmentLength" type="integer" value="300" min="1" | |
88 label="Length of the average fragment size" | |
89 help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> | |
90 | |
91 <param name="corMethod" type="select" label="Correlation method"> | |
92 <option value="pearson">Pearson</option> | |
93 <option value="spearman">Spearman</option> | |
93 </param> | 94 </param> |
94 <when value="no" /> | |
95 <when value="yes"> | |
96 | |
97 | |
98 <param name="region" type="text" value="" | |
99 label="Region of the genome to limit the operation to" | |
100 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" /> |
101 | 95 |
102 <param name="binSize" type="integer" value="10000" min="1" | 96 <conditional name="mode"> |
103 label="Bin size in bp" | 97 <param name="modeOpt" type="select" label="Choose computation mode" |
104 help="Length in base pairs for a window used to sample the genome."/> | 98 help="In the bins mode, the correlation is computed based on equal length bins. In the BED file mode, a list of genomic regions in BED format has to be given. For each region in the BED file the number of overlapping reads is counted in each of the BAM files. Then the correlation is computed."> |
99 <option value="bins" selected="true">Bins</option> | |
100 <option value="BED-file">Limit correlation to certain regions (BED file)</option> | |
101 </param> | |
102 <when value="bins"> | |
103 <param name="binSize" type="integer" value="10000" min="1" | |
104 label="Bin size in bp" | |
105 help="Length in base pairs for a window used to sample the genome."/> | |
105 | 106 |
106 <param name="numberOfSamples" type="integer" value="100000" min="1" | 107 <param name="numberOfSamples" type="integer" value="100000" min="1" |
107 label="Number of samples" | 108 label="Number of samples" |
108 help="Number of samples taken from the genome to compute the scaling factors"/> | 109 help="Number of samples taken from the genome to compute the scaling factors"/> |
110 <expand macro="bamCorrelate_mode_actions" /> | |
111 </when> | |
112 <when value="BED-file"> | |
113 <param name="region_file" type="data" format="bed" label="Region file in BED format" help="Correlation is computed for the number of reads that overlap such regions."/> | |
114 <expand macro="bamCorrelate_mode_actions" /> | |
115 </when> | |
109 | 116 |
110 <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" | 117 </conditional> |
111 label="Do not extend paired ends" | |
112 help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/> | |
113 | 118 |
114 <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" | 119 <conditional name="outputOpt"> |
115 label="Ignore duplicates" | 120 <param name="showOutputOpt" type="select" label="Show additional output options" > |
116 help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." /> | 121 <option value="no" selected="true">no</option> |
117 | 122 <option value="yes">yes</option> |
118 <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" | 123 </param> |
119 label="Minimum mapping quality" | 124 <when value="no" /> |
120 help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> | 125 <when value="yes"> |
121 | 126 <param name="saveRawCounts" type="boolean" label="Save the bin counts"/> |
122 <param name="includeZeros" type="boolean" truevalue="--includeZeros" falsevalue="" | 127 <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/> |
123 label ="Include zeros" | 128 </when> |
124 help ="If set, then regions with zero counts for *all* BAM files given are included. The default behavior is to ignore those cases." /> | 129 </conditional> |
125 | 130 |
126 </when> | 131 </inputs> |
127 </conditional> | 132 <outputs> |
128 | 133 <data format="png" name="outFileName" /> |
129 <conditional name="outputOpt"> | 134 <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts"> |
130 <param name="showOutputOpt" type="select" label="Show additional output options" > | 135 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter> |
131 <option value="no" selected="true">no</option> | 136 </data> |
132 <option value="yes">yes</option> | 137 <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix"> |
133 </param> | 138 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter> |
134 <when value="no" /> | 139 </data> |
135 <when value="yes"> | 140 </outputs> |
136 <param name="saveRawCounts" type="boolean" label="Save the bin counts"/> | 141 <help> |
137 <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/> | |
138 </when> | |
139 </conditional> | |
140 | |
141 </inputs> | |
142 <outputs> | |
143 <data format="png" name="outFileName" /> | |
144 <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts"> | |
145 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter> | |
146 </data> | |
147 <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix"> | |
148 <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter> | |
149 </data> | |
150 </outputs> | |
151 <help> | |
152 | 142 |
153 **What it does** | 143 **What it does** |
154 | 144 |
155 This tool is useful to assess the overall similarity of different BAM files. A typical application | 145 This tool is useful to assess the overall similarity of different BAM files. A typical application |
156 is to check the correlation between replicates or published data sets. | 146 is to check the correlation between replicates or published data sets. |
157 | 147 |
158 The tool splits the genomes into bins of given length. For each bin, the number of reads | 148 The tool splits the genomes into bins of given length. For each bin, the number of reads |
159 found in each BAM file is counted and a correlation is computed for all | 149 found in each BAM file is counted and a correlation is computed for all |
160 pairs of BAM files. | 150 pairs of BAM files. |
161 | 151 |
152 | |
153 .. image:: $PATH_TO_IMAGES/QC_bamCorrelate_humanSamples.png | |
154 :alt: Heatmap of RNA Polymerase II ChIP-seq | |
155 | |
156 | |
157 **Output files**: | |
158 | |
159 - diagnostic plot produced by bamCorrelate: a clustered heatmap displaying the values for each pairwise correlation (see the example above) | |
160 - data matrix (optional): use this matrix if you want to plot the correlation values with a different program, e.g. R | |
161 | |
162 | |
163 | |
164 | |
162 ----- | 165 ----- |
163 | 166 |
164 .. class:: infomark | 167 .. class:: infomark |
165 | 168 |
166 If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com | 169 @REFERENCES@ |
167 | 170 |
168 This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. | 171 </help> |
169 | |
170 | |
171 .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ | |
172 .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de | |
173 | |
174 | |
175 </help> | |
176 | |
177 </tool> | 172 </tool> |