# HG changeset patch
# User bgruening
# Date 1375461554 14400
# Node ID 894ba1eba7347c046b88af87dd66f92c837172ce
# Parent 975312d6c591aa1630c015ba9ef80f8caa0bb957
Uploaded
diff -r 975312d6c591 -r 894ba1eba734 bamCompare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamCompare.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,213 @@
+
+ Normalize and compare two BAM files to output ratio, log2ratio or difference.
+
+ numpy
+ argsparse
+ pysam
+ numpy
+
+
+ bamCompare
+ --bamfile1 '$bamFile1'
+ -bai1 '${bamFile1.metadata.bam_index}'
+ --bamfile2 '$bamFile2'
+ -bai2 '${bamFile2.metadata.bam_index}'
+
+ --outFileName '$outFileName'
+ --outFileFormat '$outFileFormat'
+
+ --fragmentLength $fragmentLength
+ --binSize $binSize
+
+ #if $scaling.method == 'SES':
+ --scaleFactorsMethod SES
+ --sampleLength $scaling.sampleLength
+ #elif $scaling.method == 'readCount':
+ --scaleFactorsMethod readCount
+ #elif $scaling.method == 'own':
+ --scaleFactors '$scaling.scaleFactor1:$scaling.scaleFactor2'
+ #end if
+
+ --ratio $comparison.type
+
+
+ #if $comparison.type=='subtract':
+ #if $comparison.normalization.type=='rpkm':
+ --normalizeUsingRPKM
+ #elif $comparison.normalization.type=='1x':
+ --normalizeTo1x $comparison.normalization.normalizeTo1x
+ #end if
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ #if $advancedOpt.smoothLength:
+ --smoothLength '$advancedOpt.smoothLength'
+ #end if
+
+ #if str($advancedOpt.region.value) != '':
+ --region '$advancedOpt.region'
+ #end if
+ $advancedOpt.doNotExtendPairedEnds
+ $advancedOpt.ignoreDuplicates
+
+ #if $advancedOpt.minMappingQuality:
+ --minMappingQuality '$advancedOpt.minMappingQuality'
+ #end if
+
+ --missingDataAsZero $advancedOpt.missingDataAsZero
+
+ #end if
+ --numberOfProcessors 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool compares two BAM files based on the number of mapped reads. To
+compare the BAM files the genome is partitioned into bins of equal size, then
+the number of reads found in each BAM file is counted for each bin and
+finally a summarizing value is reported. This value can be the ratio of the
+number of reads per bin, the log2 of the ratio or the difference. This tool
+can normalize the number of reads on each BAM file using the SES method
+proposed by Diaz et al. (2012). "Normalization, bias correction, and peak
+calling for ChIP-seq". Statistical applications in genetics and molecular
+biology, 11(3). Normalization based on read counts is also available. The
+output is either a bedgraph or a bigwig file containing the bin location and
+the resulting comparison values. By default if reads are mated the fragment
+length reported in the BAM file is used.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
+
diff -r 975312d6c591 -r 894ba1eba734 bamCorrelate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamCorrelate.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,165 @@
+
+ correlates pairs of bam files
+
+
+ #set files=[]
+ #set labels=[]
+ #for $i in $inputs
+ #set $files += [str($i.bamfile)]
+ #if str($i.label.value) != "":
+ #set $labels += ["\"%s\"" % ($i.label.value)]
+ #else
+ #set $labels += ["\"%s\"" % ($i.bamfile.name)]
+ #end if
+ #end for
+ bamCorrelate
+ --bamfiles #echo " ".join($files)
+ --labels #echo " ".join($labels)
+
+ --fragmentLength $fragmentLength
+ --corMethod $corMethod
+
+ #set newoutFileName=str($outFileName)+".png"
+ --plotFile $newoutFileName
+
+ #if $outputOpt.showOutputOpt == "yes"
+ #if $outputOpt.outFileRawCounts:
+ --outRawCounts '$outputOpt.outFileRawCounts'
+ #end if
+ #if $outputOpt.outFileCorMatrix:
+ --outFileCorMatrix '$outputOpt.outFileCorMatrix'
+ #end if
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ #if $advancedOpt.smoothLength:
+ --smoothLength '$advancedOpt.smoothLength'
+ #end if
+
+ #if str($advancedOpt.region.value) != '':
+ --region '$advancedOpt.region'
+ #end if
+
+ --binSize '$advancedOpt.binSize'
+ --numberOfSamples '$advancedOpt.numberOfSamples'
+
+ $advancedOpt.doNotExtendPairedEnds
+ $advancedOpt.ignoreDuplicates
+ $advancedOpt.includeZeros
+
+ #if $advancedOpt.minMappingQuality:
+ --minMappingQuality '$advancedOpt.minMappingQuality'
+ #end if
+ #end if
+
+ --numberOfProcessors 4; mv $newoutFileName $outFileName
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)
+
+
+ (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)
+
+
+
+
+**What it does**
+
+Genomes are split into bins of given length. For each bin the number of reads
+found for each of the bam files is counted. A correlation is computed for all
+pairs of bam files.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
+
diff -r 975312d6c591 -r 894ba1eba734 bamFingerprint.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamFingerprint.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,151 @@
+
+ plots profiles of bam files
+
+
+ #set files=[]
+ #set labels=[]
+ #for $i in $inputs
+ #set $files += [str($i.bamfile)]
+ #if str($i.label.value) != "":
+ #set $labels += ["\"%s\"" % ($i.label.value)]
+ #else
+ #set $labels += ["\"%s\"" % ($i.bamfile.name)]
+ #end if
+ #end for
+ bamFingerprint
+ --bamfiles #echo " ".join($files)
+ --labels #echo " ".join($labels)
+
+ --fragmentLength $fragmentLength
+
+ #set newoutFileName=str($outFileName)+".png"
+ --plotFile $newoutFileName
+
+ #if $outputOpt.showOutputOpt == "yes"
+ #if $outputOpt.saveRawCounts:
+ --outRawCounts '$outFileRawCounts'
+ #end if
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ #if $advancedOpt.smoothLength:
+ --smoothLength '$advancedOpt.smoothLength'
+ #end if
+
+ #if str($advancedOpt.region.value) != '':
+ --region '$advancedOpt.region'
+ #end if
+
+ --binSize '$advancedOpt.binSize'
+ --numberOfSamples '$advancedOpt.numberOfSamples'
+
+ $advancedOpt.doNotExtendPairedEnds
+ $advancedOpt.ignoreDuplicates
+ $advancedOpt.skipZeros
+
+ #if $advancedOpt.minMappingQuality:
+ --minMappingQuality '$advancedOpt.minMappingQuality'
+ #end if
+ #end if
+
+ --numberOfProcessors 4; mv $newoutFileName $outFileName
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)
+
+
+
+
+**What it does**
+
+Samples indexed bam files and plots a profile for each bam file. At each
+sample position all reads overlapping a window (bin) of specified length are
+counted. These counts are then sorted and the cumulative sum is plotted.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
+
diff -r 975312d6c591 -r 894ba1eba734 computeGCBias.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/computeGCBias.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+ computeGCBias
+ --bamfile '$bamInput'
+ --species '$species'
+ --GCbiasFrequenciesFile $outFileName
+ --fragmentLength $fragmentLength
+
+ #if $source.ref_source=="history":
+ --genome $source.input1
+ #else:
+ --genome "${source.input1_2bit.fields.path}"
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ #if str($advancedOpt.region.value) != '':
+ --region '$advancedOpt.region'
+ #end if
+
+ --binSize '$advancedOpt.binSize'
+ --sampleSize '$advancedOpt.sampleSize'
+ --regionSize '$advancedOpt.regionSize'
+
+ #if $advancedOpt.filterOut:
+ --filterOut $advancedOpt.filterOut
+ #end if
+
+ #if $advancedOpt.extraSampling:
+ --extraSampling $advancedOpt.extraSampling
+ #end if
+
+ #end if
+
+ #set move=""
+ #if $output.showOutputSettings == "yes"
+ #if $output.saveBiasPlot:
+ --biasPlot biasPlot.png
+ #set move="mv biasPlot.png $biasPlot"
+ #end if
+ #end if
+ ; $move
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)
+
+
+
+
+**What it does**
+
+Computes the GC bias using Benjamini's method [citation]. The resulting GC
+bias can later be used to plot the bias or to correct the bias.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
+
diff -r 975312d6c591 -r 894ba1eba734 computeMatrix.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/computeMatrix.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,191 @@
+
+ summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile
+
+ computeMatrix
+ $mode.mode_select
+ --regionsFileName '$regionsFile'
+ --scoreFileName '$scoreFile'
+ --outFileName '$outFileName'
+
+ #if $output.showOutputSettings == "yes"
+ #if $output.saveData:
+ --outFileNameData '$outFileNameData'
+ #end if
+ #if $output.saveMatrix:
+ --outFileNameMatrix '$outFileNameMatrix'
+ #end if
+
+ #if $output.saveSortedRegions:
+ --outFileSortedRegions '$outFileSortedRegions'
+ #end if
+ #end if
+
+ #if $mode.mode_select == "reference-point":
+ --referencePoint $mode.referencePoint
+ $mode.nanAfterEnd
+ --beforeRegionStartLength $mode.beforeRegionStartLength
+ --afterRegionStartLength $mode.afterRegionStartLength
+ #else
+ --regionBodyLength $mode.regionBodyLength
+ --startLabel $mode.startLabel
+ --endLabel $mode.endLabel
+ #if $mode.regionStartLength.regionStartLength_select == "yes":
+ --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
+ --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
+ #end if
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ --sortRegions '$advancedOpt.sortRegions'
+ --sortUsing '$advancedOpt.sortUsing'
+ --averageTypeBins '$advancedOpt.averageTypeBins'
+ $advancedOpt.missingDataAsZero
+ $advancedOpt.skipZeros
+
+ #if $advancedOpt.minThreshold:
+ --minThreshold $advancedOpt.minThreshold
+ #end if
+ #if $advancedOpt.maxThreshold:
+ --maxThreshold $advancedOpt.maxThreshold
+ #end if
+ #if $advancedOpt.scale:
+ --scale $advancedOpt.scale
+ #end if
+
+ #end if
+ --numberOfProcessors 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (output['showOutputSettings'] == 'yes' and output['saveData'] == True)
+
+
+ (output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)
+
+
+ (output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)
+
+
+
+**What it does**
+
+This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
diff -r 975312d6c591 -r 894ba1eba734 correctGCBias.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/correctGCBias.xml Fri Aug 02 12:39:14 2013 -0400
@@ -0,0 +1,108 @@
+
+
+
+
+ correctGCBias
+ --bamfile '$bamInput'
+ --species '$species'
+ --GCbiasFrequenciesFile $GCbiasFrequenciesFile
+
+ #if $source.ref_source=="history":
+ --genome $source.input1
+ #else:
+ --genome "${source.input1_2bit.fields.path}"
+ #end if
+
+ #if $advancedOpt.showAdvancedOpt == "yes":
+ #if str($advancedOpt.region.value) != '':
+ --region '$advancedOpt.region'
+ #end if
+
+ --binSize '$advancedOpt.binSize'
+ #end if
+
+ #set newoutFileName="corrected."+str($outFileFormat)
+
+ --correctedFile $newoutFileName; mv $newoutFileName $outFileName
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Computes the GC bias using Benjamini's method [citation]. The resulting GC
+bias can later be used to plot the bias or to correct the bias.
+
+-----
+
+.. class:: infomark
+
+Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
+
+This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
+
+
+.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
+.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
+.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
+
+
+
+