# HG changeset patch # User bgruening # Date 1375461554 14400 # Node ID 894ba1eba7347c046b88af87dd66f92c837172ce # Parent 975312d6c591aa1630c015ba9ef80f8caa0bb957 Uploaded diff -r 975312d6c591 -r 894ba1eba734 bamCompare.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamCompare.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,213 @@ + + Normalize and compare two BAM files to output ratio, log2ratio or difference. + + numpy + argsparse + pysam + numpy + + + bamCompare + --bamfile1 '$bamFile1' + -bai1 '${bamFile1.metadata.bam_index}' + --bamfile2 '$bamFile2' + -bai2 '${bamFile2.metadata.bam_index}' + + --outFileName '$outFileName' + --outFileFormat '$outFileFormat' + + --fragmentLength $fragmentLength + --binSize $binSize + + #if $scaling.method == 'SES': + --scaleFactorsMethod SES + --sampleLength $scaling.sampleLength + #elif $scaling.method == 'readCount': + --scaleFactorsMethod readCount + #elif $scaling.method == 'own': + --scaleFactors '$scaling.scaleFactor1:$scaling.scaleFactor2' + #end if + + --ratio $comparison.type + + + #if $comparison.type=='subtract': + #if $comparison.normalization.type=='rpkm': + --normalizeUsingRPKM + #elif $comparison.normalization.type=='1x': + --normalizeTo1x $comparison.normalization.normalizeTo1x + #end if + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + #if $advancedOpt.smoothLength: + --smoothLength '$advancedOpt.smoothLength' + #end if + + #if str($advancedOpt.region.value) != '': + --region '$advancedOpt.region' + #end if + $advancedOpt.doNotExtendPairedEnds + $advancedOpt.ignoreDuplicates + + #if $advancedOpt.minMappingQuality: + --minMappingQuality '$advancedOpt.minMappingQuality' + #end if + + --missingDataAsZero $advancedOpt.missingDataAsZero + + #end if + --numberOfProcessors 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool compares 
two BAM files based on the number of mapped reads. To +compare the BAM files the genome is partitioned into bins of equal size, then +the number of reads found in each BAM file are counted for such bins and +finally a summarizing value is reported. This value can be the ratio of the +number of reads per bin, the log2 of the ratio or the difference. This tool +can normalize the number of reads on each BAM file using the SES method +proposed by Diaz et al. (2012). "Normalization, bias correction, and peak +calling for ChIP-seq". Statistical applications in genetics and molecular +biology, 11(3). Normalization based on read counts is also available. The +output is either a bedgraph or a bigwig file containing the bin location and +the resulting comparison values. By default if reads are mated the fragment +length reported in the BAM file is used. + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + + diff -r 975312d6c591 -r 894ba1eba734 bamCorrelate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamCorrelate.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,165 @@ + + corrlates pairs of bam files + + + #set files=[] + #set labels=[] + #for $i in $inputs + #set $files += [str($i.bamfile)] + #if str($i.label.value) != "": + #set $labels += ["\"%s\"" % ($i.label.value)] + #else + #set $labels += ["\"%s\"" % ($i.bamfile.name)] + #end if + #end for + bamCorrelate + --bamfiles #echo " ".join($files) + --labels #echo " ".join($labels) + + --fragmentLength $fragmentLength + --corMethod $corMethod + + #set newoutFileName=str($outFileName)+".png" + --plotFile $newoutFileName + + #if $outputOpt.showOutputOpt == "yes" + #if $outputOpt.outFileRawCounts: + --outRawCounts '$outputOpt.outFileRawCounts' + #end if + #if $outputOpt.outFileCorMatrix: + --outFileCorMatrix '$outputOpt.outFileCorMatrix' + #end if + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + #if $advancedOpt.smoothLength: + --smoothLength '$advancedOpt.smoothLength' + #end if + + #if str($advancedOpt.region.value) != '': + --region '$advancedOpt.region' + #end if + + --binSize '$advancedOpt.binSize' + --numberOfSamples '$advancedOpt.numberOfSamples' + + $advancedOpt.doNotExtendPairedEnds + $advancedOpt.ignoreDuplicates + $advancedOpt.includeZeros + + #if $advancedOpt.minMappingQuality: + --minMappingQuality '$advancedOpt.minMappingQuality' + #end if + #end if + + --numberOfProcessors 4; mv $newoutFileName $outFileName + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True) + + + (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True) + + + + +**What it does** + +Genomes are split into bins of given length. For each bin the number of reads +found for each of the bam files is counted. 
A correlation is computed for all +pairs of bam files. + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + + diff -r 975312d6c591 -r 894ba1eba734 bamFingerprint.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamFingerprint.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,151 @@ + + plots profiles of bam files + + + #set files=[] + #set labels=[] + #for $i in $inputs + #set $files += [str($i.bamfile)] + #if str($i.label.value) != "": + #set $labels += ["\"%s\"" % ($i.label.value)] + #else + #set $labels += ["\"%s\"" % ($i.bamfile.name)] + #end if + #end for + bamFingerprint + --bamfiles #echo " ".join($files) + --labels #echo " ".join($labels) + + --fragmentLength $fragmentLength + + #set newoutFileName=str($outFileName)+".png" + --plotFile $newoutFileName + + #if $outputOpt.showOutputOpt == "yes" + #if $outputOpt.saveRawCounts: + --outRawCounts '$outFileRawCounts' + #end if + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + #if $advancedOpt.smoothLength: + --smoothLength '$advancedOpt.smoothLength' + #end if + + #if str($advancedOpt.region.value) != '': + --region '$advancedOpt.region' + #end if + + --binSize '$advancedOpt.binSize' + --numberOfSamples '$advancedOpt.numberOfSamples' + + $advancedOpt.doNotExtendPairedEnds + $advancedOpt.ignoreDuplicates + $advancedOpt.skipZeros + + #if $advancedOpt.minMappingQuality: + --minMappingQuality 
'$advancedOpt.minMappingQuality' + #end if + #end if + + --numberOfProcessors 4; mv $newoutFileName $outFileName + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True) + + + + +**What it does** + +Samples indexed bam files and plots a profile for each bam file. At each +sample position all reads overlaping a window (bin) of specified length are +counted. This counts are then sorted and the cumulative sum plotted + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + + diff -r 975312d6c591 -r 894ba1eba734 computeGCBias.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/computeGCBias.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,144 @@ + + + + + + + computeGCBias + --bamfile '$bamInput' + --species '$species' + --GCbiasFrequenciesFile $outFileName + --fragmentLength $fragmentLength + + #if $source.ref_source=="history": + --genome $source.input1 + #else: + --genome "${source.input1_2bit.fields.path}" + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + #if str($advancedOpt.region.value) != '': + --region '$advancedOpt.region' + #end if + + --binSize '$advancedOpt.binSize' + --sampleSize '$advancedOpt.sampleSize' + --regionSize '$advancedOpt.regionSize' + + #if $advancedOpt.filterOut: + --filterOut $advancedOpt.filterOut + #end if + + #if $advancedOpt.extraSampling: + --extraSampling $advancedOpt.extraSampling + #end if + + #end if + + #set move="" + #if $output.showOutputSettings == "yes" + #if $output.saveBiasPlot: + --biasPlot biasPlot.png + #set move="mv biasPlot.png $biasPlot" + #end if + #end if + ; $move + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True) + + + + +**What it does** + +Computes the GC bias ussing Benjamini's method [citation]. The resulting GC +bias can later be used to plot the bias or to correct the bias. + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. 
_Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + + diff -r 975312d6c591 -r 894ba1eba734 computeMatrix.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/computeMatrix.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,191 @@ + + summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile + + computeMatrix + $mode.mode_select + --regionsFileName '$regionsFile' + --scoreFileName '$scoreFile' + --outFileName '$outFileName' + + #if $output.showOutputSettings == "yes" + #if $output.saveData: + --outFileNameData '$outFileNameData' + #end if + #if $output.saveMatrix: + --outFileNameMatrix '$outFileNameMatrix' + #end if + + #if $output.saveSortedRegions: + --outFileSortedRegions '$outFileSortedRegions' + #end if + #end if + + #if $mode.mode_select == "reference-point": + --referencePoint $mode.referencePoint + $mode.nanAfterEnd + --beforeRegionStartLength $mode.beforeRegionStartLength + --afterRegionStartLength $mode.afterRegionStartLength + #else + --regionBodyLength $mode.regionBodyLength + --startLabel $mode.startLabel + --endLabel $mode.endLabel + #if $mode.regionStartLength.regionStartLength_select == "yes": + --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength + --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength + #end if + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + --sortRegions '$advancedOpt.sortRegions' + --sortUsing '$advancedOpt.sortUsing' + --averageTypeBins '$advancedOpt.averageTypeBins' + $advancedOpt.missingDataAsZero + $advancedOpt.skipZeros + + #if $advancedOpt.minThreshold: + --minThreshold $advancedOpt.minThreshold + #end if + #if $advancedOpt.maxThreshold: + 
--maxThreshold $advancedOpt.maxThreshold + #end if + #if $advancedOpt.scale: + --scale $advancedOpt.scale + #end if + + #end if + --numberOfProcessors 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (output['showOutputSettings'] == 'yes' and output['saveData'] == True) + + + (output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True) + + + (output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True) + + + +**What it does** + +This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score. + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + diff -r 975312d6c591 -r 894ba1eba734 correctGCBias.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/correctGCBias.xml Fri Aug 02 12:39:14 2013 -0400 @@ -0,0 +1,108 @@ + + + + + correctGCBias + --bamfile '$bamInput' + --species '$species' + --GCbiasFrequenciesFile $GCbiasFrequenciesFile + + #if $source.ref_source=="history": + --genome $source.input1 + #else: + --genome "${source.input1_2bit.fields.path}" + #end if + + #if $advancedOpt.showAdvancedOpt == "yes": + #if str($advancedOpt.region.value) != '': + --region '$advancedOpt.region' + #end if + + --binSize '$advancedOpt.binSize' + #end if + + #set newoutFileName="corrected."+str($outFileFormat) + + --correctedFile $newoutFileName; mv $newoutFileName $outFileName + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Computes the GC bias ussing Benjamini's method [citation]. The resulting GC +bias can later be used to plot the bias or to correct the bias. + +----- + +.. class:: infomark + +Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. + +This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. + + +.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ +.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de +.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de + + + +