# HG changeset patch # User fcaramia # Date 1371446062 14400 # Node ID 130a52d03a505468e6cc7456f5420075349aa0c6 # Parent cd31a9b4744ec49fd3dd02535308dd0db4bc9c96 Uploaded diff -r cd31a9b4744e -r 130a52d03a50 contra.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contra.xml Mon Jun 17 01:14:22 2013 -0400 @@ -0,0 +1,288 @@ + + : Copy Number Analysis for Targeted Resequencing + + bedtools + samtools + contra + + + + contra_wrapper.pl + + ##Ref Genome + #if $genomeSource.refGenomeSource == "history": + "PLAYEROPTION::-f=${genomeSource.ownFile}" + #else: + ##use precomputed indexes + "PLAYEROPTION::-f=${genomeSource.indices.fields.path}" + #end if + + ##Required files + "PLAYEROPTION::-t=$target_file" + "PLAYEROPTION::-s=$alignment_file" + #if $controlSource.refControlSource == "history": + "PLAYEROPTION::-c=${controlSource.control_file}" + #else: + ##use precomputed indexes + "PLAYEROPTION::-c=${controlSource.indices.fields.path}" + #end if + + ##Optional parameter + + #if $option.option == "modify_parameters": + + "PLAYEROPTION::--numBin=$option.numBin" + "PLAYEROPTION::--minReadDepth=$option.minReadDepth" + "PLAYEROPTION::--minNBases=$option.minNbases" + + #if str($option.sam) == "true": + "PLAYEROPTION::--sam" + #end if + + #if str($option.bed) == "true": + "PLAYEROPTION::--bed" + #end if + + "PLAYEROPTION::--pval=$option.pval" + "PLAYEROPTION::--sampleName=$option.sampleName" + + #if str($option.nomultimapped) == "true": + "PLAYEROPTION::--nomultimapped" + #end if + + #if str($option.plot) == "true": + "PLAYEROPTION::--plot" + #end if + + "PLAYEROPTION::--minExon=$option.minExon" + "PLAYEROPTION::--minControlRdForCall=$option.minControlRdForCall" + "PLAYEROPTION::--minTestRdForCall=$option.minTestRdForCall" + "PLAYEROPTION::--minAvgForCall=$option.minAvgForCall" + "PLAYEROPTION::--maxRegionSize=$option.maxRegionSize" + "PLAYEROPTION::--targetRegionSize=$option.targetRegionSize" + + #if str($option.largedeletion) == "true": + ##spelled --largeDeletion to match the option documented in the help text + "PLAYEROPTION::--largeDeletion" + 
#end if + + "PLAYEROPTION::--smallSegment=$option.smallSegment" + "PLAYEROPTION::--largeSegment=$option.largeSegment" + "PLAYEROPTION::--lrCallStart=$option.lrCallStart" + "PLAYEROPTION::--lrCallEnd=$option.lrCallEnd" + "PLAYEROPTION::--passSize=$option.passSize" + #end if + + ##Output report file and its files_path directory + CONTRAOUTPUT::$html_file + CONTRADIR::$html_file.files_path + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +| + + +**Reference** + http://contra-cnv.sourceforge.net/ + +----- + +**What it does** + +CONTRA is a tool for copy number variation (CNV) detection for targeted resequencing data such as whole-exome capture data. CONTRA calls copy number gains and losses for each target region; its key strategies include the use of base-level log-ratios to remove GC-content bias, correction for an imbalanced library size effect on log-ratios, and the estimation of log-ratio variations via binning and interpolation. It takes standard alignment formats (BAM/SAM) and outputs variant call format (VCF 4.0) files for easy integration with other next-generation sequencing analysis packages. + + +----- + +**Required Parameters** + +:: + + -t, --target Target region definition file [BED format] + + -s, --test Alignment file for the test sample [BAM/SAM] + + -c, --control Alignment file for the control sample + [BAM/SAM/BED – baseline file] + + --bed **option has to be supplied for control + with baseline file.** + + -f, --fasta Reference genome [FASTA] + + -o, --outFolder the folder name (and its path) to store the output + of the analysis (this new folder will be created – + an error occurs if the folder already exists) + +----- + +**Optional Parameters** + +:: + + --numBin Number of bins used to group the regions. 
User can + specify multiple experiments with different numbers + of bins (comma separated). [Default: 20] + + --minReadDepth The threshold for minimum read depth for each base + (see Step 2 in CONTRA workflow) [Default: 10] + + --minNBases The threshold for minimum number of bases for each + target region (see Step 2 in CONTRA workflow) + [Default: 10] + + --sam If the specified test and control samples are in + SAM format. [Default: False] (It will always take + BAM samples as default) + + --bed If specified, control will be a baseline file in + BED format. [Default: False] + Please refer to the Baseline Script section for + instructions on how to create baseline files from a set + of BAM files. A set of baseline files from different + platforms has also been provided in the CONTRA + download page. + + --pval The p-value threshold for filtering. Based on Adjusted + P-Values. Only regions that pass this threshold will + be included in the VCF file. [Default: 0.05] + + --sampleName The name to be appended to the front of the default output + name. By default, there will be nothing appended. + + --nomultimapped The option to remove multi-mapped reads + (using SAMtools with mapping quality > 0). + [default: FALSE] + + -p, --plot If specified, plots of log-ratio distribution for each + bin will be included in the output folder [default: FALSE] + + --minExon Minimum number of exons in one bin (if less than this number, + a bin that contains a small number of exons will be merged into + the adjacent bins) [Default : 2000] + + --minControlRdForCall Minimum Control ReadDepth for call [Default: 5] + + --minTestRdForCall Minimum Test ReadDepth for call [Default: 0] + + --minAvgForCall Minimum average coverage for call [Default: 20] + + --maxRegionSize Maximum region size in target region (for breaking + large regions into smaller regions. By default, + maxRegionSize=0 means no breakdown). 
[Default : 0] + + --targetRegionSize Target region size for breakdown (if maxRegionSize + is non-zero) [Default: 200] + + -l, --largeDeletion If specified, CONTRA will run large deletion analysis (CBS). + User must have DNAcopy R-library installed to run the + analysis. [Default: False] + + --smallSegment CBS segment size for calling large variations [Default : 1] + + --largeSegment CBS segment size for calling large variations [Default : 25] + + --lrCallStart Log ratios start range that will be used to call CNV + [Default : -0.3] + + --lrCallEnd Log ratios end range that will be used to call CNV + [Default : 0.3] + + --passSize Size of exons that passed the p-value threshold compared + to the original exon size [Default: 0.5] + + + +