changeset 6:212478caed9b draft

Uploaded
author jeremie
date Mon, 07 Jul 2014 04:56:34 -0400
parents 8df51fff179b
children 74009865b3ad
files pindel.xml
diffstat 1 files changed, 111 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/pindel.xml	Wed Jul 02 06:27:43 2014 -0400
+++ b/pindel.xml	Mon Jul 07 04:56:34 2014 -0400
@@ -12,16 +12,17 @@
 		-r $inputReferenceFile
 		-b $inputBamFile
 		-s $insertSize
-		-o $outputVcfFile
+		<!-- -o1 $outputRaw -->
+		-o2 $outputVcfFile
 		-n $sampleTag
+
 		<!-- optional -->
-	#if $breakdancer.checking == "yes":
-		-input1 $breakdancer.input1
-	#end if
+
+		<!-- Parameters affecting runtime and memory usage -->
 		--number_of_threads $number_of_threads
 		--window_size $window_size
-		--sequencing_error_rate $sequencing_error_rate
-		--sensitivity $sensitivity
+
+		<!-- Parameters affecting which structural variants are reported -->
 	#if $reportInversions:
 		--report_inversions
 	#end if
@@ -34,8 +35,59 @@
 	#if $reportBreakpoints:
 		--report_breakpoints
 	#end if
+	#if $report_close_mapped_reads:
+		-s
+	#end if
+	#if $report_only_close_mapped_reads:
+		-S
+	#end if
+	#if $report_interchromosomal_events:
+		-I
+	#end if
+		-v $min_inversion_size
+	#if $detect_DD:
+		-q
+	#end if
+		--MAX_DD_BREAKPOINT_DISTANCE $MAX_DD_BREAKPOINT_DISTANCE
+		--MAX_DISTANCE_CLUSTER_READS $MAX_DISTANCE_CLUSTER_READS
+		--MIN_DD_CLUSTER_SIZE $MIN_DD_CLUSTER_SIZE
+		--MIN_DD_BREAKPOINT_SUPPORT $MIN_DD_BREAKPOINT_SUPPORT
+		--MIN_DD_MAP_DISTANCE $MIN_DD_MAP_DISTANCE
+	#if $DD_REPORT_DUPLICATION_READS:
+		--DD_REPORT_DUPLICATION_READS 
+	#end if
+
+		<!-- Parameters affecting sensitivity and selectivity -->
+		-d $min_num_matched_bases
+		-a $additional_mismatch
+		-m $min_perfect_match_around_BP
+		--sequencing_error_rate $sequencing_error_rate
+		-u $maximum_allowed_mismatch_rate
+		--sensitivity $sensitivity
+		-n $NM
+	#if $NormalSamples:
+		-N
+	#end if
+		-B $balance_cutoff
+		-A $anchor_quality
+		-M $minimum_support_for_event
+
+		<!-- Miscellaneous parameters -->
+	#if $breakdancer.checking == "yes":
+		-input1 $breakdancer.input1
+	#end if
+		
+	#if $IndelCorrection:
+		-C
+	#end if
+	
+	#if $input_SV_Calls_for_assembly.checking == "yes":
+		-z $input_SV_Calls_for_assembly.input_SV_Calls
+	#end if
+
 	</command>
 
+
 	<inputs>
 		<!-- required -->
 		<param format="fasta" name="inputReferenceFile" type="data" label="reference file" help="" />
@@ -50,64 +102,28 @@
 
 		<param name="sequencing_error_rate" type="float" value="0.01" />
 
-		<param name="sensitivity" type="float" value="0.95" label="sensitivity"
- help="Pindel only reports reads if they can be fit around an event within a certain number of mismatches. If the fraction of sequencing errors is 0.01, (so we'd expect a total error rate of 0.011 since on average 1 in 1000 bases is a SNP) and pindel calls a deletion, but there are 4 mismatched bases in the new fit of the pindel read (100 bases) to the reference genome, Pindel would calculate that with an error rate of 0.01 (=0.011 including SNPs) the chance that there are 0, 1 or 2 mismatched bases in the reference genome is 90%. Setting -E to .90 (=90%) will thereforethrow away all reads with 3 or more mismatches, even though that means that you throw away 1 in 10 valid reads. Increasing this parameter to say 0.99 will increase the sensitivity of pindel though you may get more false positives, decreasing the parameter ensures you only get very good matches but pindel may not find as many events. (default 0.95)" />
+		<param name="sensitivity" type="float" value="0.95" label="sensitivity" />
+		<!--
+  		help="Pindel only reports reads if they can be fit around an event within a certain number of mismatches. If the fraction of sequencing errors is 0.01, (so we'd expect a total error rate of 0.011 since on average 1 in 1000 bases is a SNP) and pindel calls a deletion, but there are 4 mismatched bases in the new fit of the pindel read (100 bases) to the reference genome, Pindel would calculate that with an error rate of 0.01 (=0.011 including SNPs) the chance that there are 0, 1 or 2 mismatched bases in the reference genome is 90%. Setting -E to .90 (=90%) will therefore throw away all reads with 3 or more mismatches, even though that means that you throw away 1 in 10 valid reads. Increasing this parameter to say 0.99 will increase the sensitivity of pindel though you may get more false positives, decreasing the parameter ensures you only get very good matches but pindel may not find as many events. (default 0.95)" -->
 
-		<!-- <param name="maximum_allowed_mismatch_rate" type="float" value="0.02" label="maximum allowed mismatch rate" help="Only reads with more than this fraction of mismatches than the reference genome will be considered as harboring potential SVs." />
+		<param name="maximum_allowed_mismatch_rate" type="float" value="0.02" label="maximum allowed mismatch rate" help="Only reads with more than this fraction of mismatches than the reference genome will be considered as harboring potential SVs." />
 
-		<param name="NM" type="integer" value="2" label="NM" help="the minimum number of edit distance between reads and reference genome (default 2). reads at least NM edit distance (>= NM) will be realigned" /> -->
+		<param name="NM" type="integer" value="2" label="NM" help="the minimum number of edit distance between reads and reference genome. reads at least NM edit distance (>= NM) will be realigned" />
 
 		<param name="reportInversions" type="boolean" label="report inversions" checked="true"/>
 		<param name="reportDuplications" type="boolean" label="report duplications" checked="true"/>
 		<param name="reportLongInsertions" type="boolean" label="report long insertions" checked="true"/>
 		<param name="reportBreakpoints" type="boolean" label="report breakpoints" checked="true"/>
 
+		<param name="report_close_mapped_reads" type="boolean" label="report reads of which only one end (the one closest to the mapped read of the paired-end read) could be mapped." checked="false" />
 
-<!-- 		<conditional name="report_close_mapped_reads">
-			<param name="checking" type="select" label="report reads of which only one end (the one closest to the mapped read of the paired-end read) could be mapped.">
-				<option value="yes">Yes</option>
-				<option value="no" selected="True">No</option>
-			</param>
-			<when value="yes" />
-			<when value="no" />
-		</conditional>
-
-		<conditional name="report_only_close_mapped_reads">
-			<param name="checking" type="select" label="do not search for SVs, only report reads of which only one end (the one closest to the mapped read of the paired-end read) could be mapped (the output file can then be used as an input file for another run of pindel, which may save size if you need to transfer files). (default false)">
-				<option value="yes">Yes</option>
-				<option value="no" selected="True">No</option>
-			</param>
-			<when value="yes" />
-			<when value="no" />
-		</conditional>
+		<param name="report_only_close_mapped_reads" type="boolean" label="do not search for SVs, only report reads of which only one end (the one closest to the mapped read of the paired-end read) could be mapped (the output file can then be used as an input file for another run of pindel, which may save size if you need to transfer files)." checked="false" />
 
-		<conditional name="report_interchromosomal_events">
-			<param name="checking" type="select" label="search for interchromosomal events. Note: will require the computer to have at least 4 GB of memory">
-				<option value="yes">Yes</option>
-				<option value="no" selected="True">No</option>
-			</param>
-			<when value="yes" />
-			<when value="no" />
-		</conditional> -
-		
-		<conditional name="IndelCorrection">
-			<param name="checking" type="select" label="search for consensus indels to corret contigs (default false)">
-				<option value="yes">Yes</option>
-				<option value="no" selected="True">No</option>
-			</param>
-			<when value="yes" />
-			<when value="no" />
-		</conditional>
+		<param name="report_interchromosomal_events" type="boolean" label="search for interchromosomal events. Note: will require the computer to have at least 4 GB of memory" checked="false" />
 
-		<conditional name="NormalSamples">
-			<param name="checking" type="select" label="Turn on germline filtering, less sensistive and you may miss somatic calls (default false)">
-				<option value="yes">Yes</option>
-				<option value="no" selected="True">No</option>
-			</param>
-			<when value="yes" />
-			<when value="no" />
-		</conditional>
-		-->
+		<param name="IndelCorrection" type="boolean" label="search for consensus indels to correct contigs (default false)" checked="false" />
+
+		<param name="NormalSamples" type="boolean" label="Turn on germline filtering, less sensitive and you may miss somatic calls" checked="false" />
 
 		<conditional name="breakdancer">
 			<param name="checking" type="select" label="use calls from an other method?">
@@ -117,35 +133,61 @@
 			<when value="yes">
 				<param name="input1" type="data" format="tabular" label="variant call file" help="Pindel is able to use calls from other SV methods such as BreakDancer to further increase sensitivity and specificity. BreakDancer result or calls from any methods must in the format: ChrA LocA stringA ChrB LocB stringB other" />
 			</when>
-
 			<when value="no" />
 		</conditional>
 
-		<!-- <param name="additional_mismatch" type="integer" value="1" label="additional mismatch" help="Pindel will only map part of a read to the reference genome if there are no other candidate positions with no more than the specified number of mismatches position. The bigger the value, the more accurate but less sensitive. (minimum value 1, default value 1)" />
 
-		<param name="min_perfect_match_around_BP" type="integer" value="3" label="min perfect match around BP" help="at the point where the read is split into two, there should at least be this number of perfectly matching bases between read and reference (default value 3)" />
+		<param name="additional_mismatch" type="integer" value="1" label="additional mismatch" help="Pindel will only map part of a read to the reference genome if there are no other candidate positions with no more than the specified number of mismatches position. The bigger the value, the more accurate but less sensitive (minimum value 1)" />
+
+		<param name="min_perfect_match_around_BP" type="integer" value="3" label="min perfect match around BP" help="at the point where the read is split into two, there should at least be this number of perfectly matching bases between read and reference" />
 
-		<param name="min_inversion_size" type="integer" value="50" label="min inversion size" help="only report inversions greater than this number of bases (default 50)" />
+		<param name="min_inversion_size" type="integer" value="50" label="min inversion size" help="only report inversions greater than this number of bases" />
+		
+		<param name="min_num_matched_bases" type="integer" value="30" label="min num matched bases" help="only consider reads as evidence if they map with more than X bases to the reference" />
 		
-		<param name="min_num_matched_bases" type="integer" value="30" label="min num matched bases" help="only consider reads as evidence if they map with more than X bases to the reference. (default 30)" />
+		<param name="balance_cutoff" type="integer" value="0" label="balance cutoff" help="the number of bases of a SV above which a more stringent filter is applied which demands that both sides of the SV are mapped with sufficiently long strings of bases" />
 
-		<param name="balance_cutoff" type="integer" value="0" label="balance cutoff" help="the number of bases of a SV above which a more stringent filter is applied which demands that both sides of the SV are mapped with sufficiently long strings of bases (default 0)" />
+		<param name="anchor_quality" type="integer" value="0" label="anchor quality" help="the minimal mapping quality of the reads Pindel uses as anchor. If you only need high-confidence calls, set to 30 or higher" />
+
+		<param name="minimum_support_for_event" type="integer" value="3" label="minimum support for event" help="Pindel only calls events which have this number or more supporting reads" />
+
 
-		<param name="anchor_quality" type="integer" value="0" label="anchor quality" help="the minimal mapping quality of the reads Pindel uses as anchor If you only need high confident calls, set to 30 or higher(default 0)" />
-
-		<param name="minimum_support_for_event" type="integer" value="3" label="minimum support for event" help="Pindel only calls events which have this number or more supporting reads (default 3)" />
+		<conditional name="input_SV_Calls_for_assembly">
+			<param name="checking" type="select" label="input SV Calls for assembly ?">
+				<option value="yes">Yes</option>
+				<option value="no" selected="True">No</option>
+			</param>
+			<when value="yes">
+				<param name="input_SV_Calls" type="data" format="tabular" label="variant call file" help="A filename of a list of SV calls for assembling breakpoints 
+					Types: DEL, INS, DUP, INV, CTX and ITX 
+					File format: Type chrA posA Confidence_Range_A chrB posB 
+					Confidence_Range_B 
+					Example: DEL chr1 10000 50 chr2 20000 100" />
+			</when>
+			<when value="no" />
+		</conditional>
 
-		<param name="input_SV_Calls_for_assembly" type="text" value="" label="input SV Calls for assembly" help="A filename of a list of SV calls for assembling breakpoints 
-			Types: DEL, INS, DUP, INV, CTX and ITX 
-			File format: Type chrA posA Confidence_Range_A chrB posB 
-		   Confidence_Range_B 
-			Example: DEL chr1 10000 50 chr2 20000 100" />
+		<param name="genotyping" type="boolean" truevalue="true" falsevalue="false" checked="no" label="genotyping" help="genotype variants if -i is also used." />
+
+		<param name="detect_DD" type="boolean" label="Flag indicating whether to detect dispersed duplications" checked="false" />
+
+		<param name="MAX_DD_BREAKPOINT_DISTANCE" type="integer" value="350" label="MAX DD BREAKPOINT DISTANCE" help="Maximum distance between dispersed duplication breakpoints to assume they refer to the same event" />
+
+		<param name="MAX_DISTANCE_CLUSTER_READS" type="integer" value="100" label="MAX DISTANCE CLUSTER READS" help="Maximum distance between reads for them to provide evidence for a single breakpoint for dispersed duplications" />
 
-		<param name="genotyping" type="boolean" truevalue="true" falsevalue="false" checked="no" label="genotyping" help="gentype variants if -i is also used." /> -->
+		<param name="MIN_DD_CLUSTER_SIZE" type="integer" value="3" label="MIN DD CLUSTER SIZE" help="Minimum number of reads needed for calling a breakpoint for dispersed duplications" />
+
+		<param name="MIN_DD_BREAKPOINT_SUPPORT" type="integer" value="3" label="MIN DD BREAKPOINT SUPPORT" help="Minimum number of split reads for calling an exact breakpoint for dispersed duplications" />
+
+		<param name="MIN_DD_MAP_DISTANCE" type="integer" value="8000" label="MIN DD MAP DISTANCE" help="Minimum mapping distance of read pairs for them to be considered discordant" />
+
+		<param name="DD_REPORT_DUPLICATION_READS" type="boolean" label="DD REPORT DUPLICATION READS" checked="false" help="Report discordant sequences and positions for mates of reads mapping inside dispersed duplications"/>
+
 	</inputs>
 
 	<outputs>
-		<data format="vcf" name="outputVcfFile" label="vcf pindel"/>
+		<!-- <data format="tabular" name="outputRawFile" /> -->
+		<data format="vcf" name="outputVcfFile" />
 	</outputs>
 
 	<help>