diff scripts/tools/cg_scripts/List_Unique_Variants.xml @ 0:951ae80a19fc draft

Uploaded
author bcrain-completegenomics
date Tue, 12 Jun 2012 14:42:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/tools/cg_scripts/List_Unique_Variants.xml	Tue Jun 12 14:42:04 2012 -0400
@@ -0,0 +1,323 @@
+<tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">
+
+  <description>with annotations from gene or var files</description> <!--adds description in toolbar-->
+  
+  <command interpreter="perl"> <!--template for the perl command line; the #if branches pick the options for the selected file type-->
+		#if $file_types.file_type =="var2" 
+			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output 
+			--Var_Type $file_types.variants 
+			$file_types.scoresVAF 
+			$file_types.scoresEAF 
+			$file_types.varQuality
+			#if $file_types.data_sources.data_source == "in" 
+				#for $v in $file_types.data_sources.varfiles <!--one Input_File argument per var file in the repeat-->
+				--Input_File ${v.input}
+				#end for
+			#else
+				`cat $file_types.data_sources.varlist`
+			#end if
+	
+		#else if $file_types.file_type =="var1"
+			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output 
+			--Var_Type $file_types.variants 
+			$file_types.scores
+			#if $file_types.data_sources.data_source == "in" 
+				#for $v in $file_types.data_sources.varfiles <!--one Input_File argument per var file in the repeat-->
+				--Input_File ${v.input}
+				#end for
+			#else
+				`cat $file_types.data_sources.varlist`
+			#end if
+	
+		#else if $file_types.file_type =="gene"
+			List_Unique_Variants_2_1_0.pl --File_Type G --Output_File $output 
+			--Var_Type $file_types.variants 
+			--Component $file_types.component 
+			--Impact $file_types.impact 
+			#if $file_types.data_sources.data_source == "in" 
+				#for $g in $file_types.data_sources.genefiles <!--one Input_File argument per gene file in the repeat-->
+				--Input_File ${g.input}
+				#end for
+			#else
+				`cat $file_types.data_sources.genelist`
+			#end if
+		#end if
+  </command>
+
+  <outputs> <!--single tabular dataset; the command template passes it to the script as $output-->
+    <data name="output" format="tabular"/>
+  </outputs>
+
+  <inputs>
+    <conditional name="file_types">
+			<!--selector whose value picks the matching when-branch of this conditional-->
+			<param type="select" name="file_type" label="Select the input file type">
+				<option selected="True" value="var2">var files, format 2.x</option>
+				<option value="var1">var files, format 1.x</option>
+				<option value="gene">gene files</option>
+			</param>
+			
+			<when value="var2">
+				<!--multi-select of variant types; the selection is passed to the script's Var_Type option-->
+				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
+					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
+					<option value="All" selected="true">All</option>
+					<option value="snp">snp</option>
+					<option value="ins">ins</option>
+					<option value="del">del</option>
+					<option value="sub">sub</option>
+					<option value="ref">ref</option>
+				</param>
+ 
+				<!--varScoreVAF toggle; each option value is the complete command-line fragment inserted into the command-->
+				<param name="scoresVAF" type="select" label="Include varScoreVAF?">
+					<option value="--Scores_VAF yes" selected="true">yes</option>
+					<option value="--Scores_VAF no">no</option>
+				</param>
+				<!--varScoreEAF toggle; option values are inserted into the command verbatim-->
+				<param name="scoresEAF" type="select" label="Include varScoreEAF?">
+					<option value="--Scores_EAF yes" selected="true">yes</option>
+					<option value="--Scores_EAF no">no</option>
+				</param>
+				<!--varQuality toggle; option values are inserted into the command verbatim-->
+				<param name="varQuality" type="select" label="Include varQuality?">
+					<option value="--Score_Qualities yes" selected="true">yes</option>
+					<option value="--Score_Qualities no">no</option>
+				</param>
+				
+				<!--choose between var files held as Galaxy datasets and a list file on the local filesystem-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input var files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--one repeat entry per var file; each entry becomes an Input_File argument-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_var" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--path of a text file listing the var files; its contents are inserted into the command line via cat-->
+						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+			
+			<when value="var1">
+				<!--multi-select of variant types; the selection is passed to the script's Var_Type option-->
+				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
+					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
+					<option value="All" selected="true">All</option>
+					<option value="snp">snp</option>
+					<option value="ins">ins</option>
+					<option value="del">del</option>
+					<option value="sub">sub</option>
+					<option value="ref">ref</option>
+				</param>
+ 
+				<!--totalScore toggle; each option value is the complete command-line fragment inserted into the command-->
+				<param name="scores" type="select" label="Include totalScore?">
+					<option value="--Scores yes" selected="true">yes</option>
+					<option value="--Scores no">no</option>
+				</param>
+				
+				<!--choose between var files held as Galaxy datasets and a list file on the local filesystem-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input var files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--one repeat entry per var file; each entry becomes an Input_File argument-->
+						<repeat name="varfiles" title="Variant files">
+							<param name="input" type="data" format="cg_var" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--path of a text file listing the var files; its contents are inserted into the command line via cat-->
+						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+
+			<when value="gene">
+				<!--multi-select of variant types; the selection is passed to the script's Var_Type option-->
+				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
+					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
+					<option value="All" selected="true">All</option>
+					<option value="snp">snp</option>
+					<option value="ins">ins</option>
+					<option value="del">del</option>
+					<option value="sub">sub</option>
+					<option value="ref">ref</option>
+				</param>
+ 
+				<!--multi-select of gene-file component values; the selection is passed to the script's Component option-->
+				<param name="component" type="select" label="Select component types to include" multiple="true" >
+					<option value="All" selected="true">All</option>
+					<option value="CDS">CDS</option>
+					<option value="INTRON">INTRON</option>
+					<option value="DONOR">DONOR</option>
+					<option value="ACCEPTOR">ACCEPTOR</option>
+					<option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
+					<option value="SPAN5">SPAN5</option>
+					<option value="SPAN3">SPAN3</option>
+					<option value="SPAN">SPAN</option>
+					<option value="UTR5">UTR5</option>
+					<option value="UTR3">UTR3</option>
+					<option value="UTR">UTR</option>
+				</param>
+				
+				<!--multi-select of gene-file impact values (passed to the Impact option); spellings must match the impact field exactly-->
+				<param name="impact" type="select" label="Select impact types to include" multiple="true" >
+					<option value="All" selected="true">All</option>
+					<option value="NO-CHANGE">NO-CHANGE</option>
+					<option value="SYNONYMOUS">SYNONYMOUS</option>
+					<option value="MISSENSE">MISSENSE</option>
+					<option value="NONSENSE">NONSENSE</option>
+					<option value="NONSTOP">NONSTOP</option>
+					<option value="DELETE">DELETE</option>
+					<option value="INSERT">INSERT</option>
+					<option value="DELETE+">DELETE+</option>
+					<option value="INSERT+">INSERT+</option>
+					<option value="FRAMESHIFT">FRAMESHIFT</option>
+					<option value="MISSTART">MISSTART</option>
+					<option value="DISRUPT">DISRUPT</option>
+					<option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
+					<option value="UNKNOWN-INC">UNKNOWN-INC</option>
+					<option value="UNKNOWN-TR">UNKNOWN-TR</option>
+				</param>
+				
+				<!--choose between gene files held as Galaxy datasets and a list file on the local filesystem-->
+				<conditional name="data_sources">
+					<param name="data_source" type="select" label="Where are the input gene files?">
+						<option value="in" selected="true">imported into Galaxy</option>
+						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
+					</param>
+					<when value="in">
+						<!--one repeat entry per gene file; each entry becomes an Input_File argument-->
+						<repeat name="genefiles" title="Gene files">
+							<param name="input" type="data" format="cg_gene" label="Dataset">
+								<validator type="unspecified_build" />
+								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+								 metadata_name="dbkey" metadata_column="1"
+								 message="cgatools is not currently available for this build."/>
+							</param>
+						</repeat>
+					</when>
+					<when value="out">
+						<!--path of a text file listing the gene files; its contents are inserted into the command line via cat-->
+						<param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
+					</when>
+				</conditional>
+			</when>
+			
+		</conditional>	
+  </inputs>
+
+
+  <help>
+
+**What it does**
+
+This tool identifies all called variants present in the var or gene files and generates an annotated variant list.
+
+-----
+
+**Instructions**::
+
+		List Unique Variants for Pipeline 1.x and 2.x
+		[Uses header if available, checks for position of xref field if not]
+		Take one or more var or gene files
+		Extract a non-redundant set of variants
+	
+		For var files:
+		The fields used to define non-redundant variants are:
+			chromosome begin end varType reference alleleSeq xRef
+		User can nominate class(es) of varType to filter on
+		Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
+			them off (separately)
+		Scores and qualities stored in separate fields, all values for a variant across
+			a set of genomes.
+		Values for different genomes separated by ':', for two hom entries for the same
+			genome by '|'
+		Output is accepted by testvariants to generate a variant table, all fields kept
+			in testvariants output
+	
+		For gene files:
+		The fields used to define non-redundant gene variants are:
+			chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
+			orientation component componentIndex codingRegionKnown impact nucleotidePos
+			proteinPos annotationRefSequence sampleSequence genomeRefSequence
+		User can nominate class(es) of varType, component or impact to filter on
+		All gene entries are kept, i.e. multiple entries if there are multiple transcripts
+	
+		NB Now treating xref as a separate component in var recs, as it is not consistent
+			between X and Y vars
+		Not fixed for gene recs yet
+	
+		perl List_Unique_Variants_2_1_0.pl
+		--File_Type [V|G]
+		--Input_File input_file_1 [set of var or gene files]
+		--Input_File input_file_2
+		...
+		--Input_File input_file_n
+		--Output_File filename
+		--Var_Type [For both file types, 'All' or any value from the varType field,
+				multiple values allowed, separated by comma]
+		--Component [Gene file specific,'All' or any value from component field of gene
+				file, multiple allowed; 'All' is default]
+		--Impact All [Gene file specific,'All' or any value from impact field of gene
+				file, multiple allowed; 'All' is default]
+		--Scores [1.x var file specific, yes|no, yes is default]
+		--Scores_VAF [2.0 var file specific, yes|no, yes is default]
+		--Scores_EAF [2.0 var file specific, yes|no, yes is default]
+		--Score_Qualities [yes|no, yes is default]
+		eg
+		perl List_Unique_Variants_2_1_0.pl \
+		--File_Type V \
+		--Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
+		--Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
+		--Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
+		--Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
+		--Var_Type All \
+		--Component All \
+		--Impact All \
+		--Scores_VAF yes \
+		--Scores_EAF yes \
+		--Score_Qualities yes
+	
+		var fields
+		1.x	locus ploidy haplotype chromosome begin end varType reference alleleSeq
+				totalScore hapLink xRef
+		2.0	locus ploidy allele chromosome begin end varType reference alleleSeq
+				varScoreVAF varScoreEAF varQuality hapLink xRef
+	
+		gene fields
+		1.x index locus allele chromosome begin end varType reference call xRef geneId
+				mrnaAcc proteinAcc symbol orientation component componentIndex
+				codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
+				sampleSequence genomeRefSequence
+		2.0 index locus allele chromosome begin end varType reference call xRef geneId
+				mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
+				impact nucleotidePos proteinPos annotationRefSequence sampleSequence
+				genomeRefSequence pfam
+	
+		Parsing and storing input parameters
+		Only input_file fields can be repeated
+		input parameters are case insensitive
+
+
+  </help>
+</tool>