view scripts/tools/cg_scripts/List_Unique_Variants.xml @ 8:3b3d5434b66a draft

Deleted selected files
author bcrain-completegenomics
date Wed, 13 Jun 2012 14:05:50 -0400
parents 951ae80a19fc
children
line wrap: on
line source

<tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">

  <!-- Short description shown alongside the tool name in the Galaxy tool list. -->
  <description>with annotations from gene or var files</description>
  
  <!--
    Cheetah template that builds the perl command line for
    List_Unique_Variants_2_1_0.pl.

    One branch per value of file_types.file_type:
      var2 : File_Type V, plus the three score/quality switches
      var1 : File_Type V, plus the totalScore switch
      gene : File_Type G, plus the Component and Impact filters

    Input files are taken either from the repeated Galaxy datasets (one
    Input_File option per dataset) or, when the files live outside Galaxy,
    from the output of running cat on the user-supplied list file, which the
    shell splices into the command line.

    The output path and the dataset paths are single-quoted so that a path
    containing spaces or shell metacharacters reaches the script as one
    argument. The list-file path is passed to cat unquoted, so the shell
    still expands it.

    No XML comments are placed inside the command element: its text is the
    template itself and should not depend on the parser discarding comments.
  -->
  <command interpreter="perl">
		#if $file_types.file_type == "var2"
			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File '$output'
			--Var_Type $file_types.variants
			$file_types.scoresVAF
			$file_types.scoresEAF
			$file_types.varQuality
			#if $file_types.data_sources.data_source == "in"
				#for $v in $file_types.data_sources.varfiles
				--Input_File '${v.input}'
				#end for
			#else
				`cat $file_types.data_sources.varlist`
			#end if

		#else if $file_types.file_type == "var1"
			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File '$output'
			--Var_Type $file_types.variants
			$file_types.scores
			#if $file_types.data_sources.data_source == "in"
				#for $v in $file_types.data_sources.varfiles
				--Input_File '${v.input}'
				#end for
			#else
				`cat $file_types.data_sources.varlist`
			#end if

		#else if $file_types.file_type == "gene"
			List_Unique_Variants_2_1_0.pl --File_Type G --Output_File '$output'
			--Var_Type $file_types.variants
			--Component $file_types.component
			--Impact $file_types.impact
			#if $file_types.data_sources.data_source == "in"
				#for $g in $file_types.data_sources.genefiles
				--Input_File '${g.input}'
				#end for
			#else
				`cat $file_types.data_sources.genelist`
			#end if
		#end if
  </command>

  <!-- Single tabular result; its path is handed to the script as $output in the command template. -->
  <outputs>
    <data name="output" format="tabular"/>
  </outputs>

  <inputs>
    <conditional name="file_types">
			<!-- Input format selector. The chosen value picks the matching "when" section of this conditional and the corresponding branch of the command template. -->
			<param name="file_type" type="select" label="Select the input file type">
				<option value="var2" selected="true">var files, format 2.x</option>
				<option value="var1">var files, format 1.x</option>
				<option value="gene">gene files</option>
			</param>
			
			<!-- Form section shown when the input is a set of format 2.x var files. -->
			<when value="var2">

				<!-- Variant types to keep (multi-select); the value follows the Var_Type switch in the command template. -->
				<param name="variants" type="select" multiple="true" label="Select variant types to include">
					<!-- Disabled validator, kept for reference: -->
					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
					<option value="All" selected="true">All</option>
					<option value="snp">snp</option>
					<option value="ins">ins</option>
					<option value="del">del</option>
					<option value="sub">sub</option>
					<option value="ref">ref</option>
				</param>

				<!-- The three yes/no fields below carry the complete command-line switch as their option value; the command template inserts that value verbatim. -->

				<!-- varScoreVAF column on or off. -->
				<param name="scoresVAF" type="select" label="Include varScoreVAF?">
					<option value="--Scores_VAF yes" selected="true">yes</option>
					<option value="--Scores_VAF no">no</option>
				</param>

				<!-- varScoreEAF column on or off. -->
				<param name="scoresEAF" type="select" label="Include varScoreEAF?">
					<option value="--Scores_EAF yes" selected="true">yes</option>
					<option value="--Scores_EAF no">no</option>
				</param>

				<!-- varQuality column on or off. -->
				<param name="varQuality" type="select" label="Include varQuality?">
					<option value="--Score_Qualities yes" selected="true">yes</option>
					<option value="--Score_Qualities no">no</option>
				</param>

				<!-- Source of the var files: Galaxy datasets, or files outside Galaxy named in a list file. -->
				<conditional name="data_sources">
					<param name="data_source" type="select" label="Where are the input var files?">
						<option value="in" selected="true">imported into Galaxy</option>
						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
					</param>

					<when value="in">
						<!-- Repeatable dataset chooser; every entry becomes one Input_File option on the command line. -->
						<repeat name="varfiles" title="Variant files">
							<param name="input" type="data" format="cg_var" label="Dataset">
								<!-- The dataset needs a build, and its dbkey is checked against metadata_column 1 of cg_crr_files.loc. -->
								<validator type="unspecified_build"/>
								<validator type="dataset_metadata_in_file"
								 filename="cg_crr_files.loc"
								 metadata_name="dbkey"
								 metadata_column="1"
								 message="cgatools is not currently available for this build."/>
							</param>
						</repeat>
					</when>

					<when value="out">
						<!-- Path of a file that lists the var files; the command template runs cat on it. -->
						<param name="varlist" type="text" size="200" label="List of variant files (/path/file)" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
					</when>
				</conditional>
			</when>
			
			<!-- Form section shown when the input is a set of format 1.x var files. -->
			<when value="var1">

				<!-- Variant types to keep (multi-select); the value follows the Var_Type switch in the command template. -->
				<param name="variants" type="select" multiple="true" label="Select variant types to include">
					<!-- Disabled validator, kept for reference: -->
					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
					<option value="All" selected="true">All</option>
					<option value="snp">snp</option>
					<option value="ins">ins</option>
					<option value="del">del</option>
					<option value="sub">sub</option>
					<option value="ref">ref</option>
				</param>

				<!-- totalScore column on or off; the option value is the complete switch inserted verbatim by the command template. -->
				<param name="scores" type="select" label="Include totalScore?">
					<option value="--Scores yes" selected="true">yes</option>
					<option value="--Scores no">no</option>
				</param>

				<!-- Source of the var files: Galaxy datasets, or files outside Galaxy named in a list file. -->
				<conditional name="data_sources">
					<param name="data_source" type="select" label="Where are the input var files?">
						<option value="in" selected="true">imported into Galaxy</option>
						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
					</param>

					<when value="in">
						<!-- Repeatable dataset chooser; every entry becomes one Input_File option on the command line. -->
						<repeat name="varfiles" title="Variant files">
							<param name="input" type="data" format="cg_var" label="Dataset">
								<!-- The dataset needs a build, and its dbkey is checked against metadata_column 1 of cg_crr_files.loc. -->
								<validator type="unspecified_build"/>
								<validator type="dataset_metadata_in_file"
								 filename="cg_crr_files.loc"
								 metadata_name="dbkey"
								 metadata_column="1"
								 message="cgatools is not currently available for this build."/>
							</param>
						</repeat>
					</when>

					<when value="out">
						<!-- Path of a file that lists the var files; the command template runs cat on it. -->
						<param name="varlist" type="text" size="200" label="List of variant files (/path/file)" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
					</when>
				</conditional>
			</when>

			<!-- Form section shown when the input is a set of gene files. -->
			<when value="gene">

				<!-- Variant types to keep (multi-select); the value follows the Var_Type switch in the command template. -->
				<param name="variants" type="select" multiple="true" label="Select variant types to include">
					<!-- Disabled validator, kept for reference: -->
					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
					<option value="All" selected="true">All</option>
					<option value="snp">snp</option>
					<option value="ins">ins</option>
					<option value="del">del</option>
					<option value="sub">sub</option>
					<option value="ref">ref</option>
				</param>

				<!-- Gene-file component values to keep (multi-select); the value follows the Component switch. -->
				<param name="component" type="select" multiple="true" label="Select component types to include">
					<option value="All" selected="true">All</option>
					<option value="CDS">CDS</option>
					<option value="INTRON">INTRON</option>
					<option value="DONOR">DONOR</option>
					<option value="ACCEPTOR">ACCEPTOR</option>
					<option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
					<option value="SPAN5">SPAN5</option>
					<option value="SPAN3">SPAN3</option>
					<option value="SPAN">SPAN</option>
					<option value="UTR5">UTR5</option>
					<option value="UTR3">UTR3</option>
					<option value="UTR">UTR</option>
				</param>

				<!-- Gene-file impact values to keep (multi-select); the value follows the Impact switch.
				     MISSENSE and NONSTOP were previously misspelled (MISSENES, NONSSTOP), so selecting
				     them could not match the impact values written in gene files. -->
				<param name="impact" type="select" multiple="true" label="Select impact types to include">
					<option value="All" selected="true">All</option>
					<option value="NO-CHANGE">NO-CHANGE</option>
					<option value="SYNONYMOUS">SYNONYMOUS</option>
					<option value="MISSENSE">MISSENSE</option>
					<option value="NONSENSE">NONSENSE</option>
					<option value="NONSTOP">NONSTOP</option>
					<option value="DELETE">DELETE</option>
					<option value="INSERT">INSERT</option>
					<option value="DELETE+">DELETE+</option>
					<option value="INSERT+">INSERT+</option>
					<option value="FRAMESHIFT">FRAMESHIFT</option>
					<option value="MISSTART">MISSTART</option>
					<option value="DISRUPT">DISRUPT</option>
					<option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
					<option value="UNKNOWN-INC">UNKNOWN-INC</option>
					<option value="UNKNOWN-TR">UNKNOWN-TR</option>
				</param>

				<!-- Source of the gene files: Galaxy datasets, or files outside Galaxy named in a list file. -->
				<conditional name="data_sources">
					<param name="data_source" type="select" label="Where are the input gene files?">
						<option value="in" selected="true">imported into Galaxy</option>
						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
					</param>

					<when value="in">
						<!-- Repeatable gene dataset chooser; every entry becomes one Input_File option on the command line. -->
						<repeat name="genefiles" title="Gene files">
							<param name="input" type="data" format="cg_gene" label="Dataset">
								<!-- The dataset needs a build, and its dbkey is checked against metadata_column 1 of cg_crr_files.loc. -->
								<validator type="unspecified_build"/>
								<validator type="dataset_metadata_in_file"
								 filename="cg_crr_files.loc"
								 metadata_name="dbkey"
								 metadata_column="1"
								 message="cgatools is not currently available for this build."/>
							</param>
						</repeat>
					</when>

					<when value="out">
						<!-- Path of a file that lists the gene files; the command template runs cat on it. -->
						<param name="genelist" type="text" size="200" label="List of gene files (/path/file)" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
					</when>
				</conditional>
			</when>
			
		</conditional>	
  </inputs>


  <help>

**What it does**

This tool identifies all called variants present in the var or gene files and generates an annotated variant list.

-----

**Instructions**::

		List Unique Variants for Pipeline 1.x and 2.x
		[Uses header if available, checks for position of xref field if not]
		Take one or more var or gene files
		Extract a non-redundant set of variants
	
		For var files:
		The fields used to define non-redundant variants are:
			chromosome begin end varType reference alleleSeq xRef
		User can nominate class(es) of varType to filter on
		Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
			them off (separately)
		Scores and qualities stored in separate fields, all values for a variant across
			a set of genomes.
		Values for different genomes separated by ':', for two hom entries for the same
			genome by '|'
		Output is accepted by testvariants to generate a variant table, all fields kept
			in testvariants output
	
		For gene files:
		The fields used to define non-redundant gene variants are:
			chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
			orientation component componentIndex codingRegionKnown impact nucleotidePos
			proteinPos annotationRefSequence sampleSequence genomeRefSequence
		User can nominate class(es) of varType, component or impact to filter on
		All gene entries are kept, i.e. multiple entries if there are multiple transcripts
	
		NB Now treating xref as a separate component in var recs, as it is not consistent
			between X and Y vars
		Not fixed for gene recs yet
	
		perl List_Unique_Variants_2_1_0.pl
		--File_Type [V|G]
		--Input_File input_file_1 [set of var or gene files]
		--Input_File input_file_2
		...
		--Input_File input_file_n
		--Output_File filename
		--Var_Type [For both file types, 'All' or any value from the varType field,
				multiple values allowed, separated by comma]
		--Component [Gene file specific, 'All' or any value from component field of gene
				file, multiple allowed; 'All' is default]
		--Impact [Gene file specific, 'All' or any value from impact field of gene
				file, multiple allowed; 'All' is default]
		--Scores [1.x var file specific, yes|no, yes is default]
		--Scores_VAF [2.0 var file specific, yes|no, yes is default]
		--Scores_EAF [2.0 var file specific, yes|no, yes is default]
		--Score_Qualities [yes|no, yes is default]
		eg
		perl List_Unique_Variants_2_1_0.pl \
		--File_Type G \
		--Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
		--Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
		--Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
		--Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
		--Var_Type All \
		--Component All \
		--Impact All \
		--Scores_VAF yes \
		--Scores_EAF yes \
		--Score_Qualities yes
	
		var fields
		1.x	locus ploidy haplotype chromosome begin end varType reference alleleSeq
				totalScore hapLink xRef
		2.0	locus ploidy allele chromosome begin end varType reference alleleSeq
				varScoreVAF varScoreEAF varQuality hapLink xRef
	
		gene fields
		1.x index locus allele chromosome begin end varType reference call xRef geneId
				mrnaAcc proteinAcc symbol orientation component componentIndex
				codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
				sampleSequence genomeRefSequence
		2.0 index locus allele chromosome begin end varType reference call xRef geneId
				mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
				impact nucleotidePos proteinPos annotationRefSequence sampleSequence
				genomeRefSequence pfam
	
		Parsing and storing input parameters
		Only input_file fields can be repeated
		Input parameters are case insensitive


  </help>
</tool>