Mercurial > repos > bcrain-completegenomics > testing4
view scripts/tools/cg_scripts/List_Unique_Variants.xml @ 8:3b3d5434b66a draft
Deleted selected files
author | bcrain-completegenomics |
---|---|
date | Wed, 13 Jun 2012 14:05:50 -0400 |
parents | 951ae80a19fc |
children |
line wrap: on
line source
<tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">
  <!--
    Galaxy wrapper around List_Unique_Variants_2_1_0.pl.

    The user picks a file type (var 2.x, var 1.x or gene); each choice exposes its own
    filters and an input source. Inputs are either datasets already in Galaxy (one
    Input_File flag is emitted per dataset) or a server-side text file whose contents
    are expanded onto the command line. The result is written to a single tabular output.
  -->
  <description>with annotations from gene or var files</description> <!--adds description in toolbar-->

  <!--run executable; comments inside the command use Cheetah syntax so they never become part of the command text-->
  <command interpreter="perl">
    #if $file_types.file_type == "var2"
      ## var files, format 2.x: each score/quality select carries its complete "flag value" pair
      List_Unique_Variants_2_1_0.pl
        --File_Type V
        --Output_File $output
        --Var_Type $file_types.variants
        $file_types.scoresVAF
        $file_types.scoresEAF
        $file_types.varQuality
      #if $file_types.data_sources.data_source == "in"
        ## get each var file
        #for $v in $file_types.data_sources.varfiles
          --Input_File ${v.input}
        #end for
      #else
        ## NOTE(review): the path is substituted unquoted and the file contents become the
        ## remaining arguments; confirm the expected list-file format and that paths with
        ## spaces never need to be supported.
        `cat $file_types.data_sources.varlist`
      #end if
    #else if $file_types.file_type == "var1"
      ## var files, format 1.x: a single totalScore switch instead of the three 2.x switches
      List_Unique_Variants_2_1_0.pl
        --File_Type V
        --Output_File $output
        --Var_Type $file_types.variants
        $file_types.scores
      #if $file_types.data_sources.data_source == "in"
        ## get each var file
        #for $v in $file_types.data_sources.varfiles
          --Input_File ${v.input}
        #end for
      #else
        ## NOTE(review): same unquoted expansion as in the 2.x branch; see the note there.
        `cat $file_types.data_sources.varlist`
      #end if
    #else if $file_types.file_type == "gene"
      ## gene files: filtered by variant type, component and impact
      List_Unique_Variants_2_1_0.pl
        --File_Type G
        --Output_File $output
        --Var_Type $file_types.variants
        --Component $file_types.component
        --Impact $file_types.impact
      #if $file_types.data_sources.data_source == "in"
        ## get each gene file
        #for $g in $file_types.data_sources.genefiles
          --Input_File ${g.input}
        #end for
      #else
        ## NOTE(review): same unquoted expansion as in the var branches; see the note there.
        `cat $file_types.data_sources.genelist`
      #end if
    #end if
  </command>

  <outputs>
    <data format="tabular" name="output" />
  </outputs>

  <inputs>
    <conditional name="file_types">
      <!--form field to select file type-->
      <param name="file_type" type="select" label="Select the input file type">
        <option value="var2" selected="True">var files, format 2.x</option>
        <option value="var1">var files, format 1.x</option>
        <option value="gene">gene files</option>
      </param>

      <when value="var2">
        <!--form field to select all variant types to annotate-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select varScoresVAF-->
        <param name="scoresVAF" type="select" label="Include varScoreVAF?">
          <option value="--Scores_VAF yes" selected="true">yes</option>
          <option value="--Scores_VAF no">no</option>
        </param>

        <!--form field to select varScoresEAF-->
        <param name="scoresEAF" type="select" label="Include varScoreEAF?">
          <option value="--Scores_EAF yes" selected="true">yes</option>
          <option value="--Scores_EAF no">no</option>
        </param>

        <!--form field to select varQuality-->
        <param name="varQuality" type="select" label="Include varQuality?">
          <option value="--Score_Qualities yes" selected="true">yes</option>
          <option value="--Score_Qualities no">no</option>
        </param>

        <!--conditional to select variant file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select variant files-->
            <repeat name="varfiles" title="Variant files">
              <param name="input" type="data" format="cg_var" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" metadata_name="dbkey" metadata_column="1" message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a file that lists the var files-->
            <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>

      <when value="var1">
        <!--form field to select all variant types to annotate-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select scores-->
        <param name="scores" type="select" label="Include totalScore?">
          <option value="--Scores yes" selected="true">yes</option>
          <option value="--Scores no">no</option>
        </param>

        <!--conditional to select variant file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select variant files-->
            <repeat name="varfiles" title="Variant files">
              <param name="input" type="data" format="cg_var" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" metadata_name="dbkey" metadata_column="1" message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a file that lists the var files-->
            <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>

      <when value="gene">
        <!--form field to select all variant types to annotate-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select component in gene file-->
        <param name="component" type="select" label="Select component types to include" multiple="true">
          <option value="All" selected="true">All</option>
          <option value="CDS">CDS</option>
          <option value="INTRON">INTRON</option>
          <option value="DONOR">DONOR</option>
          <option value="ACCEPTOR">ACCEPTOR</option>
          <option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
          <option value="SPAN5">SPAN5</option>
          <option value="SPAN3">SPAN3</option>
          <option value="SPAN">SPAN</option>
          <option value="UTR5">UTR5</option>
          <option value="UTR3">UTR3</option>
          <option value="UTR">UTR</option>
        </param>

        <!--form field to select impact in gene file; values must match the impact column of the gene file exactly-->
        <param name="impact" type="select" label="Select impact types to include" multiple="true">
          <option value="All" selected="true">All</option>
          <option value="NO-CHANGE">NO-CHANGE</option>
          <option value="SYNONYMOUS">SYNONYMOUS</option>
          <option value="MISSENSE">MISSENSE</option>
          <option value="NONSENSE">NONSENSE</option>
          <option value="NONSTOP">NONSTOP</option>
          <option value="DELETE">DELETE</option>
          <option value="INSERT">INSERT</option>
          <option value="DELETE+">DELETE+</option>
          <option value="INSERT+">INSERT+</option>
          <option value="FRAMESHIFT">FRAMESHIFT</option>
          <option value="MISSTART">MISSTART</option>
          <option value="DISRUPT">DISRUPT</option>
          <option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
          <option value="UNKNOWN-INC">UNKNOWN-INC</option>
          <option value="UNKNOWN-TR">UNKNOWN-TR</option>
        </param>

        <!--conditional to select gene file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input gene files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select gene files-->
            <repeat name="genefiles" title="Gene files">
              <param name="input" type="data" format="cg_gene" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" metadata_name="dbkey" metadata_column="1" message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a file that lists the gene files-->
            <param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>
    </conditional>
  </inputs>

  <help>

**What it does**

This tool identifies all called variants present in the var or gene files and generates an annotated variant list.

-----

**Instructions**::

    List Unique Variants for Pipeline 1.x and 2.x
    [Uses header if available, checks for position of xref field if not]

    Take one or more var or gene files
    Extract a non-redundant set of variants

    For var files:
      The fields used to define non-redundant variants are:
        chromosome begin end varType reference alleleSeq xRef
      User can nominate class(es) of varType to filter on
      Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn them off (separately)
      Scores and qualities stored in separate fields, all values for a variant across a set of genomes.
      Values for different genomes separated by ':', for two hom entries for the same genome by '|'
      Output is accepted by testvariants to generate a variant table, all fields kept in testvariants output

    For gene files:
      The fields used to define non-redundant gene variants are:
        chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation
        component componentIndex codingRegionKnown impact nucleotidePos proteinPos
        annotationRefSequence sampleSequence genomeRefSequence
      User can nominate class(es) of varType, component or impact to filter on
      All gene entries kept, i.e. multiple entries if multiple transcripts

    NB Now treating xref as a separate component in var recs, as it is not consistent between X and Y vars
    Not fixed for gene recs yet

    perl List_Unique_Variants_2_1_0.pl
      --File_Type [V|G]
      --Input_File input_file_1 [set of var or gene files]
      --Input_File input_file_2
      ...
      --Input_File input_file_n
      --Output_File filename
      --Var_Type [For both file types, 'All' or any value from the varType field, multiple values allowed, separated by comma]
      --Component [Gene file specific, 'All' or any value from component field of gene file, multiple allowed; 'All' is default]
      --Impact [Gene file specific, 'All' or any value from impact field of gene file, multiple allowed; 'All' is default]
      --Scores [1.x var file specific, yes|no, yes is default]
      --Scores_VAF [2.0 var file specific, yes|no, yes is default]
      --Scores_EAF [2.0 var file specific, yes|no, yes is default]
      --Score_Qualities [yes|no, yes is default]

    e.g.
    perl List_Unique_Variants_2_1_0.pl \
      --File_Type G \
      --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
      --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
      --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
      --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
      --Var_Type All \
      --Component All \
      --Impact All \
      --Scores_VAF yes \
      --Scores_EAF yes \
      --Score_Qualities yes

    var fields
      1.x  locus ploidy haplotype chromosome begin end varType reference alleleSeq totalScore hapLink xRef
      2.0  locus ploidy allele chromosome begin end varType reference alleleSeq varScoreVAF varScoreEAF varQuality hapLink xRef

    gene fields
      1.x  index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc
           symbol orientation component componentIndex codingRegionKnown impact nucleotidePos proteinPos
           annotationRefSequence sampleSequence genomeRefSequence
      2.0  index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc
           symbol orientation component componentIndex hasCodingRegion impact nucleotidePos proteinPos
           annotationRefSequence sampleSequence genomeRefSequence pfam

    Parsing and storing input parameters
    Only input_file fields can be repeated
    input parameters are case insensitive

  </help>
</tool>