Mercurial > repos > bcrain-completegenomics > testing4
diff scripts/tools/cg_scripts/List_Unique_Variants.xml @ 0:951ae80a19fc draft
Uploaded
author | bcrain-completegenomics |
---|---|
date | Tue, 12 Jun 2012 14:42:04 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for the List_Unique_Variants_2_1_0.pl Perl script.

  The user picks an input file type (var 2.x, var 1.x or gene); each type has
  its own filter options and its own choice of data source: datasets already
  imported into Galaxy, or a server-side text file listing the input files.
  The selected options are turned into one command line by the Cheetah
  template in the command element, and the result is written to a single
  tabular output dataset.
-->
<tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">

  <!--short description shown next to the tool name in the tool panel-->
  <description>with annotations from gene or var files</description>

  <!--
    Command template. Notes inside the template are written as Cheetah
    comments (lines starting with two hash signs) instead of XML comments, so
    the template text does not depend on how the XML parser treats comment
    nodes in the middle of the element.
  -->
  <command interpreter="perl">
    ## One branch per input file type. Every branch builds the same script
    ## call with the options that apply to that file type.
    #if $file_types.file_type == "var2"
      List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
      --Var_Type $file_types.variants
      ## the three select values below already carry their option name,
      ## e.g. "--Scores_VAF yes"
      $file_types.scoresVAF
      $file_types.scoresEAF
      $file_types.varQuality
      #if $file_types.data_sources.data_source == "in"
        ## one --Input_File argument per var dataset in the repeat
        #for $v in $file_types.data_sources.varfiles
          --Input_File ${v.input}
        #end for
      #else
        ## splice the contents of the user-supplied list file into the command;
        ## the path is quoted so a path containing spaces stays one argument
        `cat "${file_types.data_sources.varlist}"`
      #end if

    #elif $file_types.file_type == "var1"
      List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
      --Var_Type $file_types.variants
      ## select value already carries its option name, e.g. "--Scores yes"
      $file_types.scores
      #if $file_types.data_sources.data_source == "in"
        ## one --Input_File argument per var dataset in the repeat
        #for $v in $file_types.data_sources.varfiles
          --Input_File ${v.input}
        #end for
      #else
        ## splice the contents of the user-supplied list file into the command
        `cat "${file_types.data_sources.varlist}"`
      #end if

    #elif $file_types.file_type == "gene"
      List_Unique_Variants_2_1_0.pl --File_Type G --Output_File $output
      --Var_Type $file_types.variants
      --Component $file_types.component
      --Impact $file_types.impact
      #if $file_types.data_sources.data_source == "in"
        ## one --Input_File argument per gene dataset in the repeat
        #for $g in $file_types.data_sources.genefiles
          --Input_File ${g.input}
        #end for
      #else
        ## splice the contents of the user-supplied list file into the command
        `cat "${file_types.data_sources.genelist}"`
      #end if
    #end if
  </command>

  <inputs>
    <conditional name="file_types">
      <!--form field to select file type-->
      <param name="file_type" type="select" label="Select the input file type">
        <option value="var2" selected="true">var files, format 2.x</option>
        <option value="var1">var files, format 1.x</option>
        <option value="gene">gene files</option>
      </param>

      <when value="var2">
        <!--form field to select all variant types to include-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select varScoreVAF; the option value is the full command line argument-->
        <param name="scoresVAF" type="select" label="Include varScoreVAF?">
          <option value="--Scores_VAF yes" selected="true">yes</option>
          <option value="--Scores_VAF no">no</option>
        </param>
        <!--form field to select varScoreEAF; the option value is the full command line argument-->
        <param name="scoresEAF" type="select" label="Include varScoreEAF?">
          <option value="--Scores_EAF yes" selected="true">yes</option>
          <option value="--Scores_EAF no">no</option>
        </param>
        <!--form field to select varQuality; the option value is the full command line argument-->
        <param name="varQuality" type="select" label="Include varQuality?">
          <option value="--Score_Qualities yes" selected="true">yes</option>
          <option value="--Score_Qualities no">no</option>
        </param>

        <!--conditional to select variant file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select variant files-->
            <repeat name="varfiles" title="Variant files">
              <param name="input" type="data" format="cg_var" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
                           metadata_name="dbkey" metadata_column="1"
                           message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a text file that lists the var files-->
            <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>

      <when value="var1">
        <!--form field to select all variant types to include-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select totalScore; the option value is the full command line argument-->
        <param name="scores" type="select" label="Include totalScore?">
          <option value="--Scores yes" selected="true">yes</option>
          <option value="--Scores no">no</option>
        </param>

        <!--conditional to select variant file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select variant files-->
            <repeat name="varfiles" title="Variant files">
              <param name="input" type="data" format="cg_var" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
                           metadata_name="dbkey" metadata_column="1"
                           message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a text file that lists the var files-->
            <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>

      <when value="gene">
        <!--form field to select all variant types to include-->
        <param name="variants" label="Select variant types to include" type="select" multiple="true">
          <!--<validator type="no_options" message="Please select at least one variant type."/>-->
          <option value="All" selected="true">All</option>
          <option value="snp">snp</option>
          <option value="ins">ins</option>
          <option value="del">del</option>
          <option value="sub">sub</option>
          <option value="ref">ref</option>
        </param>

        <!--form field to select component in gene file-->
        <param name="component" type="select" label="Select component types to include" multiple="true">
          <option value="All" selected="true">All</option>
          <option value="CDS">CDS</option>
          <option value="INTRON">INTRON</option>
          <option value="DONOR">DONOR</option>
          <option value="ACCEPTOR">ACCEPTOR</option>
          <option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
          <option value="SPAN5">SPAN5</option>
          <option value="SPAN3">SPAN3</option>
          <option value="SPAN">SPAN</option>
          <option value="UTR5">UTR5</option>
          <option value="UTR3">UTR3</option>
          <option value="UTR">UTR</option>
        </param>

        <!--form field to select impact in gene file; each value is passed verbatim to the Impact option-->
        <param name="impact" type="select" label="Select impact types to include" multiple="true">
          <option value="All" selected="true">All</option>
          <option value="NO-CHANGE">NO-CHANGE</option>
          <option value="SYNONYMOUS">SYNONYMOUS</option>
          <option value="MISSENSE">MISSENSE</option>
          <option value="NONSENSE">NONSENSE</option>
          <option value="NONSTOP">NONSTOP</option>
          <option value="DELETE">DELETE</option>
          <option value="INSERT">INSERT</option>
          <option value="DELETE+">DELETE+</option>
          <option value="INSERT+">INSERT+</option>
          <option value="FRAMESHIFT">FRAMESHIFT</option>
          <option value="MISSTART">MISSTART</option>
          <option value="DISRUPT">DISRUPT</option>
          <option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
          <option value="UNKNOWN-INC">UNKNOWN-INC</option>
          <option value="UNKNOWN-TR">UNKNOWN-TR</option>
        </param>

        <!--conditional to select gene file input-->
        <conditional name="data_sources">
          <param name="data_source" type="select" label="Where are the input gene files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
          </param>
          <when value="in">
            <!--form field to select gene files-->
            <repeat name="genefiles" title="Gene files">
              <param name="input" type="data" format="cg_gene" label="Dataset">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
                           metadata_name="dbkey" metadata_column="1"
                           message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
          <when value="out">
            <!--form field for the path of a text file that lists the gene files-->
            <param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
          </when>
        </conditional>
      </when>

    </conditional>
  </inputs>

  <!--single tabular result file written by the script through its Output_File option-->
  <outputs>
    <data format="tabular" name="output" />
  </outputs>

  <help>

**What it does**

This tool identifies all called variants present in the var or gene files and generates an annotated variant list.

-----

**Instructions**::

  List Unique Variants for Pipeline 1.x and 2.x
  [Uses header if available, checks for position of xref field if not]
  Take one or more var or gene files
  Extract a non-redundant set of variants

  For var files:
    The fields used to define non-redundant variants are:
      chromosome begin end varType reference alleleSeq xRef
    User can nominate class(es) of varType to filter on
    Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
      them off (separately)
    Scores and qualities stored in separate fields, all values for a variant across
      a set of genomes.
    Values for different genomes separated by ':', for two hom entries for the same
      genome by '|'
    Output is accepted by testvariants to generate a variant table, all fields kept
      in testvariants output

  For gene files:
    The fields used to define non-redundant gene variants are:
      chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
      orientation component componentIndex codingRegionKnown impact nucleotidePos
      proteinPos annotationRefSequence sampleSequence genomeRefSequence
    User can nominate class(es) of varType, component or impact to filter on
    All gene entries kept, i.e. multiple entries if multiple transcripts

  NB Now treating xref as a separate component in var recs, as it is not consistent
    between X and Y vars
    Not fixed for gene recs yet

  perl List_Unique_Variants_2_1_0.pl
    --File_Type [V|G]
    --Input_File input_file_1 [set of var or gene files]
    --Input_File input_file_2
    ...
    --Input_File input_file_n
    --Output_File filename
    --Var_Type [For both file types, 'All' or any value from the varType field,
      multiple values allowed, separated by comma]
    --Component [Gene file specific, 'All' or any value from component field of gene
      file, multiple allowed; 'All' is default]
    --Impact [Gene file specific, 'All' or any value from impact field of gene
      file, multiple allowed; 'All' is default]
    --Scores [1.x var file specific, yes|no, yes is default]
    --Scores_VAF [2.0 var file specific, yes|no, yes is default]
    --Scores_EAF [2.0 var file specific, yes|no, yes is default]
    --Score_Qualities [yes|no, yes is default]
  eg
  perl List_Unique_Variants_2_1_0.pl \
    --File_Type V \
    --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
    --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
    --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
    --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
    --Var_Type All \
    --Component All \
    --Impact All \
    --Scores_VAF yes \
    --Scores_EAF yes \
    --Score_Qualities yes

  var fields
  1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq
      totalScore hapLink xRef
  2.0 locus ploidy allele chromosome begin end varType reference alleleSeq
      varScoreVAF varScoreEAF varQuality hapLink xRef

  gene fields
  1.x index locus allele chromosome begin end varType reference call xRef geneId
      mrnaAcc proteinAcc symbol orientation component componentIndex
      codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
      sampleSequence genomeRefSequence
  2.0 index locus allele chromosome begin end varType reference call xRef geneId
      mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
      impact nucleotidePos proteinPos annotationRefSequence sampleSequence
      genomeRefSequence pfam

  Parsing and storing input parameters
  Only input_file fields can be repeated
  input parameters are case insensitive


  </help>
</tool>