Mercurial > repos > bcrain-completegenomics > testing_cgatools
changeset 6:8a3d71feeb48 draft
Uploaded
author | bcrain-completegenomics |
---|---|
date | Wed, 13 Jun 2012 17:31:27 -0400 |
parents | 51fea6716ea5 |
children | af4c3bfbfc68 |
files | cgatools/tools/cgatools/join.xml cgatools/tools/cgatools/junctiondiff.xml cgatools/tools/cgatools/listtestvariants.xml cgatools/tools/cgatools/listvariants.xml cgatools/tools/cgatools/snpdiff.xml cgatools/tools/cgatools/testvariants.xml cgatools/tools/cgatools/varfilter.xml cgatools/tools/cgatools/varfilter_wrapper.pl |
diffstat | 8 files changed, 1353 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/join.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,206 @@ +<tool id="cga_join" name="join(beta)" version="0.0.1"> +<!-- +Galaxy wrapper for cgatools join from Complete Genomics, Inc.: joins two tab-delimited files on exact-match columns and, optionally, on overlapping ranges. +--> + + <description>two tsv files based on equal fields or overlapping regions.</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable--> + cgatools join --beta + --input $inputA + --input $inputB + --output $output + --output-mode $outmode + $dump + --select $col + #for $m in $matches <!--one match option per exact-match column pair--> + --match ${m.match} + #end for + #if $range_overlap.range == 'yes' + #for $o in $range_overlap.overlaps <!--one overlap option per range specification--> + --overlap ${o.overlap} + #end for + --overlap-mode $range_overlap.overlapmode + --overlap-fraction-A $range_overlap.fractionA + --boundary-uncertainty-A $range_overlap.boundaryA + --overlap-fraction-B $range_overlap.fractionB + --boundary-uncertainty-B $range_overlap.boundaryB + #end if + </command> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + + <inputs> + <!--form field to select input file A--> + <param name="inputA" type="data" format="tabular" label="Select input file A "> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="0" + message="cgatools is not currently available for this build."/> + </param> + + <!--form field to select input file B--> + <param name="inputB" type="data" format="tabular" label="Select input file B "> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="0" + message="cgatools is not currently available for this build."/> + </param> + + <!--form field to specify columns to print--> + <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for 
output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" /> + + <!--form field to select output-mode--> + <param name="outmode" type="select" label="Select output mode"> + <option value="full" selected="true">full (1 line for each match of records in A and B)</option> + <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option> + <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option> + </param> + + <!--form field to select which records of A are printed; the option value is the always-dump flag or nothing--> + <param name="dump" type="select" label="Select records to print"> + <option value="--always-dump" selected="true">print all records of A even if not matched in B</option> + <option value="">print only records of A that are matched in B</option> + </param> + + <!--form field to specify columns to match--> + <repeat name="matches" title="Exact match column"> + <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/> + </repeat> + + <conditional name="range_overlap"> + <param name="range" type="select" label="Do you want to match columns by overlapping range?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + + <!--explicit empty case so that the default selection (no) has a matching when block--> + <when value="no" /> + + <when value="yes"> + <!--form field to specify columns to overlap--> + <repeat name="overlaps" title="Range column"> + <param name="overlap" type="text" size="40" label="Enter column[,column]:column[,column]" help="Enter range_start_from_A[,range_stop_from_A]:range_start_from_B[,range_stop_from_B], e.g. 
begin,end:begin,end (overlapping range of positions) or begin,end:position"/> + </repeat> + + <!--form field to select overlap-mode; the conditions shown match the overlap-mode section of the manual below--> + <param name="overlapmode" type="select" label="Select overlap mode"> + <option value="strict" selected="true">strict (overlap if A.begin<B.end and B.begin<A.end)</option> + <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin<B.end and B.begin<A.end, or if A.begin<=B.end and B.begin<=A.end and either A or B has zero length.)</option> + </param> + + <!--form fields for overlap filtering; the fractions are floats so that values between 0 and 1 can be entered--> + <param name="fractionA" type="float" value="0" label="Minimum fraction of A region overlap " /> + <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/> + + <param name="fractionB" type="float" value="0" label="Minimum fraction of B region overlap " /> + <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/> + </when> + </conditional> + </inputs> + + <help> + +**What it does** + +This tool joins two tab-delimited files based on equal fields or overlapping regions. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + join - Joins two tab-delimited files based on equal fields or overlapping regions. + + DESCRIPTION + Joins two tab-delimited files based on equal fields or overlapping regions. + By default, an output record is produced for each match found between file + A and file B, but output format can be controlled by the --output-mode + parameter. 
+ + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --input arg + File name to use as input (may be passed in as arguments at the end of + the command), or omitted for stdin). There must be exactly two input + files to join. If only one file is specified by name, file A is taken + to be stdin and file B is the named file. File B is read fully into + memory, and file A is streamed. File A's columns appear first in the + output. + + --output arg (=STDOUT) + The output file name (may be omitted for stdout). + + --match arg + A match specification, which is a column from A and a column from B + separated by a colon. + + --overlap arg + Overlap specification. An overlap specification consists of a range + definition for files A and B, separated by a colon. A range definition + may be two columns, in which case they are interpreted as the beginning + and end of the range. Or it may be one column, in which case the range + is defined as the 1-base range starting at the given value. The records + from the two files must overlap in order to be considered for output. + Two ranges are considered to overlap if the overlap is at least one + base long, or if one of the ranges is length 0 and the ranges overlap + or abut. For example, "begin,end:offset" will match wherever end-begin + > 0, begin<offset+1, and end>offset, or wherever end-begin = 0, + begin<=offset+1, and end>=offset. + + + -m [ --output-mode ] arg (=full) + Output mode, one of the following: + full Print an output record for each match found between + file A and file B. + compact Print at most one record for each record of file A, + joining the file B values by a semicolon and + suppressing repeated B values and empty B values. + compact-pct Same as compact, but for each distinct B value, + annotate with the percentage of the A record that is + overlapped by B records with that B value. 
Percentage + is rounded up to nearest integer. + + --overlap-mode arg (=strict) + Overlap mode, one of the following: + strict Range A and B overlap if A.begin < B.end and + B.begin < A.end. + allow-abutting-points Range A and B overlap they meet the strict + requirements, or if A.begin <= B.end and + B.begin <= A.end and either A or B has zero + length. + + --select arg (=A.*,B.*) + Set of fields to select for output. + + -a [ --always-dump ] + Dump every record of A, even if there are no matches with file B. + + --overlap-fraction-A arg (=0) + Minimum fraction of A region overlap for filtering output. + + --boundary-uncertainty-A arg (=0) + Boundary uncertainty for overlap filtering. Specifically, records + failing the following predicate are filtered away: overlap >= + overlap-fraction-A * ( A-range-length - boundary-uncertainty-A ) + + --overlap-fraction-B arg (=0) + Minimum fraction of B region overlap for filtering output. + + --boundary-uncertainty-B arg (=0) + Boundary uncertainty for overlap filtering. Specifically, records + failing the following predicate are filtered away: overlap >= + overlap-fraction-B * ( B-range-length - boundary-uncertainty-B ) + + SUPPORTED FORMAT_VERSION + Any + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/junctiondiff.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,146 @@ +<tool id="cga_junctiondiff" name="junctiondiff(beta)" version="0.0.1"> +<!-- +Galaxy wrapper for cgatools junctiondiff from Complete Genomics, Inc.: reports the junctions of file A that are not present in file B. +--> + + <description>reports difference between junction calls</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable, then rename the diff report to the fixed name the outputs section collects--> + cgatools junctiondiff --beta + --reference $crr.fields.path + --junctionsA $data_sources.inputA + --junctionsB $data_sources.inputB + --scoreThresholdA $scoreA + --scoreThresholdB $scoreB + --distance $distance + --minlength $minlength + $stat + --output-prefix cg_ + ; + mv cg_diff-*tsv cg_diff.tsv + </command> + + <outputs> + <data format="tabular" name="output1" from_work_dir="cg_diff.tsv" label="${tool.name} on ${on_string}: diff"/> + <!--the report dataset is only created when input file stats were requested--> + <data format="tabular" name="output2" from_work_dir="cg_report.tsv" label="${tool.name} on ${on_string}: report"> + <filter>(stat == '--statout')</filter> + </data> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--conditional to select junction file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input junction files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form fields to select junction files from the history--> + <param name="inputA" type="data" format="tabular" label="Junction file A"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + <param name="inputB" type="data" 
format="tabular" label="Junction file B"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + <when value="out"> + <!--form fields to enter paths of junction files outside Galaxy--> + <param name="inputA" type="text" label="Junction file A (path/file_name)" size="40" help="Junction file can be compressed (gz, bz2)."/> + <param name="inputB" type="text" label="Junction file B (path/file_name)" size="40" help="Junction file can be compressed (gz, bz2)."/> + </when> + </conditional> + + <!--form field to select stats output; the option value is the statout flag or nothing--> + <param name="stat" type="select" label="Print input file stats"> + <option value="">no</option> + <option value="--statout">yes</option> + </param> + + <param name="scoreA" type="integer" label="Score threshold value for input file A (default 10)" value="10"/> + <param name="scoreB" type="integer" label="Score threshold value for input file B (default 0)" value="0"/> + <param name="distance" type="integer" label="Max distance between coordinates of potentially compatible junctions (default 200)" value="200"/> + <param name="minlength" type="integer" label="Minimum deletion junction length to be included into the difference file (default 500)" value="500"/> + </inputs> + + + <help> + +**What it does** + +This tool reports difference between junction calls of Complete Genomics junctions files + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + junctiondiff - Reports difference between junction calls of Complete Genomics junctions files. + + DESCRIPTION + junctiondiff takes two junction files A and B as input and produces the + following output: + - "diff-inputFileName" - the junctions from an input file A that are not + present in input file B. 
+ - "report.txt" - a brief summary report (if --statout is used) + + Two junctions are considered equivalent if: + - they come from different files + - left and right positions of one junction are not more than "--distance" + bases apart from the corresponding positions of another junction + - the junction scores are equal or above the scoreThreshold + - they are on the same strands + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + -s [ --reference ] arg + Reference file. + + -a [ --junctionsA ] arg + input junction file A. + + -b [ --junctionsB ] arg + input junction file B. + + -A [ --scoreThresholdA ] arg (=10) + score threshold value for the input file A. + + -B [ --scoreThresholdB ] arg (=0) + score threshold value for the input file B. + + -d [ --distance ] arg (=200) + Max distance between coordinates of potentially compatible junctions. + + -l [ --minlength ] arg (=500) + Minimum deletion junction length to be included into the difference + file. + + -o [ --output-prefix ] arg + The path prefix for all the output reports. + + -S [ --statout ] + (Debug) Report various input file statistics. Experimental feature. + + SUPPORTED FORMAT_VERSION + 1.5 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/listtestvariants.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,239 @@ +<tool id="cga_listtestvariants" name="listvariants(beta)-testvariants(beta)" version="1.0.1"> +<!-- +This tool creates a GUI for cgatools listvariants and testvariants from Complete Genomics, Inc. +to be run consecutively with the same input files. +written 5-29-2012 by bcrain@completegenomics.com +--> + + <description>lists all called variants and tests the input files for their presence</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run listvariants, then feed its output to testvariants together with the same variant files--> + cgatools listvariants + --beta + --reference ${crr.fields.path} + --output $output1 + #if $include_list.listing == "yes" <!--only added when yes--> + --variant-listing $include_list.list + #end if + $longvar + --variants + #if $file_types.data_sources.data_source == "in" + #for $v in $file_types.data_sources.varfiles <!--get each var file--> + ${v.input} + #end for + #else + `cat $file_types.data_sources.varlist` + #end if + ; + + cgatools testvariants + --beta + --reference ${crr.fields.path} + --output $output2 + --input $output1 + --variants + #if $file_types.data_sources.data_source == "in" + #for $v in $file_types.data_sources.varfiles <!--get each var/mastervar file--> + ${v.input} + #end for + #else + `cat $file_types.data_sources.varlist` + #end if + </command> + + <outputs> + <data format="tabular" name="output1" label="listvariants output"/> + <data format="tabular" name="output2" label="testvariants output"/> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--form field to select long variants option--> + <param name="longvar" type="select" label="List long variants?"> + <option value="" selected="true">no</option> + <option value="--list-long-variants">yes</option> + </param> + + <!--form 
fields to include existing variant list--> + <conditional name="include_list"> + <param name="listing" type="select" label="Include variant listing?"> + <option value="no" selected="true">no</option> + <option value="yes">yes</option> + </param> + <!--explicit empty case so that the default selection (no) has a matching when block--> + <when value="no" /> + <when value="yes"> + <param name="list" type="data" format="tabular" label="Variant listing"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + </conditional> + + <!--conditional to select input file type--> + <conditional name="file_types"> + <param name="file_type" type="select" label="Select the input file type"> + <option value="var" selected="true">var files</option> + <option value="mastervar">mastervar files</option> + </param> + + <when value="var"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input var files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_var" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to enter the path of a file that lists the var files; the command reads it with cat--> + <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + + <when value="mastervar"> + 
<!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input mastervar files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to enter the path of a file that lists the mastervar files; the command reads it with cat--> + <param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="file with list of mastervar files (/path/varfile), mastervar files can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + </conditional> + </inputs> + + <help> + +**What it does** + +This tool runs cgatools listvariants to list all called variants in the var or mastervar files, then runs cgatools testvariants to test the same files for the presence of those variants. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + listvariants - Lists the variants present in a variant file. + + DESCRIPTION + Lists all called variants present in the specified variant files, in a + format suitable for processing by the testvariants command. The output is a + tab-delimited file consisting of the following columns: + + variantId Sequential id assigned to each variant. + chromosome The chromosome of the variant. + begin 0-based reference offset of the beginning of the variant. + end 0-based reference offset of the end of the variant. + varType The varType as extracted from the variant file. + reference The reference sequence. 
+ alleleSeq The variant allele sequence as extracted from the variant + file. + xRef The xRef as extracted from the variant file. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --reference arg + The reference crr file. + + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + --variants arg + The input variant files (may be positional args). + + --variant-listing arg + The output of another listvariants run, to be merged in to produce the + output of this run. + + --list-long-variants + In addition to listing short variants, list longer variants as well + (10's of bases) by concatenating nearby calls. + + SUPPORTED FORMAT_VERSION + 0.3 or later + + + + COMMAND NAME + testvariants - Tests variant files for presence of variants. + + DESCRIPTION + Tests variant files for presence of variants. The output is a tab-delimited + file consisting of the columns of the input variants file, plus a column + for each assembly results file that contains a character code for each + allele. The character codes have meaning as follows: + + 0 This allele of this genome is consistent with the reference at this + locus but inconsistent with the variant. + 1 This allele of this genome has the input variant at this locus. + N This allele of this genome has no-calls but is consistent with the + input variant. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --reference arg + The reference crr file. + + --input arg (=STDIN) + The input variants to test for. + + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + --variants arg + The input variant files (may be passed in as arguments at the end of + the command). + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/listvariants.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,188 @@ +<tool id="cga_listvariant" name="listvariants(beta)" version="0.0.1"> +<!-- +This tool creates a GUI for cgatools listvariants from Complete Genomics, Inc. +written 5-29-2012 by bcrain@completegenomics.com +--> + + <description>lists all called variants</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable; variant files come from the history or from a list file read with cat--> + cgatools listvariants + --beta + --reference ${crr.fields.path} + --output $output + #if $include_list.listing == "yes" <!--only added when yes--> + --variant-listing $include_list.list + #end if + $longvar + --variants + #if $file_types.data_sources.data_source == "in" + #for $v in $file_types.data_sources.varfiles <!--get each var/mastervar file--> + ${v.input} + #end for + #else + `cat $file_types.data_sources.varlist` + #end if + </command> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--form field to select long variants option--> + <param name="longvar" type="select" label="List long variants?"> + <option value="" selected="true">no</option> + <option value="--list-long-variants">yes</option> + </param> + + <!--form fields to include existing variant list--> + <conditional name="include_list"> + <param name="listing" type="select" label="Include variant listing?"> + <option value="no" selected="true">no</option> + <option value="yes">yes</option> + </param> + <!--explicit empty case so that the default selection (no) has a matching when block--> + <when value="no" /> + <when value="yes"> + <param name="list" type="data" format="tabular" label="Variant listing"/> + </when> + </conditional> + + <!--conditional to select input file type--> + <conditional name="file_types"> + <param name="file_type" type="select" label="Select the input file type"> + <option value="var" selected="true">var 
files</option> + <option value="mastervar">mastervar files</option> + </param> + + <when value="var"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input var files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_var" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + <!--<validator type="expression" message="Dataset does not match selected build.">$dbkey == $crr.fields.dbkey</validator>--> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to enter the path of a file that lists the var files; the command reads it with cat--> + <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + + <when value="mastervar"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input mastervar files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" 
metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to enter the path of a file that lists the mastervar files; the command reads it with cat--> + <param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="file with list of mastervar files (/path/varfile), mastervar files can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + </conditional> + </inputs> + + <outputs> + <data format="tabular" name="output"/> + </outputs> + +<!-- <tests> + <test> + <param name="reference" value="hg19.crr"/> + <param name="file_type" value="var"/> + <param name="data_source" value="in"/> + <param name="varfiles?input" value="??"/> + <param name="varfiles?input" value="??"/> + <output name="output" file="??"/> + </test> + </tests>--> + + <help> + +**What it does** + +This tool uses the cgatools listvariants to list all called variants present in the var or mastervar files. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + listvariants - Lists the variants present in a variant file. + + DESCRIPTION + Lists all called variants present in the specified variant files, in a + format suitable for processing by the testvariants command. The output is a + tab-delimited file consisting of the following columns: + + variantId Sequential id assigned to each variant. + chromosome The chromosome of the variant. + begin 0-based reference offset of the beginning of the variant. + end 0-based reference offset of the end of the variant. + varType The varType as extracted from the variant file. + reference The reference sequence. + alleleSeq The variant allele sequence as extracted from the variant + file. + xRef The xRef as extracted from the variant file. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --reference arg + The reference crr file. 
+ + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + --variants arg + The input variant files (may be positional args). + + --variant-listing arg + The output of another listvariants run, to be merged in to produce the + output of this run. + + --list-long-variants + In addition to listing short variants, list longer variants as well + (10's of bases) by concatenating nearby calls. + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/snpdiff.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,177 @@ +<tool id="cga_snpdiff" name="snpdiff" version="0.0.1"> +<!-- +Galaxy wrapper for cgatools snpdiff from Complete Genomics, Inc.: compares the snp calls of a genotypes file to a variant file. +--> + + <description>compares snp calls to a Complete Genomics variant file.</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable; varfile is read through its enclosing conditional, and the selected report names are joined with commas (tr squeezes runs of spaces into one comma)--> + cgatools snpdiff + --reference $crr.fields.path + --variants $data_sources.varfile + --genotypes $genotype + --output-prefix cg_ + --reports `echo ${report1} ${report2} ${report3} | tr -s ' ' ','` + </command> + + <outputs> + <!--each report dataset is only created when the matching report was requested--> + <data format="tabular" name="output1" from_work_dir="cg_Output.tsv" label="${tool.name} on ${on_string}: Output"> + <filter>(report1 == 'Output')</filter> + </data> + <data format="tabular" name="output2" from_work_dir="cg_Verbose.tsv" label="${tool.name} on ${on_string}: Verbose"> + <filter>(report2 == 'Verbose')</filter> + </data> + <data format="tabular" name="output3" from_work_dir="cg_Stats.tsv" label="${tool.name} on ${on_string}: Stats"> + <filter>(report3 == 'Stats')</filter> + </data> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where is the input varfile?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <param name="varfile" type="data" format="cg_var" label="Var file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not 
currently available for this build."/> + </param> + </when> + <when value="out"> + <!--form field to enter the path of a variant file outside Galaxy--> + <param name="varfile" type="text" label="Variant file (/path/file_name)" size="40" help="Variant file can be compressed (gz, bz2)."/> + </when> + </conditional> + + <!--form field to select the genotypes file--> + <param name="genotype" type="data" format="tabular" label="Genotypes file with SNP calls" help="The genotypes file is a tab-delimited file with at + least the following columns (additional columns may be given): Chromosome (Required), Offset0Based (Required), GenotypesStrand (Optional), Genotypes (Optional)"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + + <!--form fields to select reports; each option value is the report name or nothing--> + <param name="report1" type="select" label="Report Output"> + <option value="">no</option> + <option value="Output">yes</option> + </param> + <param name="report2" type="select" label="Report Verbose"> + <option value="">no</option> + <option value="Verbose">yes</option> + </param> + <param name="report3" type="select" label="Report Stats"> + <option value="">no</option> + <option value="Stats">yes</option> + </param> + + </inputs> + + <help> + +**What it does** + +This tool compares snp calls to a Complete Genomics variant file. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + snpdiff - Compares snp calls to a Complete Genomics variant file. + + DESCRIPTION + Compares the snp calls in the "genotypes" file to the calls in a Complete + Genomics variant file. The genotypes file is a tab-delimited file with at + least the following columns (additional columns may be given): + + Chromosome (Required) The name of the chromosome. + Offset0Based (Required) The 0-based offset in the chromosome. 
+ GenotypesStrand (Optional) The strand of the calls in the Genotypes + column (+ or -, defaults to +). + Genotypes (Optional) The calls, one per allele. The following + calls are recognized: + A,C,G,T A called base. + N A no-call. + - A deleted base. + . A non-snp variation. + + The output is a tab-delimited file consisting of the columns of the + original genotypes file, plus the following additional columns: + + Reference The reference base at the given position. + VariantFile The calls made by the variant file, one per allele. + The character codes are the same as is described for + the Genotypes column. + DiscordantAlleles (Only if Genotypes is present) The number of + Genotypes alleles that are discordant with calls in + the VariantFile. If the VariantFile is described as + haploid at the given position but the Genotypes is + diploid, then each genotype allele is compared + against the haploid call of the VariantFile. + NoCallAlleles (Only if Genotypes is present) The number of + Genotypes alleles that were no-called by the + VariantFile. If the VariantFile is described as + haploid at the given position but the Genotypes is + diploid, then a VariantFile no-call is counted twice. + + The verbose output is a tab-delimited file consisting of the columns of the + original genotypes file, plus the following additional columns: + + Reference The reference base at the given position. + VariantFile The call made by the variant file for one allele (there is + a line in this file for each allele). The character codes + are the same as is described for the Genotypes column. + [CALLS] The rest of the columns are pasted in from the VariantFile, + describing the variant file line used to make the call. + + The stats output is a comma-separated file with several tables describing + the results of the snp comparison, for each diploid genotype. 
The tables + all describe the comparison result (column headers) versus the genotype + classification (row labels) in different ways. The "Locus classification" + tables have the most detailed match classifications, while the "Locus + concordance" tables roll these match classifications up into "discordance" + and "no-call". A locus is considered discordant if it is discordant for + either allele. A locus is considered no-call if it is concordant for both + alleles but has a no-call on either allele. The "Allele concordance" + describes the comparison result on a per-allele basis. + + OPTIONS + -h [ --help ] + Print this help message. + + --reference arg + The input crr file. + + --variants arg + The input variant file. + + --genotypes arg + The input genotypes file. + + --output-prefix arg + The path prefix for all output reports. + + --reports arg (=Output,Verbose,Stats) + Comma-separated list of reports to generate. A report is one of: + Output The output genotypes file. + Verbose The verbose output file. + Stats The stats output file. + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/testvariants.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,157 @@ +<tool id="cga_testvariants" name="testvariants(beta)" version="0.0.1"> +<!-- +This tool creates a GUI for cgatools testvariants from Complete Genomics, Inc. +written 5-29-2012 by bcrain@completegenomics.com +--> + + <description>test for the presence of variants</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable--> + cgatools testvariants + --beta + --reference ${crr.fields.path} + --output $output + --input $listing + --variants + #if $file_types.data_sources.data_source == "in" + #for $v in $file_types.data_sources.varfiles <!--get each var/mastervar file--> + ${v.input} + #end for + #else + `cat $file_types.data_sources.varlist` + #end if + </command> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--form fields to select variant list--> + <param name="listing" type="data" format="tabular" label="Select variant list"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + + <!--conditional to select input file type--> + <conditional name="file_types"> + <param name="file_type" type="select" label="Select the input file type"> + <option value="var" selected="true">var files</option> + <option value="mastervar">mastervar files</option> + </param> + + <when value="var"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input var files?"> + 
<option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_var" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to select crr file--> + <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + + <when value="mastervar"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input mastervar files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <repeat name="varfiles" title="Variant files"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + <when value="out"> + <!--form field to select crr file--> + <param name="varlist" type="text" label="List of mastervar files (/path/file)" size="200" help="file with list of mastervar files (/path/varfile), mastervar files can be compressed (gz, bz2)."/> + </when> + </conditional> + 
</when> + </conditional> + </inputs> + + <help> + +**What it does** + +This tool uses the cgatools testvariants to test variant or mastervar files for the presence of variants. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + testvariants - Tests variant files for presence of variants. + + DESCRIPTION + Tests variant files for presence of variants. The output is a tab-delimited + file consisting of the columns of the input variants file, plus a column + for each assembly results file that contains a character code for each + allele. The character codes have meaning as follows: + + 0 This allele of this genome is consistent with the reference at this + locus but inconsistent with the variant. + 1 This allele of this genome has the input variant at this locus. + N This allele of this genome has no-calls but is consistent with the + input variant. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --reference arg + The reference crr file. + + --input arg (=STDIN) + The input variants to test for. + + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + --variants arg + The input variant files (may be passed in as arguments at the end of + the command). + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools/varfilter.xml Wed Jun 13 17:31:27 2012 -0400 @@ -0,0 +1,184 @@ +<tool id="cga_varfilter" name="varfilter(beta)" version="0.0.1"> +<!-- +This tool creates a GUI for cgatools varfilter from Complete Genomics, Inc. +The function is called via a Perl script varfilter_wrapper.pl, designed to generate the correctly formatted filters to append to the input file on the command line. +written 6-1-2012 by bcrain@completegenomics.com +--> + + <description>copies input file, applying filters.</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command interpreter="perl"> + varfilter_wrapper.pl + --reference $crr.fields.path + --output $output + --input $file_types.data_sources.input + #for $f in $filters + --zygosity $f.zygosity + --vartype $f.vartype + --varscorevaf x$f.varscorevaf + --varscoreeaf x$f.varscoreeaf + --varquality $f.varquality + #end for + </command> + + <outputs> + <data format="cg_var" name="output" /> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Genome build"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--conditional to select input file type--> + <conditional name="file_types"> + <param name="file_type" type="select" label="Select the input file type"> + <option value="var" selected="true">var files</option> + <option value="mastervar">mastervar files</option> + </param> + + <when value="var"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where is the input var file?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <param name="input"
type="data" format="cg_var" label="Var file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + <when value="out"> + <!--form field to select crr file--> + <param name="input" type="text" label="Var file (/path/file)" size="200" help="var file can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + + <when value="mastervar"> + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where is the input mastervar file?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> + </param> + <when value="in"> + <!--form field to select variant files--> + <param name="input" type="data" format="cg_mastervar" label="Mastervar file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + <when value="out"> + <!--form field to select crr file--> + <param name="input" type="text" label="Mastervar file (/path/file)" size="200" help="mastervar file can be compressed (gz, bz2)."/> + </when> + </conditional> + </when> + </conditional> + + <!-- formfields to add filters --> + <repeat name="filters" title="Filter"> + <param name="zygosity" type="select" label="Filter out call (set to no-call) IF locus IS"> + <option value="NA">- all loci -</option> + <option value="hom">homozygous</option> + <option value="het">heterozygous</option> + </param> + + <param name="vartype" type="select" label="AND varType IS"> + <option value="NA">- any varType -</option> + <option value="snp">snp</option> + <option
value="ins">ins</option> + <option value="del">del</option> + <option value="sub">sub</option> + <option value="ref">ref</option> + </param> + + <param name="varscorevaf" type="text" label="AND varScoreVAF IS LESS THAN"/> + <param name="varscoreeaf" type="text" label="AND varScoreEAF IS LESS THAN"/> + + <param name="varquality" type="select" label="AND varQuality IS NOT"> + <option value="NA"> </option> + <option value="VQHigh">VQHigh</option> + <option value="VQLOW">VQLOW</option> + </param> + </repeat> + </inputs> + + <help> + +**What it does** + +This tool copies input var file or masterVar file to output, applying specified filters. + +cgatools: http://sourceforge.net/projects/cgatools/files/ + +----- + +**cgatools Manual**:: + + COMMAND NAME + varfilter - Copies input var file or masterVar file to output, applying + specified filters. + + DESCRIPTION + Copies input var file or masterVar file to output, applying specified + filters (which are available to all cgatools commands that read a var file + or masterVar file as input). Filters are specified by appending the filter + specification to the var file name on the command line. For example: + + /path/to/var.tsv.bz2#varQuality!=VQHIGH + + The preceding example filters out any calls marked as VQLOW. The filter + specification follows the "#" sign, and consists of a list of filters to + apply, separated by a comma. Each filter is a colon-separated list of call + selectors. Any scored call that passes all the colon-separated call + selectors for one or more of the comma-separated filters is turned into a + no-call. The following call selectors are available: + + hom Selects only calls in homozygous loci. + het Selects any scored call not selected by the hom selector. + varType=XX Selects calls whose varType is XX. + varScoreVAF<XX Selects calls whose varScoreVAF<XX. + varScoreEAF<XX Selects calls whose varScoreEAF<XX. + varQuality!=XX Selects calls whose varQuality is not XX. 
+ + Here is an example that filters homozygous SNPs with varScoreVAF < 25 and + heterozygous insertions with varScoreEAF < 50: + + + '/path/to/var.tsv.bz2#hom:varType=snp:varScoreVAF<25,het:varType=ins:varScoreEAF<50' + + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta flag. + + --reference arg + The reference crr file. + + --input arg + The input var file or masterVar file (typically with filters specified). + + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
#!/usr/bin/perl
# varfilter_wrapper.pl
#
# Galaxy wrapper for `cgatools varfilter`.
#
# The Galaxy tool form passes one group of --zygosity / --vartype /
# --varscorevaf / --varscoreeaf / --varquality options per filter the user
# added. This wrapper converts those groups into the filter specification that
# cgatools expects to find appended to the input file name
# ("file#selector:selector,selector:selector"), echoes the resulting command,
# and then runs it.
#
# written 6-1-2012 by bcrain@completegenomics.com

use strict;
use warnings;
use Getopt::Long;

$| = 1;    # autoflush STDOUT so the echoed command is not held back in a buffer

# Run only when executed as a script; loading the file from other Perl code
# gives access to the helpers without launching cgatools.
main() unless caller;

# Parse the command line, build the filtered input name and run cgatools.
# Exits with the exit code of cgatools when that is non-zero.
sub main {
    my ($reference, $input, $output);
    my (@zygosity, @vartype, @varscorevaf, @varscoreeaf, @varquality);

    GetOptions(
        'reference=s'    => \$reference,
        'input=s'        => \$input,
        'output=s'       => \$output,
        'zygosity=s@'    => \@zygosity,
        'vartype=s@'     => \@vartype,
        'varscorevaf=s@' => \@varscorevaf,
        'varscoreeaf=s@' => \@varscoreeaf,
        'varquality=s@'  => \@varquality,
    ) or die "varfilter_wrapper.pl: could not parse the command line\n";

    # Fail early with a readable message instead of handing cgatools an
    # incomplete command line.
    for my $required (
        [ '--reference', $reference ],
        [ '--input',     $input ],
        [ '--output',    $output ],
      )
    {
        my ($name, $value) = @{$required};
        die "varfilter_wrapper.pl: missing required option $name\n"
          unless defined $value && $value ne '';
    }

    my $filtered_input = $input
      . build_filter_suffix(\@zygosity, \@vartype, \@varscorevaf,
        \@varscoreeaf, \@varquality);

    # Echo the command so it is visible in the job's standard output.
    print "cgatools varfilter\n",
      "--beta\n",
      "--reference $reference\n",
      "--output $output\n",
      "--input '$filtered_input'\n";

    # List-form system() bypasses the shell, so paths containing spaces,
    # quotes or shell metacharacters (the "outside Galaxy" path is typed by
    # the user) are passed to cgatools verbatim.
    my $status = system(
        'cgatools', 'varfilter', '--beta',
        '--reference', $reference,
        '--output',    $output,
        '--input',     $filtered_input,
    );

    if ($status == -1) {
        die "varfilter_wrapper.pl: failed to execute cgatools: $!\n";
    }
    if ($status & 127) {
        die sprintf "varfilter_wrapper.pl: cgatools died with signal %d\n",
          $status & 127;
    }

    # Propagate a cgatools failure instead of always reporting success.
    exit($status >> 8) if $status != 0;
    return;
}

# Build the "#filter,filter,..." suffix that is appended to the input file
# name.
#
# Arguments: five array references (zygosity, vartype, varscorevaf,
# varscoreeaf, varquality) holding one entry per filter at matching indexes.
# The number of filters is taken from the zygosity list. 'NA' means "no
# selector" for zygosity, vartype and varquality; the score values carry an
# 'x' prefix (so an empty form field still yields an option value) and a bare
# 'x' means "no selector". A missing entry is treated as "no selector".
#
# Returns the suffix including the leading '#', or '' when no filter selects
# anything.
sub build_filter_suffix {
    my ($zygosity, $vartype, $varscorevaf, $varscoreeaf, $varquality) = @_;

    my @filters;
    for my $i (0 .. $#{$zygosity}) {
        my @selectors;

        my $zyg = _value_or($zygosity->[$i], 'NA');
        push @selectors, $zyg if $zyg ne 'NA' && $zyg ne '';

        my $type = _value_or($vartype->[$i], 'NA');
        push @selectors, 'varType=' . $type if $type ne 'NA';

        my $vaf = _value_or($varscorevaf->[$i], 'x');
        if ($vaf ne 'x') {
            (my $score = $vaf) =~ s/\Ax//;
            push @selectors, 'varScoreVAF<' . $score;
        }

        my $eaf = _value_or($varscoreeaf->[$i], 'x');
        if ($eaf ne 'x') {
            (my $score = $eaf) =~ s/\Ax//;
            push @selectors, 'varScoreEAF<' . $score;
        }

        my $quality = _value_or($varquality->[$i], 'NA');
        if ($quality ne 'NA') {
            # The cgatools manual writes the quality names in upper case
            # (VQHIGH, VQLOW); normalise so a mixed-case form value such as
            # "VQHigh" does not end up selecting every call.
            push @selectors, 'varQuality!=' . uc $quality;
        }

        # Selectors of one filter are joined with ':'.
        push @filters, join(':', @selectors) if @selectors;
    }

    # Filters are joined with ',' and introduced by a single '#'.
    return @filters ? '#' . join(',', @filters) : '';
}

# Return $value, or $default when $value is undefined.
sub _value_or {
    my ($value, $default) = @_;
    return defined $value ? $value : $default;
}