# HG changeset patch
# User bcrain-completegenomics
# Date 1339623087 14400
# Node ID 8a3d71feeb48d330ae747f476b86fcc8abcbdbc2
# Parent 51fea6716ea54b32c27c2ec653a9b9c1e2f2ac31
Uploaded
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/join.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/join.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,206 @@
+
+
+ two tsv files based on equal fields or overlapping regions.
+
+
+ cgatools
+
+
+
+ cgatools join --beta
+ --input $inputA
+ --input $inputB
+ --output $output
+ --output-mode $outmode
+ $dump
+ --select $col
+ #for $m in $matches
+ --match ${m.match}
+ #end for
+ #if $range_overlap.range == 'yes'
+ #for $o in $range_overlap.overlaps
+ --overlap ${o.overlap}
+ #end for
+ --overlap-mode $range_overlap.overlapmode
+ --overlap-fraction-A $range_overlap.fractionA
+ --boundary-uncertainty-A $range_overlap.boundaryA
+ --overlap-fraction-B $range_overlap.fractionB
+ --boundary-uncertainty-B $range_overlap.boundaryB
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool joins two tab-delimited files based on equal fields or overlapping regions.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ join - Joins two tab-delimited files based on equal fields or overlapping regions.
+
+ DESCRIPTION
+ Joins two tab-delimited files based on equal fields or overlapping regions.
+ By default, an output record is produced for each match found between file
+ A and file B, but output format can be controlled by the --output-mode
+ parameter.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ --input arg
+ File name to use as input (may be passed in as arguments at the end of
+ the command, or omitted for stdin). There must be exactly two input
+ files to join. If only one file is specified by name, file A is taken
+ to be stdin and file B is the named file. File B is read fully into
+ memory, and file A is streamed. File A's columns appear first in the
+ output.
+
+ --output arg (=STDOUT)
+ The output file name (may be omitted for stdout).
+
+ --match arg
+ A match specification, which is a column from A and a column from B
+ separated by a colon.
+
+ --overlap arg
+ Overlap specification. An overlap specification consists of a range
+ definition for files A and B, separated by a colon. A range definition
+ may be two columns, in which case they are interpreted as the beginning
+ and end of the range. Or it may be one column, in which case the range
+ is defined as the 1-base range starting at the given value. The records
+ from the two files must overlap in order to be considered for output.
+ Two ranges are considered to overlap if the overlap is at least one
+ base long, or if one of the ranges is length 0 and the ranges overlap
+ or abut. For example, "begin,end:offset" will match wherever end-begin
+ > 0, begin<offset+1, and end>offset, or wherever end-begin = 0,
+ begin<=offset+1, and end>=offset.
+
+
+ -m [ --output-mode ] arg (=full)
+ Output mode, one of the following:
+ full Print an output record for each match found between
+ file A and file B.
+ compact Print at most one record for each record of file A,
+ joining the file B values by a semicolon and
+ suppressing repeated B values and empty B values.
+ compact-pct Same as compact, but for each distinct B value,
+ annotate with the percentage of the A record that is
+ overlapped by B records with that B value. Percentage
+ is rounded up to nearest integer.
+
+ --overlap-mode arg (=strict)
+ Overlap mode, one of the following:
+ strict Range A and B overlap if A.begin < B.end and
+ B.begin < A.end.
+ allow-abutting-points Range A and B overlap if they meet the strict
+ requirements, or if A.begin <= B.end and
+ B.begin <= A.end and either A or B has zero
+ length.
+
+ --select arg (=A.*,B.*)
+ Set of fields to select for output.
+
+ -a [ --always-dump ]
+ Dump every record of A, even if there are no matches with file B.
+
+ --overlap-fraction-A arg (=0)
+ Minimum fraction of A region overlap for filtering output.
+
+ --boundary-uncertainty-A arg (=0)
+ Boundary uncertainty for overlap filtering. Specifically, records
+ failing the following predicate are filtered away: overlap >=
+ overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
+
+ --overlap-fraction-B arg (=0)
+ Minimum fraction of B region overlap for filtering output.
+
+ --boundary-uncertainty-B arg (=0)
+ Boundary uncertainty for overlap filtering. Specifically, records
+ failing the following predicate are filtered away: overlap >=
+ overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
+
+ SUPPORTED FORMAT_VERSION
+ Any
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/junctiondiff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/junctiondiff.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,146 @@
+
+
+ reports difference between junction calls
+
+
+ cgatools
+
+
+
+ cgatools junctiondiff --beta
+ --reference $crr.fields.path
+ --junctionsA $data_sources.inputA
+ --junctionsB $data_sources.inputB
+ --scoreThresholdA $scoreA
+ --scoreThresholdB $scoreB
+ --distance $distance
+ --minlength $minlength
+ $stat
+ --output-prefix cg_
+ ;
+ mv cg_diff-*tsv cg_diff.tsv
+
+
+
+
+
+ (stat == '--statout')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool reports difference between junction calls of Complete Genomics junctions files
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ junctiondiff - Reports difference between junction calls of Complete Genomics junctions files.
+
+ DESCRIPTION
+ junctiondiff takes two junction files A and B as input and produces the
+ following output:
+ - "diff-inputFileName" - the junctions from an input file A that are not
+ present in input file B.
+ - "report.txt" - a brief summary report (if --statout is used)
+
+ Two junctions are considered equivalent if:
+ - they come from different files
+ - left and right positions of one junction are not more than "--distance"
+ bases apart from the corresponding positions of another junction
+ - the junction scores are equal or above the scoreThreshold
+ - they are on the same strands
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ -s [ --reference ] arg
+ Reference file.
+
+ -a [ --junctionsA ] arg
+ input junction file A.
+
+ -b [ --junctionsB ] arg
+ input junction file B.
+
+ -A [ --scoreThresholdA ] arg (=10)
+ score threshold value for the input file A.
+
+ -B [ --scoreThresholdB ] arg (=0)
+ score threshold value for the input file B.
+
+ -d [ --distance ] arg (=200)
+ Max distance between coordinates of potentially compatible junctions.
+
+ -l [ --minlength ] arg (=500)
+ Minimum deletion junction length to be included into the difference
+ file.
+
+ -o [ --output-prefix ] arg
+ The path prefix for all the output reports.
+
+ -S [ --statout ]
+ (Debug) Report various input file statistics. Experimental feature.
+
+ SUPPORTED FORMAT_VERSION
+ 1.5 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/listtestvariants.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/listtestvariants.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,239 @@
+
+
+
+
+
+
+ cgatools
+
+
+
+ cgatools listvariants
+ --beta
+ --reference ${crr.fields.path}
+ --output $output1
+ #if $include_list.listing == "yes"
+ --variant-listing $include_list.list
+ #end if
+ $longvar
+ --variants
+ #if $file_types.data_sources.data_source == "in"
+ #for $v in $file_types.data_sources.varfiles
+ ${v.input}
+ #end for
+ #else
+ `cat $file_types.data_sources.varlist`
+ #end if
+ ;
+
+ cgatools testvariants
+ --beta
+ --reference ${crr.fields.path}
+ --output $output2
+ --input $output1
+ --variants
+ #if $file_types.data_sources.data_source == "in"
+ #for $v in $file_types.data_sources.varfiles
+ ${v.input}
+ #end for
+ #else
+ `cat $file_types.data_sources.varlist`
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool uses cgatools listvariants to list all called variants present in the var or mastervar files, and then cgatools testvariants to test those files for the presence of the listed variants.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ listvariants - Lists the variants present in a variant file.
+
+ DESCRIPTION
+ Lists all called variants present in the specified variant files, in a
+ format suitable for processing by the testvariants command. The output is a
+ tab-delimited file consisting of the following columns:
+
+ variantId Sequential id assigned to each variant.
+ chromosome The chromosome of the variant.
+ begin 0-based reference offset of the beginning of the variant.
+ end 0-based reference offset of the end of the variant.
+ varType The varType as extracted from the variant file.
+ reference The reference sequence.
+ alleleSeq The variant allele sequence as extracted from the variant
+ file.
+ xRef The xRef as extracted from the variant file.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ --reference arg
+ The reference crr file.
+
+ --output arg (=STDOUT)
+ The output file (may be omitted for stdout).
+
+ --variants arg
+ The input variant files (may be positional args).
+
+ --variant-listing arg
+ The output of another listvariants run, to be merged in to produce the
+ output of this run.
+
+ --list-long-variants
+ In addition to listing short variants, list longer variants as well
+ (10's of bases) by concatenating nearby calls.
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
+
+ COMMAND NAME
+ testvariants - Tests variant files for presence of variants.
+
+ DESCRIPTION
+ Tests variant files for presence of variants. The output is a tab-delimited
+ file consisting of the columns of the input variants file, plus a column
+ for each assembly results file that contains a character code for each
+ allele. The character codes have meaning as follows:
+
+ 0 This allele of this genome is consistent with the reference at this
+ locus but inconsistent with the variant.
+ 1 This allele of this genome has the input variant at this locus.
+ N This allele of this genome has no-calls but is consistent with the
+ input variant.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ --reference arg
+ The reference crr file.
+
+ --input arg (=STDIN)
+ The input variants to test for.
+
+ --output arg (=STDOUT)
+ The output file (may be omitted for stdout).
+
+ --variants arg
+ The input variant files (may be passed in as arguments at the end of
+ the command).
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/listvariants.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/listvariants.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,188 @@
+
+
+
+ lists all called variants
+
+
+ cgatools
+
+
+
+ cgatools listvariants
+ --beta
+ --reference ${crr.fields.path}
+ --output $output
+ #if $include_list.listing == "yes"
+ --variant-listing $include_list.list
+ #end if
+ $longvar
+ --variants
+ #if $file_types.data_sources.data_source == "in"
+ #for $v in $file_types.data_sources.varfiles
+ ${v.input}
+ #end for
+ #else
+ `cat $file_types.data_sources.varlist`
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool uses the cgatools listvariants to list all called variants present in the var or mastervar files.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ listvariants - Lists the variants present in a variant file.
+
+ DESCRIPTION
+ Lists all called variants present in the specified variant files, in a
+ format suitable for processing by the testvariants command. The output is a
+ tab-delimited file consisting of the following columns:
+
+ variantId Sequential id assigned to each variant.
+ chromosome The chromosome of the variant.
+ begin 0-based reference offset of the beginning of the variant.
+ end 0-based reference offset of the end of the variant.
+ varType The varType as extracted from the variant file.
+ reference The reference sequence.
+ alleleSeq The variant allele sequence as extracted from the variant
+ file.
+ xRef The xRef as extracted from the variant file.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ --reference arg
+ The reference crr file.
+
+ --output arg (=STDOUT)
+ The output file (may be omitted for stdout).
+
+ --variants arg
+ The input variant files (may be positional args).
+
+ --variant-listing arg
+ The output of another listvariants run, to be merged in to produce the
+ output of this run.
+
+ --list-long-variants
+ In addition to listing short variants, list longer variants as well
+ (10's of bases) by concatenating nearby calls.
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/snpdiff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/snpdiff.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,177 @@
+
+
+ compares snp calls to a Complete Genomics variant file.
+
+
+ cgatools
+
+
+
+ cgatools snpdiff
+ --reference $crr.fields.path
+ --variants $varfile
+ --genotypes $genotype
+ --output-prefix cg_
+ --reports `echo ${report1} ${report2} ${report3} | sed 's/ */,/g'`
+
+
+
+
+ (report1 == 'Output')
+
+
+ (report2 == 'Verbose')
+
+
+ (report3 == 'Stats')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool compares snp calls to a Complete Genomics variant file.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ snpdiff - Compares snp calls to a Complete Genomics variant file.
+
+ DESCRIPTION
+ Compares the snp calls in the "genotypes" file to the calls in a Complete
+ Genomics variant file. The genotypes file is a tab-delimited file with at
+ least the following columns (additional columns may be given):
+
+ Chromosome (Required) The name of the chromosome.
+ Offset0Based (Required) The 0-based offset in the chromosome.
+ GenotypesStrand (Optional) The strand of the calls in the Genotypes
+ column (+ or -, defaults to +).
+ Genotypes (Optional) The calls, one per allele. The following
+ calls are recognized:
+ A,C,G,T A called base.
+ N A no-call.
+ - A deleted base.
+ . A non-snp variation.
+
+ The output is a tab-delimited file consisting of the columns of the
+ original genotypes file, plus the following additional columns:
+
+ Reference The reference base at the given position.
+ VariantFile The calls made by the variant file, one per allele.
+ The character codes are the same as is described for
+ the Genotypes column.
+ DiscordantAlleles (Only if Genotypes is present) The number of
+ Genotypes alleles that are discordant with calls in
+ the VariantFile. If the VariantFile is described as
+ haploid at the given position but the Genotypes is
+ diploid, then each genotype allele is compared
+ against the haploid call of the VariantFile.
+ NoCallAlleles (Only if Genotypes is present) The number of
+ Genotypes alleles that were no-called by the
+ VariantFile. If the VariantFile is described as
+ haploid at the given position but the Genotypes is
+ diploid, then a VariantFile no-call is counted twice.
+
+ The verbose output is a tab-delimited file consisting of the columns of the
+ original genotypes file, plus the following additional columns:
+
+ Reference The reference base at the given position.
+ VariantFile The call made by the variant file for one allele (there is
+ a line in this file for each allele). The character codes
+ are the same as is described for the Genotypes column.
+ [CALLS] The rest of the columns are pasted in from the VariantFile,
+ describing the variant file line used to make the call.
+
+ The stats output is a comma-separated file with several tables describing
+ the results of the snp comparison, for each diploid genotype. The tables
+ all describe the comparison result (column headers) versus the genotype
+ classification (row labels) in different ways. The "Locus classification"
+ tables have the most detailed match classifications, while the "Locus
+ concordance" tables roll these match classifications up into "discordance"
+ and "no-call". A locus is considered discordant if it is discordant for
+ either allele. A locus is considered no-call if it is concordant for both
+ alleles but has a no-call on either allele. The "Allele concordance"
+ describes the comparison result on a per-allele basis.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --reference arg
+ The input crr file.
+
+ --variants arg
+ The input variant file.
+
+ --genotypes arg
+ The input genotypes file.
+
+ --output-prefix arg
+ The path prefix for all output reports.
+
+ --reports arg (=Output,Verbose,Stats)
+ Comma-separated list of reports to generate. A report is one of:
+ Output The output genotypes file.
+ Verbose The verbose output file.
+ Stats The stats output file.
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/testvariants.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/testvariants.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,157 @@
+
+
+
+ test for the presence of variants
+
+
+ cgatools
+
+
+
+ cgatools testvariants
+ --beta
+ --reference ${crr.fields.path}
+ --output $output
+ --input $listing
+ --variants
+ #if $file_types.data_sources.data_source == "in"
+ #for $v in $file_types.data_sources.varfiles
+ ${v.input}
+ #end for
+ #else
+ `cat $file_types.data_sources.varlist`
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool uses the cgatools testvariants to test variant or mastervar files for the presence of variants.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ testvariants - Tests variant files for presence of variants.
+
+ DESCRIPTION
+ Tests variant files for presence of variants. The output is a tab-delimited
+ file consisting of the columns of the input variants file, plus a column
+ for each assembly results file that contains a character code for each
+ allele. The character codes have meaning as follows:
+
+ 0 This allele of this genome is consistent with the reference at this
+ locus but inconsistent with the variant.
+ 1 This allele of this genome has the input variant at this locus.
+ N This allele of this genome has no-calls but is consistent with the
+ input variant.
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta
+ flag.
+
+ --reference arg
+ The reference crr file.
+
+ --input arg (=STDIN)
+ The input variants to test for.
+
+ --output arg (=STDOUT)
+ The output file (may be omitted for stdout).
+
+ --variants arg
+ The input variant files (may be passed in as arguments at the end of
+ the command).
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/varfilter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/varfilter.xml Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,184 @@
+
+
+
+ copies input file, applying filters.
+
+
+ cgatools
+
+
+
+ varfilter_wrapper.pl
+ --reference $crr.fields.path
+ --output $output
+ --input $file_types.data_sources.input
+ #for $f in $filters
+ --zygosity $f.zygosity
+ --vartype $f.vartype
+ --varscorevaf x$f.varscorevaf
+ --varscoreeaf x$f.varscoreeaf
+ --varquality $f.varquality
+ #end for
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool copies input var file or masterVar file to output, applying specified filters.
+
+cgatools: http://sourceforge.net/projects/cgatools/files/
+
+-----
+
+**cgatools Manual**::
+
+ COMMAND NAME
+ varfilter - Copies input var file or masterVar file to output, applying
+ specified filters.
+
+ DESCRIPTION
+ Copies input var file or masterVar file to output, applying specified
+ filters (which are available to all cgatools commands that read a var file
+ or masterVar file as input). Filters are specified by appending the filter
+ specification to the var file name on the command line. For example:
+
+ /path/to/var.tsv.bz2#varQuality!=VQHIGH
+
+ The preceding example filters out any calls marked as VQLOW. The filter
+ specification follows the "#" sign, and consists of a list of filters to
+ apply, separated by a comma. Each filter is a colon-separated list of call
+ selectors. Any scored call that passes all the colon-separated call
+ selectors for one or more of the comma-separated filters is turned into a
+ no-call. The following call selectors are available:
+
+ hom Selects only calls in homozygous loci.
+ het Selects any scored call not selected by the hom selector.
+ varType=XX Selects calls whose varType is XX.
+ varScoreVAF<XX Selects calls whose varScoreVAF<XX.
+ varScoreEAF<XX Selects calls whose varScoreEAF<XX.
+ varQuality!=XX Selects calls whose varQuality is not XX.
+
+ Here is an example that filters homozygous SNPs with varScoreVAF < 25 and
+ heterozygous insertions with varScoreEAF < 50:
+
+
+ '/path/to/var.tsv.bz2#hom:varType=snp:varScoreVAF<25,het:varType=ins:varScoreEAF<50'
+
+
+ OPTIONS
+ -h [ --help ]
+ Print this help message.
+
+ --beta
+ This is a beta command. To run this command, you must pass the --beta flag.
+
+ --reference arg
+ The reference crr file.
+
+ --input arg
+ The input var file or masterVar file (typically with filters specified).
+
+ --output arg (=STDOUT)
+ The output file (may be omitted for stdout).
+
+ SUPPORTED FORMAT_VERSION
+ 0.3 or later
+
+
diff -r 51fea6716ea5 -r 8a3d71feeb48 cgatools/tools/cgatools/varfilter_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgatools/tools/cgatools/varfilter_wrapper.pl Wed Jun 13 17:31:27 2012 -0400
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+$| = 1; # set autoflush to screen
+
+# This is a wrapper for the cgatools varfilter function to run cgatools varfilter in Galaxy.
+# The wrapper generates the filter(s) in the correct format to be used with the input file.
+# written 6-1-2012 by bcrain@completegenomics.com
+#
+# Options:
+#   --reference, --input, --output
+#       Required; passed through to cgatools varfilter.
+#   --zygosity, --vartype, --varscorevaf, --varscoreeaf, --varquality
+#       Repeatable; the i-th value of each option describes the i-th filter.
+#       'NA' means "no selector of this kind". The two score values are expected
+#       to carry an 'x' prefix, so a bare 'x' means that no score was entered.
+#
+# Each filter is a ':'-separated list of call selectors; the filters are joined
+# with ',' and appended to the input file name after a '#'.
+
+my ($opt_reference, $opt_input, $opt_output);
+my (@opt_zygosity, @opt_vartype, @opt_varscorevaf, @opt_varscoreeaf, @opt_varquality);
+
+GetOptions(
+    'reference=s'   => \$opt_reference,
+    'input=s'       => \$opt_input,
+    'output=s'      => \$opt_output,
+    'zygosity=s'    => \@opt_zygosity,
+    'vartype=s'     => \@opt_vartype,
+    'varscorevaf=s' => \@opt_varscorevaf,
+    'varscoreeaf=s' => \@opt_varscoreeaf,
+    'varquality=s'  => \@opt_varquality,
+) or die "varfilter_wrapper.pl: invalid command line options\n";
+
+# Fail early with a clear message instead of handing empty arguments to cgatools.
+for my $required (['reference', $opt_reference], ['input', $opt_input], ['output', $opt_output])
+{
+    my ($name, $value) = @$required;
+    defined $value && $value ne ''
+        or die "varfilter_wrapper.pl: missing required option --$name\n";
+}
+
+# Returns $value, or undef when it is missing, empty, or equal to $unset_marker.
+sub selector_value
+{
+    my ($value, $unset_marker) = @_;
+    return if !defined $value || $value eq '' || $value eq $unset_marker;
+    return $value;
+}
+
+my @filters;
+for my $i (0 .. $#opt_zygosity)
+{
+    my @selectors;
+
+    my $zygosity = selector_value($opt_zygosity[$i], 'NA');
+    push @selectors, $zygosity if defined $zygosity;
+
+    my $vartype = selector_value($opt_vartype[$i], 'NA');
+    push @selectors, 'varType=' . $vartype if defined $vartype;
+
+    # Score values: drop the leading 'x' placeholder before use.
+    my $vaf = selector_value($opt_varscorevaf[$i], 'x');
+    if (defined $vaf)
+    {
+        $vaf =~ s/^x//;
+        push @selectors, 'varScoreVAF<' . $vaf;
+    }
+
+    my $eaf = selector_value($opt_varscoreeaf[$i], 'x');
+    if (defined $eaf)
+    {
+        $eaf =~ s/^x//;
+        push @selectors, 'varScoreEAF<' . $eaf;
+    }
+
+    my $varquality = selector_value($opt_varquality[$i], 'NA');
+    push @selectors, 'varQuality!=' . $varquality if defined $varquality;
+
+    # A filter row with every selector unset contributes nothing.
+    push @filters, join(':', @selectors) if @selectors;
+}
+my $append = @filters ? '#' . join(',', @filters) : '';
+
+# Echo the command for the job log; the quotes are only for display.
+print "cgatools varfilter
+--beta
+--reference $opt_reference
+--output $opt_output
+--input '${opt_input}${append}'\n";
+
+# List-form system() runs cgatools without a shell, so '#', '<', '!', quotes or
+# spaces in the paths and filters need no escaping and cannot be interpreted as
+# shell syntax. cgatools' own stdout/stderr pass straight through.
+my @command = (
+    'cgatools', 'varfilter', '--beta',
+    '--reference', $opt_reference,
+    '--output',    $opt_output,
+    '--input',     $opt_input . $append,
+);
+system(@command);
+if ($? == -1)
+{
+    die "varfilter_wrapper.pl: failed to run cgatools: $!\n";
+}
+if ($? & 127)
+{
+    die sprintf("varfilter_wrapper.pl: cgatools died with signal %d\n", $? & 127);
+}
+exit($? >> 8);
\ No newline at end of file