# HG changeset patch
# User bcrain-completegenomics
# Date 1339516759 14400
# Node ID 7debe34387134b340b92803e22a0511f3b706860
# Parent ffb2b0244ab2d97baabcf4583d8d81dd4874c433
Deleted selected files
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/.DS_Store
Binary file CG_cgatools/.DS_Store has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/._.DS_Store
Binary file CG_cgatools/._.DS_Store has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/._README.txt
Binary file CG_cgatools/._README.txt has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/._datatypes_conf.xml
Binary file CG_cgatools/._datatypes_conf.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/._tool_config.xml.sample.xml
Binary file CG_cgatools/._tool_config.xml.sample.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/._tool_data_table_conf.xml.sample.xml
Binary file CG_cgatools/._tool_data_table_conf.xml.sample.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/README.txt
--- a/CG_cgatools/README.txt Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-Provides galaxy tools for Complete Genomics' cgatools package - http://www.completegenomics.com
-
-Create new section 'Complete Genomics' if it doesn't already exist in your tool panel.
\ No newline at end of file
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/datatypes_conf.xml
--- a/CG_cgatools/datatypes_conf.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/lib/galaxy/datatypes/._completegenomics.py
Binary file CG_cgatools/lib/galaxy/datatypes/._completegenomics.py has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/lib/galaxy/datatypes/completegenomics.py
--- a/CG_cgatools/lib/galaxy/datatypes/completegenomics.py Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-"""
-Complete Genomics datatypes
-Birgit Crain - Complete Genomics, Inc
-"""
-
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
-import logging
-from galaxy.datatypes import data
-from galaxy import util
-from cgi import escape
-from galaxy.datatypes import metadata
-from galaxy.datatypes import tabular
-from galaxy.datatypes.metadata import MetadataElement
-from galaxy.datatypes.tabular import Tabular
-import galaxy_utils.sequence.vcf
-from galaxy.datatypes.sniff import *
-
-log = logging.getLogger(__name__)
-
-class CG_Var( Tabular ):
- file_ext = 'cg_var'
- def __init__(self, **kwd):
- """Initialize CG_Var datatype"""
- Tabular.__init__( self, **kwd )
- self.column_names = ['locus', 'ploidy', 'allele', 'chromosome', 'begin', 'end',
- 'varType', 'reference', 'alleleSeq', 'varScoreVAF',
- 'varScoreEAF', 'varQuality', 'hapLink', 'xRef'
- ]
- def display_peek( self, dataset ):
- """Returns formated html of peek"""
- return Tabular.make_html_table( self, dataset, column_names=self.column_names )
-
-class CG_MasterVar( Tabular ):
- file_ext = 'cg_mastervar'
- def __init__(self, **kwd):
- """Initialize CG_MasterVar datatype"""
- Tabular.__init__( self, **kwd )
- self.column_names = ['locus', 'ploidy', 'chromosome', 'begin', 'end', 'zygosity',
- 'varType', 'reference', 'allele1Seq', 'allele2Seq',
- 'allele1VarScoreVAF', 'allele2VarScoreVAF', 'allele1VarScoreEAF',
- 'allele2VarScoreEAF', 'allele1VarQuality', 'allele2VarQuality',
- 'allele1HapLink', 'allele2HapLink', 'allele1XRef', 'allele2XRef',
- 'evidenceIntervalId', 'allele1ReadCount', 'allele2ReadCount',
- 'referenceAlleleRead', 'totalReadCount', 'allele1Gene',
- 'allele2Gene pfam', 'miRBaseId', 'repeatMasker', 'segDupOverlap',
- 'relativeCoverageDiploid', 'calledPloidy',
- 'relativeCoverageNondiploid', 'calledLevel'
- ]
-
- def display_peek( self, dataset ):
- """Returns formated html of peek"""
- return Tabular.make_html_table( self, dataset, column_names=self.column_names )
-
-class CG_Gene( Tabular ):
- file_ext = 'cg_gene'
- def __init__(self, **kwd):
- """Initialize CG_Gene datatype"""
- Tabular.__init__( self, **kwd )
- self.column_names = ['index', 'locus', 'allele', 'chromosome', 'begin', 'end',
- 'varType', 'reference', 'call', 'xRef', 'geneId',
- 'mrnaAcc', 'proteinAcc', 'symbol', 'orientation', 'component',
- 'componentIndex', 'hasCodingRegion', 'impact', 'nucleotidePos',
- 'proteinPos', 'annotationRefSequence', 'sampleSequence',
- 'genomeRefSequence', 'pfam'
- ]
-
- def display_peek( self, dataset ):
- """Returns formated html of peek"""
- return Tabular.make_html_table( self, dataset, column_names=self.column_names )
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tool-data/._cg_crr_files.loc.sample
Binary file CG_cgatools/tool-data/._cg_crr_files.loc.sample has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tool-data/cg_crr_files.loc.sample
--- a/CG_cgatools/tool-data/cg_crr_files.loc.sample Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use .crr reference files. You will need to download or create
-#the .crr reference files and then create a cg_crr_files.loc file
-#similar to this one (store it in this directory) that points to
-#the location of the files. The cg_crr_files.loc
-#file has this format (white space characters are TAB characters):
-#
-#
-#
-#hg19 hg19 hg19.crr /Users/bcrain/Documents/hg19.crr
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tool_config.xml.sample.xml
--- a/CG_cgatools/tool_config.xml.sample.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tool_data_table_conf.xml.sample.xml
--- a/CG_cgatools/tool_data_table_conf.xml.sample.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-
-
-
-
- value, dbkey, name, path
-
-
-
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/.DS_Store
Binary file CG_cgatools/tools/cgatools/.DS_Store has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._calldiff.xml
Binary file CG_cgatools/tools/cgatools/._calldiff.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._join.xml
Binary file CG_cgatools/tools/cgatools/._join.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._junctiondiff.xml
Binary file CG_cgatools/tools/cgatools/._junctiondiff.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._listtestvariants.xml
Binary file CG_cgatools/tools/cgatools/._listtestvariants.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._listvariants.xml
Binary file CG_cgatools/tools/cgatools/._listvariants.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._snpdiff.xml
Binary file CG_cgatools/tools/cgatools/._snpdiff.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._testing.pl
Binary file CG_cgatools/tools/cgatools/._testing.pl has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._testvariants.xml
Binary file CG_cgatools/tools/cgatools/._testvariants.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._varfilter.xml
Binary file CG_cgatools/tools/cgatools/._varfilter.xml has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/._varfilter_wrapper.pl
Binary file CG_cgatools/tools/cgatools/._varfilter_wrapper.pl has changed
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/calldiff.xml
--- a/CG_cgatools/tools/cgatools/calldiff.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,343 +0,0 @@
-
-
- compares two Complete Genomics variant files.
-
-
- cgatools
-
-
-
- cgatools calldiff --beta
- --reference ${crr.fields.path}
- --variantsA $data_sources.inputA
- --variantsB $data_sources.inputB
- $validation
- $diploid
- --locus-stats-column-count $column
- --max-hypothesis-count $hypothesis
- --output-prefix cg_
- --reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} ${somatic.report6} | sed 's/ */,/g'`
- #if $somatic.report6 == "SomaticOutput"
- --genome-rootA $somatic.genomeA
- --genome-rootB $somatic.genomeB
- --calibration-root $somatic.calibration
- #end if
-
-
-
-
- (report1 == 'SuperlocusOutput')
-
-
- (report2 == 'SuperlocusStats')
-
-
- (report3 == 'LocusOutput')
-
-
- (report4 == 'LocusStats')
-
-
- (report5 == 'VariantOutput')
-
-
- (report5 == 'VariantOutput')
-
-
- (somatic['report6'] == 'SomaticOutput')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool compares two Complete Genomics variant files.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- calldiff - Compares two Complete Genomics variant files.
-
- DESCRIPTION
- Compares two Complete Genomics variant files. Divides the genome up into
- superloci of nearby variants, then compares the superloci. Also refines the
- comparison to determine per-call or per-locus comparison results.
-
- Comparison results are usually described by a semi-colon separated string,
- one per allele. Each allele's comparison result is one of the following
- classifications:
-
- ref-identical The alleles of the two variant files are identical, and
- they are consistent with the reference.
- alt-identical The alleles of the two variant files are identical, and
- they are inconsistent with the reference.
- ref-consistent The alleles of the two variant files are consistent,
- and they are consistent with the reference.
- alt-consistent The alleles of the two variant files are consistent,
- and they are inconsistent with the reference.
- onlyA The alleles of the two variant files are inconsistent,
- and only file A is inconsistent with the reference.
- onlyB The alleles of the two variant files are inconsistent,
- and only file B is inconsistent with the reference.
- mismatch The alleles of the two variant files are inconsistent,
- and they are both inconsistent with the reference.
- phase-mismatch The two variant files would be consistent if the
- hapLink field had been empty, but they are
- inconsistent.
- ploidy-mismatch The superlocus did not have uniform ploidy.
-
- In some contexts, this classification is rolled up into a simplified
- classification, which is one of "identical", "consistent", "onlyA",
- "onlyB", or "mismatch".
-
- A good place to start looking at the results is the superlocus-output file.
- It has columns defined as follows:
-
- SuperlocusId An identifier given to the superlocus.
- Chromosome The name of the chromosome.
- Begin The 0-based offset of the start of the superlocus.
- End The 0-based offset of the base one past the end of the
- superlocus.
- Classification The match classification of the superlocus.
- Reference The reference sequence.
- AllelesA A semicolon-separated list of the alleles (one per
- haplotype) for variant file A, for the phasing with the
- best comparison result.
- AllelesB A semicolon-separated list of the alleles (one per
- haplotype) for variant file B, for the phasing with the
- best comparison result.
-
- The locus-output file contains, for each locus in file A and file B that is
- not consistent with the reference, an annotated set of calls for the locus.
- The calls are annotated with the following columns:
-
- SuperlocusId The id of the superlocus containing the locus.
- File The variant file (A or B).
- LocusClassification The locus classification is determined by the
- varType column of the call that is inconsistent
- with the reference, concatenated with a
- modifier that describes whether the locus is
- heterozygous, homozygous, or contains no-calls.
- If there is no one variant in the locus (i.e.,
- it is heterozygous alt-alt), the locus
- classification begins with "other".
- LocusDiffClassification The match classification for the locus. This is
- defined to be the best of the comparison of the
- locus to the same region in the other file, or
- the comparison of the superlocus.
-
- The somatic output file contains a list of putative somatic variations of
- genome A. The output includes only those loci that can be classified as
- snp, del, ins or sub in file A, and are called reference in the file B.
- Every locus is annotated with the following columns:
-
- VarCvgA The totalReadCount from file A for this locus
- (computed on the fly if file A is not a
- masterVar file).
- VarScoreA The varScoreVAF from file A, or varScoreEAF if
- the "--diploid" option is used.
- RefCvgB The maximum of the uniqueSequenceCoverage
- values for the locus in genome B.
- RefScoreB Minimum of the reference scores of the locus in
- genome B.
- SomaticCategory The category used for determining the
- calibrated scores and the SomaticRank.
- VarScoreACalib The calibrated variant score of file A, under
- the model selected by using or not using the
- "--diploid" option, and corrected for the count
- of heterozygous variants observed in this
- genome. See user guide for more information.
- VarScoreBCalib The calibrated reference score of file B, under
- the model selected by using or not using the
- "--diploid" option, and corrected for the count
- of heterozygous variants observed in this
- genome. See user guide for more information.
- SomaticRank The estimated rank of this somatic mutation,
- amongst all true somatic mutations within this
- SomaticCategory. The value is a number between
- 0 and 1; a value of 0.012 means, for example,
- that an estimated 1.2% of the true somatic
- mutations in this somaticCategory have a
- somaticScore less than the somaticScore for
- this mutation. See user guide for more
- information.
- SomaticScore An integer that provides a total order on
- quality for all somatic mutations. It is equal
- to -10*log10( P(false)/P(true) ), under the
- assumption that this genome has a rate of
- somatic mutation equal to 1/Mb for
- SomaticCategory snp, 1/10Mb for SomaticCategory
- ins, 1/10Mb for SomaticCategory del, and 1/20Mb
- for SomaticCategory sub. The computation is
- based on the assumptions described in the user
- guide, and is affected by choice of variant
- model selected by using or not using the
- "--diploid" option.
- SomaticQuality Equal to VQHIGH for all somatic mutations where
- SomaticScore >= -10. Otherwise, this column is
- empty.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --reference arg
- The input crr file.
-
- --variantsA arg
- The "A" input variant file.
-
- --variantsB arg
- The "B" input variant file.
-
- --output-prefix arg
- The path prefix for all output reports.
-
- --reports arg (=SuperlocusOutput,SuperlocusStats,LocusOutput,LocusStats)
- Comma-separated list of reports to generate. (Beware any reports whose
- name begins with "Debug".) A report is one of:
- SuperlocusOutput Report for superlocus classification.
- SuperlocusStats Report for superlocus classification stats.
- LocusOutput Report for locus classification.
- LocusStats Report for locus stats.
- VariantOutput Both variant files annotated by comparison
- results.If the somatic output report is
- requested, file A is also annotated with the
- same score ranks as produced in that report.
- SomaticOutput Report for the list of simple variations that
- are present only in file "A", annotated with
- the score that indicates the probability of
- the variation being truly somatic. Requires
- beta, genome-rootA, and genome-rootB options
- to be provided as well. Note: generating this
- report slows calldiff by 10x-20x.
- DebugCallOutput Report for call classification.
- DebugSuperlocusOutput Report for debug superlocus information.
- DebugSomaticOutput Report for distribution estimates used for
- somatic rescoring. Only produced if
- SomaticOutput is also turned on.
-
- --diploid
- Uses varScoreEAF instead of varScoreVAF in somatic score computations.
- Also, uses diploid variant model instead of variable allele mixture
- model.
-
- --locus-stats-column-count arg (=15)
- The number of columns for locus compare classification in the locus
- stats file.
-
- --max-hypothesis-count arg (=32)
- The maximum number of possible phasings to consider for a superlocus.
-
- --no-reference-cover-validation
- Turns off validation that all bases of a chromosome are covered by
- calls of the variant file.
-
- --genome-rootA arg
- The "A" genome directory, for example /data/GS00118-DNA_A01; this
- directory is expected to contain ASM/REF and ASM/EVIDENCE
- subdirectories.
-
- --genome-rootB arg
- The "B" genome directory.
-
- --calibration-root arg
- The directory containing calibration data. For example, there should
- exist a file calibration-root/0.0.0/metrics.tsv.
-
- --beta
- This flag enables the SomaticOutput report, which is beta
- functionality.
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/join.xml
--- a/CG_cgatools/tools/cgatools/join.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-
-
- two tsv files based on equal fields or overlapping regions.
-
-
- cgatools
-
-
-
- cgatools join --beta
- --input $input1
- --input $input2
- --output $output
- --output-mode $outmode
- $dump
- --select $col
- #for $m in $matched
- --match ${m.match}
- #end for
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool joins two tab-delimited files based on equal fields or overlapping regions.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- join - Joins two tab-delimited files based on equal fields or overlapping regions.
-
- DESCRIPTION
- Joins two tab-delimited files based on equal fields or overlapping regions.
- By default, an output record is produced for each match found between file
- A and file B, but output format can be controlled by the --output-mode
- parameter.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- --input arg
- File name to use as input (may be passed in as arguments at the end of
- the command), or omitted for stdin). There must be exactly two input
- files to join. If only one file is specified by name, file A is taken
- to be stdin and file B is the named file. File B is read fully into
- memory, and file A is streamed. File A's columns appear first in the
- output.
-
- --output arg (=STDOUT)
- The output file name (may be omitted for stdout).
-
- --match arg
- A match specification, which is a column from A and a column from B
- separated by a colon.
-
- --overlap arg
-
- -m [ --output-mode ] arg (=full)
- Output mode, one of the following:
- full Print an output record for each match found between
- file A and file B.
- compact Print at most one record for each record of file A,
- joining the file B values by a semicolon and
- suppressing repeated B values and empty B values.
- compact-pct Same as compact, but for each distinct B value,
- annotate with the percentage of the A record that is
- overlapped by B records with that B value. Percentage
- is rounded up to nearest integer.
-
- --overlap-mode arg (=strict)
- Overlap mode, one of the following:
- strict Range A and B overlap if A.begin < B.end and
- B.begin < A.end.
- allow-abutting-points Range A and B overlap they meet the strict
- requirements, or if A.begin <= B.end and
- B.begin <= A.end and either A or B has zero
- length.
-
- --select arg (=A.*,B.*)
- Set of fields to select for output.
-
- -a [ --always-dump ]
- Dump every record of A, even if there are no matches with file B.
-
- --overlap-fraction-A arg (=0)
- Minimum fraction of A region overlap for filtering output.
-
- --boundary-uncertainty-A arg (=0)
- Boundary uncertainty for overlap filtering. Specifically, records
- failing the following predicate are filtered away: overlap >=
- overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
-
- --overlap-fraction-B arg (=0)
- Minimum fraction of B region overlap for filtering output.
-
- --boundary-uncertainty-B arg (=0)
- Boundary uncertainty for overlap filtering. Specifically, records
- failing the following predicate are filtered away: overlap >=
- overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
-
- SUPPORTED FORMAT_VERSION
- Any
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/junctiondiff.xml
--- a/CG_cgatools/tools/cgatools/junctiondiff.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-
-
- reports difference between junction calls
-
-
- cgatools
-
-
-
- cgatools junctiondiff --beta -h
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool reports difference between junction calls of Complete Genomics junctions files
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- junctiondiff - Reports difference between junction calls of Complete Genomics junctions files.
-
- DESCRIPTION
- junctiondiff takes two junction files A and B as input and produces the
- following output:
- - "diff-inputFileName" - the junctions from an input file A that are not
- present in input file B.
- - "report.txt" - a brief summary report (if --statout is used)
-
- Two junctions are considered equivalent if:
- - they come from different files
- - left and right positions of one junction are not more than "--distance"
- bases apart from the corresponding positions of another junction
- - the junction scores are equal or above the scoreThreshold
- - they are on the same strands
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- -s [ --reference ] arg
- Reference file.
-
- -a [ --junctionsA ] arg
- input junction file A.
-
- -b [ --junctionsB ] arg
- input junction file B.
-
- -A [ --scoreThresholdA ] arg (=10)
- score threshold value for the input file A.
-
- -B [ --scoreThresholdB ] arg (=0)
- score threshold value for the input file B.
-
- -d [ --distance ] arg (=200)
- Max distance between coordinates of potentially compatible junctions.
-
- -l [ --minlength ] arg (=500)
- Minimum deletion junction length to be included into the difference
- file.
-
- -o [ --output-prefix ] arg
- The path prefix for all the output reports.
-
- -S [ --statout ]
- (Debug) Report various input file statistics. Experimental feature.
-
- SUPPORTED FORMAT_VERSION
- 1.5 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/listtestvariants.xml
--- a/CG_cgatools/tools/cgatools/listtestvariants.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,239 +0,0 @@
-
-
-
-
-
-
- cgatools
-
-
-
- cgatools listvariants
- --beta
- --reference ${crr.fields.path}
- --output $output1
- #if $include_list.listing == "yes"
- --variant-listing $include_list.list
- #end if
- $longvar
- --variants
- #if $file_types.data_sources.data_source == "in"
- #for $v in $file_types.data_sources.varfiles
- ${v.input}
- #end for
- #else
- `cat $file_types.data_sources.varlist`
- #end if
- ;
-
- cgatools testvariants
- --beta
- --reference ${crr.fields.path}
- --output $output2
- --input $output1
- --variants
- #if $file_types.data_sources.data_source == "in"
- #for $v in $file_types.data_sources.varfiles
- ${v.input}
- #end for
- #else
- `cat $file_types.data_sources.varlist`
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool uses the cgatools testvariants to test variant or mastervar files for the presence of variants.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- listvariants - Lists the variants present in a variant file.
-
- DESCRIPTION
- Lists all called variants present in the specified variant files, in a
- format suitable for processing by the testvariants command. The output is a
- tab-delimited file consisting of the following columns:
-
- variantId Sequential id assigned to each variant.
- chromosome The chromosome of the variant.
- begin 0-based reference offset of the beginning of the variant.
- end 0-based reference offset of the end of the variant.
- varType The varType as extracted from the variant file.
- reference The reference sequence.
- alleleSeq The variant allele sequence as extracted from the variant
- file.
- xRef The xRef as extrated from the variant file.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- --reference arg
- The reference crr file.
-
- --output arg (=STDOUT)
- The output file (may be omitted for stdout).
-
- --variants arg
- The input variant files (may be positional args).
-
- --variant-listing arg
- The output of another listvariants run, to be merged in to produce the
- output of this run.
-
- --list-long-variants
- In addition to listing short variants, list longer variants as well
- (10's of bases) by concatenating nearby calls.
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
-
- COMMAND NAME
- testvariants - Tests variant files for presence of variants.
-
- DESCRIPTION
- Tests variant files for presence of variants. The output is a tab-delimited
- file consisting of the columns of the input variants file, plus a column
- for each assembly results file that contains a character code for each
- allele. The character codes have meaning as follows:
-
- 0 This allele of this genome is consistent with the reference at this
- locus but inconsistent with the variant.
- 1 This allele of this genome has the input variant at this locus.
- N This allele of this genome has no-calls but is consistent with the
- input variant.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- --reference arg
- The reference crr file.
-
- --input arg (=STDIN)
- The input variants to test for.
-
- --output arg (=STDOUT)
- The output file (may be omitted for stdout).
-
- --variants arg
- The input variant files (may be passed in as arguments at the end of
- the command).
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/listvariants.xml
--- a/CG_cgatools/tools/cgatools/listvariants.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-
-
-
- lists all called variants
-
-
- cgatools
-
-
-
- cgatools listvariants
- --beta
- --reference ${crr.fields.path}
- --output $output
- #if $include_list.listing == "yes"
- --variant-listing $include_list.list
- #end if
- $longvar
- --variants
- #if $file_types.data_sources.data_source == "in"
- #for $v in $file_types.data_sources.varfiles
- ${v.input}
- #end for
- #else
- `cat $file_types.data_sources.varlist`
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool uses the cgatools listvariants to list all called variants present in the var or mastervar files.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- listvariants - Lists the variants present in a variant file.
-
- DESCRIPTION
- Lists all called variants present in the specified variant files, in a
- format suitable for processing by the testvariants command. The output is a
- tab-delimited file consisting of the following columns:
-
- variantId Sequential id assigned to each variant.
- chromosome The chromosome of the variant.
- begin 0-based reference offset of the beginning of the variant.
- end 0-based reference offset of the end of the variant.
- varType The varType as extracted from the variant file.
- reference The reference sequence.
- alleleSeq The variant allele sequence as extracted from the variant
- file.
- xRef The xRef as extrated from the variant file.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- --reference arg
- The reference crr file.
-
- --output arg (=STDOUT)
- The output file (may be omitted for stdout).
-
- --variants arg
- The input variant files (may be positional args).
-
- --variant-listing arg
- The output of another listvariants run, to be merged in to produce the
- output of this run.
-
- --list-long-variants
- In addition to listing short variants, list longer variants as well
- (10's of bases) by concatenating nearby calls.
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/snpdiff.xml
--- a/CG_cgatools/tools/cgatools/snpdiff.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-
-
- compares snp calls to a Complete Genomics variant file.
-
-
- cgatools
-
-
-
- cgatools snpdiff --beta -h
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool ompares snp calls to a Complete Genomics variant file.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- snpdiff - Compares snp calls to a Complete Genomics variant file.
-
- DESCRIPTION
- Compares the snp calls in the "genotypes" file to the calls in a Complete
- Genomics variant file. The genotypes file is a tab-delimited file with at
- least the following columns (additional columns may be given):
-
- Chromosome (Required) The name of the chromosome.
- Offset0Based (Required) The 0-based offset in the chromosome.
- GenotypesStrand (Optional) The strand of the calls in the Genotypes
- column (+ or -, defaults to +).
- Genotypes (Optional) The calls, one per allele. The following
- calls are recognized:
- A,C,G,T A called base.
- N A no-call.
- - A deleted base.
- . A non-snp variation.
-
- The output is a tab-delimited file consisting of the columns of the
- original genotypes file, plus the following additional columns:
-
- Reference The reference base at the given position.
- VariantFile The calls made by the variant file, one per allele.
- The character codes are the same as is described for
- the Genotypes column.
- DiscordantAlleles (Only if Genotypes is present) The number of
- Genotypes alleles that are discordant with calls in
- the VariantFile. If the VariantFile is described as
- haploid at the given position but the Genotypes is
- diploid, then each genotype allele is compared
- against the haploid call of the VariantFile.
- NoCallAlleles (Only if Genotypes is present) The number of
- Genotypes alleles that were no-called by the
- VariantFile. If the VariantFile is described as
- haploid at the given position but the Genotypes is
- diploid, then a VariantFile no-call is counted twice.
-
- The verbose output is a tab-delimited file consisting of the columns of the
- original genotypes file, plus the following additional columns:
-
- Reference The reference base at the given position.
- VariantFile The call made by the variant file for one allele (there is
- a line in this file for each allele). The character codes
- are the same as is described for the Genotypes column.
- [CALLS] The rest of the columns are pasted in from the VariantFile,
- describing the variant file line used to make the call.
-
- The stats output is a comma-separated file with several tables describing
- the results of the snp comparison, for each diploid genotype. The tables
- all describe the comparison result (column headers) versus the genotype
- classification (row labels) in different ways. The "Locus classification"
- tables have the most detailed match classifications, while the "Locus
- concordance" tables roll these match classifications up into "discordance"
- and "no-call". A locus is considered discordant if it is discordant for
- either allele. A locus is considered no-call if it is concordant for both
- alleles but has a no-call on either allele. The "Allele concordance"
- describes the comparison result on a per-allele basis.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --reference arg
- The input crr file.
-
- --variants arg
- The input variant file.
-
- --genotypes arg
- The input genotypes file.
-
- --output-prefix arg
- The path prefix for all output reports.
-
- --reports arg (=Output,Verbose,Stats)
- Comma-separated list of reports to generate. A report is one of:
- Output The output genotypes file.
- Verbose The verbose output file.
- Stats The stats output file.
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/testing.pl
--- a/CG_cgatools/tools/cgatools/testing.pl Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-#!/usr/bin/perl
-
-print "$0 @ARGV\n";
-open OUT, ">@ARGV[0]";
-print "test1 ok\ttest1 ok\ntest1 ok\ttest1 ok\n";
-print OUT "test ok\ttest ok\ntest ok\ttest ok\n";
-close OUT;
-open OUT, ">somefile";
-print OUT "test2 ok\ttest2 ok\ntest2 ok\ttest2 ok\n";
-close OUT;
\ No newline at end of file
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/testvariants.xml
--- a/CG_cgatools/tools/cgatools/testvariants.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-
-
-
- test for the presence of variants
-
-
- cgatools
-
-
-
- cgatools testvariants
- --beta
- --reference ${crr.fields.path}
- --output $output
- --input $listing
- --variants
- #if $file_types.data_sources.data_source == "in"
- #for $v in $file_types.data_sources.varfiles
- ${v.input}
- #end for
- #else
- `cat $file_types.data_sources.varlist`
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool uses the cgatools testvariants to test variant or mastervar files for the presence of variants.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- testvariants - Tests variant files for presence of variants.
-
- DESCRIPTION
- Tests variant files for presence of variants. The output is a tab-delimited
- file consisting of the columns of the input variants file, plus a column
- for each assembly results file that contains a character code for each
- allele. The character codes have meaning as follows:
-
- 0 This allele of this genome is consistent with the reference at this
- locus but inconsistent with the variant.
- 1 This allele of this genome has the input variant at this locus.
- N This allele of this genome has no-calls but is consistent with the
- input variant.
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta
- flag.
-
- --reference arg
- The reference crr file.
-
- --input arg (=STDIN)
- The input variants to test for.
-
- --output arg (=STDOUT)
- The output file (may be omitted for stdout).
-
- --variants arg
- The input variant files (may be passed in as arguments at the end of
- the command).
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/varfilter.xml
--- a/CG_cgatools/tools/cgatools/varfilter.xml Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-
-
-
- copies input file, applying filters.
-
-
- cgatools
-
-
-
- varfilter_wrapper.pl
- --reference $crr.fields.path
- --output $output
- --input $file_types.data_sources.input
- #for $f in $filters
- --zygosity $f.zygosity
- --vartype $f.vartype
- --varscorevaf x$f.varscorevaf
- --varscoreeaf x$f.varscoreeaf
- --varquality $f.varquality
- #end for
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool copies input var file or masterVar file to output, applying specified filters.
-
-cgatools: http://sourceforge.net/projects/cgatools/files/
-
------
-
-**cgatools Manual**::
-
- COMMAND NAME
- varfilter - Copies input var file or masterVar file to output, applying
- specified filters.
-
- DESCRIPTION
- Copies input var file or masterVar file to output, applying specified
- filters (which are available to all cgatools commands that read a var file
- or masterVar file as input). Filters are specified by appending the filter
- specification to the var file name on the command line. For example:
-
- /path/to/var.tsv.bz2#varQuality!=VQHIGH
-
- The preceding example filters out any calls marked as VQLOW. The filter
- specification follows the "#" sign, and consists of a list of filters to
- apply, separated by a comma. Each filter is a colon-separated list of call
- selectors. Any scored call that passes all the colon-separated call
- selectors for one or more of the comma-separated filters is turned into a
- no-call. The following call selectors are available:
-
- hom Selects only calls in homozygous loci.
- het Selects any scored call not selected by the hom selector.
- varType=XX Selects calls whose varType is XX.
- varScoreVAF<XX Selects calls whose varScoreVAF<XX.
- varScoreEAF<XX Selects calls whose varScoreEAF<XX.
- varQuality!=XX Selects calls whose varQuality is not XX.
-
- Here is an example that filters homozygous SNPs with varScoreVAF < 25 and
- heterozygous insertions with varScoreEAF < 50:
-
-
- '/path/to/var.tsv.bz2#hom:varType=snp:varScoreVAF<25,het:varType=ins:varScoreEAF<50'
-
-
- OPTIONS
- -h [ --help ]
- Print this help message.
-
- --beta
- This is a beta command. To run this command, you must pass the --beta flag.
-
- --reference arg
- The reference crr file.
-
- --input arg
- The input var file or masterVar file (typically with filters specified).
-
- --output arg (=STDOUT)
- The output file (may be omitted for stdout).
-
- SUPPORTED FORMAT_VERSION
- 0.3 or later
-
-
diff -r ffb2b0244ab2 -r 7debe3438713 CG_cgatools/tools/cgatools/varfilter_wrapper.pl
--- a/CG_cgatools/tools/cgatools/varfilter_wrapper.pl Tue Jun 12 11:54:38 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use Getopt::Long;
-use vars qw($opt_reference $opt_input $opt_output @opt_zygosity @opt_vartype @opt_varscorevaf @opt_varscoreeaf @opt_varquality);
-$| = 1; # set autoflush to screen
-
-# This is a wrapper for the cgatools varfilter function to run cgatools varfilter in Galaxy.
-# The wrapper generates the filter(s) in the correct format to be used with the input file.
-# written 6-1-2012 by bcrain@completegenomics.com
-
-
-#print join("\n", @ARGV), "\n";
-&GetOptions("reference=s", "input=s", "output=s", "zygosity=s@", "vartype=s@", "varscorevaf=s@", "varscoreeaf=s@", "varquality=s@");
-
-my $append = '';
-
-for (my $i = 0; $i <= $#opt_zygosity; $i ++)
-{
- my $filter = '';
- unless ($opt_zygosity[$i] eq 'NA') {$filter = $opt_zygosity[$i];}
- unless ($opt_vartype[$i] eq 'NA')
- {
- $filter ne '' and $filter .= ':';
- $filter .= 'varType=' . $opt_vartype[$i];
- }
- unless ($opt_varscorevaf[$i] eq 'x')
- {
- $filter ne '' and $filter .= ':';
- $opt_varscorevaf[$i] =~ s/^x//;
- $filter .= 'varScoreVAF<' . $opt_varscorevaf[$i];
- }
- unless ($opt_varscoreeaf[$i] eq 'x')
- {
- $filter ne '' and $filter .= ':';
- $opt_varscoreeaf[$i] =~ s/^x//;
- $filter .= 'varScoreEAF<' . $opt_varscoreeaf[$i];
- }
- unless ($opt_varquality[$i] eq 'NA')
- {
- $filter ne '' and $filter .= ':';
- $filter .= 'varQuality!=' . $opt_varquality[$i];
- }
-
- if ($filter ne '')
- {
- if ($append eq '') {$append = '#' . $filter;}
- else {$append .= ',' . $filter;}
- }
-}
-print "cgatools varfilter
---beta
---reference $opt_reference
---output $opt_output
---input '${opt_input}${append}'\n";
-
-`cgatools varfilter --beta --reference $opt_reference --output $opt_output --input '${opt_input}${append}'`;
\ No newline at end of file