Mercurial > repos > devteam > testing_cgatools
changeset 0:ef23f9cd599b draft default tip
Uploaded
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/README.txt Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,81 @@ +Provides galaxy tools for Complete Genomics' cgatools package - http://www.completegenomics.com + +This repository provides tools to execute functions of cgatools from Complete Genomics, Inc. +and includes the cgatools 1.6 executable. + +Reference genome files for cgatools can be downloaded from Complete Genomics' ftp site: +ftp://ftp.completegenomics.com/ReferenceFiles/build37.crr +ftp://ftp.completegenomics.com/ReferenceFiles/build36.crr + +Calibration files for cgatools can be downloaded from Complete Genomics' ftp site: +ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz + +After copying the files to the desired locations, follow the instructions below to register +the reference files with galaxy. + + + + +AUTOMATIC INSTALL + +When prompted for a tool panel section to contain the installed tools create a new section +called 'Complete Genomics - cgatools 1.6'. + +After install create a cg_crr_files.loc file in the tool-data directory of your Galaxy +instance by copying the cg_crr_files.loc.sample file. In cg_crr_files.loc edit the path +for the reference genome files (.crr files) downloaded from Complete Genomics' ftp site. + +Restart Galaxy instance after editing cg_crr_files.loc. + + + + +MANUAL INSTALL + +For manual install from compressed files move/copy the following files into your Galaxy instance: +directory tools/cgatools_1.6 to tools/ +file lib/galaxy/datatypes/completegenomics.py to lib/galaxy/datatypes/ +file tool-data/cg_crr_files.loc.sample to tool-data/cg_crr_files.loc + +In cg_crr_files.loc edit the path for the reference genome files (.crr files) downloaded +from Complete Genomics' ftp site. 
+ +Paste from tool_config.xml.sample into the tool_conf.xml of your Galaxy instance: + <!-- + Copy the following section to tool_conf.xml file in your Galaxy distribution if you are + adding Complete Genomics tools manually to your Galaxy instance + --> + <section name="Complete Genomics - cgatools 1.6" id="cg_cgatools1.6"> + <tool file="cgatools_1.6/listvariants.xml" /> + <tool file="cgatools_1.6/testvariants.xml" /> + <tool file="cgatools_1.6/listtestvariants.xml" /> + <tool file="cgatools_1.6/calldiff.xml" /> + <tool file="cgatools_1.6/snpdiff.xml" /> + <tool file="cgatools_1.6/junctiondiff.xml" /> + <tool file="cgatools_1.6/join.xml" /> + <tool file="cgatools_1.6/varfilter.xml" /> + <tool file="cgatools_1.6/mkvcf.xml" /> + <tool file="cgatools_1.6/evidence2sam.xml" /> + </section> + <!-- End of copied section --> + +Paste from tool_data_table_conf.xml.sample into the tool_data_table_conf.xml of your Galaxy instance: + <!-- Start location of cgatools crr files --> + <table name="cg_crr_files" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/cg_crr_files.loc" /> + </table> + <!-- End Location of cgatools crr files --> + +Paste from datatypes_conf.xml into the datatypes_conf.xml of your Galaxy instance: + <!-- + Copy the following section to datatypes_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance + --> + <!-- Start Complete Genomics Datatypes --> + <datatype extension="cg_var" type="galaxy.datatypes.completegenomics:CG_Var" display_in_upload="true" /> + <datatype extension="cg_mastervar" type="galaxy.datatypes.completegenomics:CG_MasterVar" display_in_upload="true" /> + <datatype extension="cg_gene" type="galaxy.datatypes.completegenomics:CG_Gene" display_in_upload="true" /> + <!-- End Complete Genomics Datatypes --> + <!-- End of copied section --> + +Restart Galaxy instance. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/cg_crr_files.loc.sample Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,17 @@ +#This is a sample file distributed with cgatools repository that enables the cgatools +#functions to use the .crr reference files. After installation download the reference +#genome files from Complete Genomics' ftp site: +#ftp://ftp.completegenomics.com/ReferenceFiles/build37.crr +#ftp://ftp.completegenomics.com/ReferenceFiles/build36.crr +#and edit the path for the reference genomes to correspond to their location. +# +#Restart your Galaxy instance to ensure the file locations are registered with Galaxy +#properly. +# +#The cg_crr_files.loc file has this format (white space characters are TAB characters): +# +#<value> <dbkey> <name> <path> +# +#hg19 hg19 build 37 /absolute/path/to/build37.crr +#hg18 hg18 build 36 /absolute/path/to/build36.crr +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/datatypes_conf.xml Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="completegenomics.py"/> + </datatype_files> + + <registration> + <!-- + Copy the following section to datatypes_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance + --> + <!-- Start Complete Genomics Datatypes --> + <datatype extension="cg_var" type="galaxy.datatypes.completegenomics:CG_Var" display_in_upload="true" /> + <datatype extension="cg_mastervar" type="galaxy.datatypes.completegenomics:CG_MasterVar" display_in_upload="true" /> + <datatype extension="cg_gene" type="galaxy.datatypes.completegenomics:CG_Gene" display_in_upload="true" /> + <!-- End Complete Genomics Datatypes --> + <!-- End of copied section --> + </registration> + <sniffers> + </sniffers> +</datatypes>
# lib/galaxy/datatypes/completegenomics.py
# (added in testing_cgatools-982e19c29ec0/cgatools, Thu Sep 27 13:37:59 2012 -0400)
"""
Complete Genomics datatypes
Birgit Crain - Complete Genomics, Inc

Defines three tabular Galaxy datatypes (var, masterVar and gene files). Each
class only attaches a fixed list of column names to the generic Tabular
datatype and passes that list to Tabular.make_html_table for the peek.
"""

import pkg_resources
pkg_resources.require( "bx-python" )

import logging
from galaxy.datatypes import data
from galaxy import util
from cgi import escape
from galaxy.datatypes import metadata
from galaxy.datatypes import tabular
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.tabular import Tabular
import galaxy_utils.sequence.vcf
from galaxy.datatypes.sniff import *

log = logging.getLogger(__name__)


class CG_Var( Tabular ):
    """Complete Genomics var file (extension ``cg_var``)."""
    file_ext = 'cg_var'

    def __init__(self, **kwd):
        """Initialize CG_Var datatype"""
        Tabular.__init__( self, **kwd )
        # One entry per column, in file order.
        self.column_names = ['locus', 'ploidy', 'allele', 'chromosome', 'begin', 'end',
                             'varType', 'reference', 'alleleSeq', 'varScoreVAF',
                             'varScoreEAF', 'varQuality', 'hapLink', 'xRef'
                             ]

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )


class CG_MasterVar( Tabular ):
    """Complete Genomics masterVar file (extension ``cg_mastervar``)."""
    file_ext = 'cg_mastervar'

    def __init__(self, **kwd):
        """Initialize CG_MasterVar datatype"""
        Tabular.__init__( self, **kwd )
        # One entry per column, in file order.
        # 'allele2Gene' and 'pfam' are separate columns: the original list had
        # them fused into the single string 'allele2Gene pfam' (missing comma),
        # which shifted every later header one column to the left.
        # 'referenceAlleleReadCount' was previously truncated to
        # 'referenceAlleleRead'.
        # NOTE(review): names follow the Complete Genomics masterVar header as
        # I recall it - confirm against the data file format specification.
        self.column_names = ['locus', 'ploidy', 'chromosome', 'begin', 'end', 'zygosity',
                             'varType', 'reference', 'allele1Seq', 'allele2Seq',
                             'allele1VarScoreVAF', 'allele2VarScoreVAF', 'allele1VarScoreEAF',
                             'allele2VarScoreEAF', 'allele1VarQuality', 'allele2VarQuality',
                             'allele1HapLink', 'allele2HapLink', 'allele1XRef', 'allele2XRef',
                             'evidenceIntervalId', 'allele1ReadCount', 'allele2ReadCount',
                             'referenceAlleleReadCount', 'totalReadCount', 'allele1Gene',
                             'allele2Gene', 'pfam', 'miRBaseId', 'repeatMasker', 'segDupOverlap',
                             'relativeCoverageDiploid', 'calledPloidy',
                             'relativeCoverageNondiploid', 'calledLevel'
                             ]

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )


class CG_Gene( Tabular ):
    """Complete Genomics gene annotation file (extension ``cg_gene``)."""
    file_ext = 'cg_gene'

    def __init__(self, **kwd):
        """Initialize CG_Gene datatype"""
        Tabular.__init__( self, **kwd )
        # One entry per column, in file order.
        self.column_names = ['index', 'locus', 'allele', 'chromosome', 'begin', 'end',
                             'varType', 'reference', 'call', 'xRef', 'geneId',
                             'mrnaAcc', 'proteinAcc', 'symbol', 'orientation', 'component',
                             'componentIndex', 'hasCodingRegion', 'impact', 'nucleotidePos',
                             'proteinPos', 'annotationRefSequence', 'sampleSequence',
                             'genomeRefSequence', 'pfam'
                             ]

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tool_config.xml.sample Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<toolbox> + <!-- + Copy the following section to tool_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance + --> + <section name="Complete Genomics - cgatools 1.6" id="cg_cgatools1.6"> + <tool file="cgatools_1.6/listvariants.xml" /> + <tool file="cgatools_1.6/testvariants.xml" /> + <tool file="cgatools_1.6/listtestvariants.xml" /> + <tool file="cgatools_1.6/calldiff.xml" /> + <tool file="cgatools_1.6/snpdiff.xml" /> + <tool file="cgatools_1.6/junctiondiff.xml" /> + <tool file="cgatools_1.6/join.xml" /> + <tool file="cgatools_1.6/varfilter.xml" /> + <tool file="cgatools_1.6/mkvcf.xml" /> + <tool file="cgatools_1.6/evidence2sam.xml" /> + </section> + <!-- End of copied section --> +</toolbox> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tool_data_table_conf.xml.sample Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<tables> + <!-- + Copy the following section to tool_data_table_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance + --> + <!-- Start location of cgatools crr files --> + <table name="cg_crr_files" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/cg_crr_files.loc" /> + </table> + <!-- End Location of cgatools crr files --> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tool_dependencies.xml Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="cgatools" version="1.6"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://sourceforge.net/projects/cgatools/files/1.6.0/cgatools-1.6.0.43-MacOSX_binary-x86_64.tar.gz</action> + <action type="move_directory_files"> + <source_directory>cgatools-1.6.0.43-MacOSX_binary-x86_64/bin</source_directory> + <destination_directory>$INSTALL_DIR/bin</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> +some text + </readme> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/calldiff.xml Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,388 @@ +<tool id="cg_calldiff" name="calldiff(beta) 1.6" version="1.0.1"> +<!-- +This tool creates a GUI for the calldiff function of cgatools from Complete Genomics, Inc. +written 6-18-2012 by bcrain@completegenomics.com +updated 8-14-2012 by bcrain@completegenomics.com +--> + + <description>compares two Complete Genomics variant files.</description> + + <command> +<!-- print version of cgatools to STDOUT--> +cgatools | head -1; + +<!-- print command lines to STDOUT--> +echo "cgatools calldiff --beta +--reference ${crr.fields.path} +--variantsA $data_sources.inputA +--variantsB $data_sources.inputB +$validation +$diploid +--locus-stats-column-count $column +--max-hypothesis-count $hypothesis +--output-prefix cg_ +--reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} ${somatic.report6} | sed 's/ */,/g'` +#if $somatic.report6 == "SomaticOutput" +--genome-rootA $somatic.genomeA +--genome-rootB $somatic.genomeB +--calibration-root $somatic.calibration +#end if +"; + +<!-- execute cgatools--> +cgatools calldiff --beta +--reference ${crr.fields.path} +--variantsA $data_sources.inputA +--variantsB $data_sources.inputB +$validation +$diploid +--locus-stats-column-count $column +--max-hypothesis-count $hypothesis +--output-prefix cg_ +--reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} ${somatic.report6} | sed 's/ */,/g'` +#if $somatic.report6 == "SomaticOutput" + --genome-rootA $somatic.genomeA + --genome-rootB $somatic.genomeB + --calibration-root $somatic.calibration +#end if + </command> + + <outputs> + <data format="tabular" name="output1" from_work_dir="cg_SuperlocusOutput.tsv" label="${tool.name} SuperlocusOutput"> + <filter>(report1 == 'SuperlocusOutput')</filter> + </data> + <data format="tabular" name="output2" from_work_dir="cg_SuperlocusStats.tsv" 
label="${tool.name} SuperlocusStats"> + <filter>(report2 == 'SuperlocusStats')</filter> + </data> + <data format="tabular" name="output3" from_work_dir="cg_LocusOutput.tsv" label="${tool.name} LocusOutput"> + <filter>(report3 == 'LocusOutput')</filter> + </data> + <data format="tabular" name="output4" from_work_dir="cg_LocusStats.tsv" label="${tool.name} LocusStats"> + <filter>(report4 == 'LocusStats')</filter> + </data> + <data format="tabular" name="output5a" from_work_dir="cg_VariantsA.tsv" label="${tool.name} VariantsA"> + <filter>(report5 == 'VariantOutput')</filter> + </data> + <data format="tabular" name="output5b" from_work_dir="cg_VariantsB.tsv" label="${tool.name} VariantsB"> + <filter>(report5 == 'VariantOutput')</filter> + </data> + <data format="tabular" name="output6" from_work_dir="cg_SomaticOutput.tsv" label="${tool.name} SomaticOutput"> + <filter>(somatic['report6'] == 'SomaticOutput')</filter> + </data> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Reference genome (.crr file)"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--conditional to select variant file input--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input varfiles?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <!--form field to select variant files--> + <when value="in"> + <param name="inputA" type="data" format="cg_var" label="Var file A"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + <param name="inputB" type="data" format="cg_var" label="Var file B"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" 
filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + + <!--form field to enter input files--> + <when value="out"> + <param name="inputA" type="text" label="Variant file A (/path/varfile)" size="300" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2"> + <validator type="empty_field" message="You must supply a var file"/> + </param> + <param name="inputB" type="text" label="Variant file B (/path/varfile)" size="300" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2."> + <validator type="empty_field" message="You must supply a var file"/> + </param> + </when> + </conditional> + + <!--other parameters--> + <param name="diploid" type="select" label="Use diploid variant model" help="Uses varScoreEAF instead of varScoreVAF in somatic score computations. 
Also, uses diploid variant model instead of variable allele mixture model."> + <option value="">no</option> + <option value="--diploid">yes</option> + </param> + + <param name="column" type="integer" label="Number of columns for locus compare classification in the locus stats file (default 15)" value="15"> + <validator type="empty_field" message="You must enter a value, the default is 15" /> + </param> + + <param name="hypothesis" type="integer" label="Maximum number of possible phasings to consider for a superlocus (default 32)" value="32"> + <validator type="empty_field" message="You must enter a value, the default is 32" /> + </param> + + <param name="validation" type="select" label="Reference cover validation (default on)" help="Turns on/off validation that all bases of a chromosome are covered by calls of the variant file."> + <option value="">on</option> + <option value="--no-reference-cover-validation">off</option> + </param> + + <!--form fields to select output reports--> + <param name="report1" type="select" label="Create report SuperlocusOutput"> + <option value="">no</option> + <option value="SuperlocusOutput">yes</option> + </param> + <param name="report2" type="select" label="Create report SuperlocusStats"> + <option value="">no</option> + <option value="SuperlocusStats">yes</option> + </param> + <param name="report3" type="select" label="Create report LocusOutput"> + <option value="">no</option> + <option value="LocusOutput">yes</option> + </param> + <param name="report4" type="select" label="Create report LocusStats"> + <option value="">no</option> + <option value="LocusStats">yes</option> + </param> + <param name="report5" type="select" label="Create report VariantOutput" help="Both variant files annotated by comparison results. 
If the somatic output report is requested, file A is also annotated with the same score ranks as produced in that report."> + <option value="">no</option> + <option value="VariantOutput">yes</option> + </param> + + <!--conditional to select somatic reports and related inputs--> + <conditional name="somatic"> + <param name="report6" type="select" label="Create report SomaticOutput" help="This report can only be generated on local Galaxy instances. Report for the list of simple variations that are present only in file 'A', annotated with the score that indicates the probability of the variation being truly somatic. Note: generating this report slows calldiff by 10x-20x."> + <option value="">no</option> + <option value="SomaticOutput">yes</option> + </param> + + <when value="SomaticOutput"> + <param name="genomeA" type="text" size="300" label="Directory for genome A (/path/dir)" help="The 'A' genome directory, e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000; this directory is expected to contain ASM/REF and ASM/EVIDENCE subdirectories."> + <validator type="empty_field" message="You must supply the genome root directory for this sample"/> + </param> + <param name="genomeB" type="text" size="300" label="Directory for genome B (/path/dir)" help="The 'B' genome directory, e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000; this directory is expected to contain ASM/REF and ASM/EVIDENCE subdirectories."> + <validator type="empty_field" message="You must supply the genome root directory for this sample"/> + </param> + <param name="calibration" type="text" size="300" label="Directory containing calibration data (/path/dir)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. 
Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v1.tgz"> + <validator type="empty_field" message="You must supply the directory containing the calibration data"/> + </param> + </when> + </conditional> + + </inputs> + + <help> + +**What it does** + +This tool uses cgatools calldiff to compare two Complete Genomics variant files. + +**cgatools 1.6.0 Documentation** + +Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf + +Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf + +**Command line reference**:: + + COMMAND NAME + calldiff - Compares two Complete Genomics variant files. + + DESCRIPTION + Compares two Complete Genomics variant files. Divides the genome up into + superloci of nearby variants, then compares the superloci. Also refines the + comparison to determine per-call or per-locus comparison results. + + Comparison results are usually described by a semi-colon separated string, + one per allele. Each allele's comparison result is one of the following + classifications: + + ref-identical The alleles of the two variant files are identical, and + they are consistent with the reference. + alt-identical The alleles of the two variant files are identical, and + they are inconsistent with the reference. + ref-consistent The alleles of the two variant files are consistent, + and they are consistent with the reference. + alt-consistent The alleles of the two variant files are consistent, + and they are inconsistent with the reference. + onlyA The alleles of the two variant files are inconsistent, + and only file A is inconsistent with the reference. + onlyB The alleles of the two variant files are inconsistent, + and only file B is inconsistent with the reference. + mismatch The alleles of the two variant files are inconsistent, + and they are both inconsistent with the reference. 
+ phase-mismatch The two variant files would be consistent if the + hapLink field had been empty, but they are + inconsistent. + ploidy-mismatch The superlocus did not have uniform ploidy. + + In some contexts, this classification is rolled up into a simplified + classification, which is one of "identical", "consistent", "onlyA", + "onlyB", or "mismatch". + + A good place to start looking at the results is the superlocus-output file. + It has columns defined as follows: + + SuperlocusId An identifier given to the superlocus. + Chromosome The name of the chromosome. + Begin The 0-based offset of the start of the superlocus. + End The 0-based offset of the base one past the end of the + superlocus. + Classification The match classification of the superlocus. + Reference The reference sequence. + AllelesA A semicolon-separated list of the alleles (one per + haplotype) for variant file A, for the phasing with the + best comparison result. + AllelesB A semicolon-separated list of the alleles (one per + haplotype) for variant file B, for the phasing with the + best comparison result. + + The locus-output file contains, for each locus in file A and file B that is + not consistent with the reference, an annotated set of calls for the locus. + The calls are annotated with the following columns: + + SuperlocusId The id of the superlocus containing the locus. + File The variant file (A or B). + LocusClassification The locus classification is determined by the + varType column of the call that is inconsistent + with the reference, concatenated with a + modifier that describes whether the locus is + heterozygous, homozygous, or contains no-calls. + If there is no one variant in the locus (i.e., + it is heterozygous alt-alt), the locus + classification begins with "other". + LocusDiffClassification The match classification for the locus. This is + defined to be the best of the comparison of the + locus to the same region in the other file, or + the comparison of the superlocus. 
+ + The somatic output file contains a list of putative somatic variations of + genome A. The output includes only those loci that can be classified as + snp, del, ins or sub in file A, and are called reference in the file B. + Every locus is annotated with the following columns: + + VarCvgA The totalReadCount from file A for this locus + (computed on the fly if file A is not a + masterVar file). + VarScoreA The varScoreVAF from file A, or varScoreEAF if + the "--diploid" option is used. + RefCvgB The maximum of the uniqueSequenceCoverage + values for the locus in genome B. + RefScoreB Minimum of the reference scores of the locus in + genome B. + SomaticCategory The category used for determining the + calibrated scores and the SomaticRank. + VarScoreACalib The calibrated variant score of file A, under + the model selected by using or not using the + "--diploid" option, and corrected for the count + of heterozygous variants observed in this + genome. See user guide for more information. + VarScoreBCalib The calibrated reference score of file B, under + the model selected by using or not using the + "--diploid" option, and corrected for the count + of heterozygous variants observed in this + genome. See user guide for more information. + SomaticRank The estimated rank of this somatic mutation, + amongst all true somatic mutations within this + SomaticCategory. The value is a number between + 0 and 1; a value of 0.012 means, for example, + that an estimated 1.2% of the true somatic + mutations in this somaticCategory have a + somaticScore less than the somaticScore for + this mutation. See user guide for more + information. + SomaticScore An integer that provides a total order on + quality for all somatic mutations. 
It is equal + to -10*log10( P(false)/P(true) ), under the + assumption that this genome has a rate of + somatic mutation equal to 1/Mb for + SomaticCategory snp, 1/10Mb for SomaticCategory + ins, 1/10Mb for SomaticCategory del, and 1/20Mb + for SomaticCategory sub. The computation is + based on the assumptions described in the user + guide, and is affected by choice of variant + model selected by using or not using the + "--diploid" option. + SomaticQuality Equal to VQHIGH for all somatic mutations where + SomaticScore >= -10. Otherwise, this column is + empty. + + OPTIONS + -h [ --help ] + Print this help message. + + --reference arg + The input crr file. + + --variantsA arg + The "A" input variant file. + + --variantsB arg + The "B" input variant file. + + --output-prefix arg + The path prefix for all output reports. + + --reports arg (=SuperlocusOutput,SuperlocusStats,LocusOutput,LocusStats) + Comma-separated list of reports to generate. (Beware any reports whose + name begins with "Debug".) A report is one of: + SuperlocusOutput Report for superlocus classification. + SuperlocusStats Report for superlocus classification stats. + LocusOutput Report for locus classification. + LocusStats Report for locus stats. + VariantOutput Both variant files annotated by comparison + results.If the somatic output report is + requested, file A is also annotated with the + same score ranks as produced in that report. + SomaticOutput Report for the list of simple variations that + are present only in file "A", annotated with + the score that indicates the probability of + the variation being truly somatic. Requires + beta, genome-rootA, and genome-rootB options + to be provided as well. Note: generating this + report slows calldiff by 10x-20x. + DebugCallOutput Report for call classification. + DebugSuperlocusOutput Report for debug superlocus information. + DebugSomaticOutput Report for distribution estimates used for + somatic rescoring. 
Only produced if + SomaticOutput is also turned on. + + --diploid + Uses varScoreEAF instead of varScoreVAF in somatic score computations. + Also, uses diploid variant model instead of variable allele mixture + model. + + --locus-stats-column-count arg (=15) + The number of columns for locus compare classification in the locus + stats file. + + --max-hypothesis-count arg (=32) + The maximum number of possible phasings to consider for a superlocus. + + --no-reference-cover-validation + Turns off validation that all bases of a chromosome are covered by + calls of the variant file. + + --genome-rootA arg + The "A" genome directory, for example /data/GS00118-DNA_A01; this + directory is expected to contain ASM/REF and ASM/EVIDENCE + subdirectories. + + --genome-rootB arg + The "B" genome directory. + + --calibration-root arg + The directory containing calibration data. For example, there should + exist a file calibration-root/0.0.0/metrics.tsv. + + --beta + This flag enables the SomaticOutput report, which is beta + functionality. + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/evidence2sam.xml Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,229 @@ +<tool id="cg_evidence2sam" name="evidence2sam(beta) 1.6" version="1.0.0"> +<!-- +This tool creates a GUI for the evidence2sam function of cgatools from Complete Genomics, Inc. +written 8-31-2012 by bcrain@completegenomics.com +--> + + <description>converts evidence mappings to SAM format</description> + + <command> +<!-- print version of cgatools to STDOUT--> +cgatools | head -1; + +<!-- print command lines to STDOUT--> +echo "cgatools evidence2sam --beta +--reference $crr.fields.path +--output $output +--evidence-dnbs $data_sources.input +--consistent-mapping-range $range +#if $region.selectregion == "yes" +--extract-genomic-region $region.coordinates +#end if +$duplicates +$mates +$intervals +$skip +$svcandidates +$unmapped +$primary +"; + +<!-- execute cgatools--> +cgatools evidence2sam --beta +--reference $crr.fields.path +--evidence-dnbs $data_sources.input +#if $region.selectregion == "yes" + --extract-genomic-region $region.coordinates +#end if +$duplicates +$mates +$intervals +$skip +$svcandidates +$unmapped +$primary +--consistent-mapping-range $range +--output $output + </command> + + <outputs> + <data format="tabular" name="output" label="${tool.name} output"/> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Reference genome (.crr file)"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--conditional to select input file--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where is the input evidence file?"> + <option value="in">imported into Galaxy</option> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <!--form field to select evidence files--> + <when value="in"> + <param name="input" type="data" 
format="tabular" label="EvidenceDnbs file"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </when> + + <!--form field to enter external input file--> + <when value="out"> + <param name="input" type="text" label="EvidenceDnbs file (/path/file)" size="40" help="e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/EVIDENCE/evidenceDnbs-chr21-GS00000YYYY-ASM.tsv.bz2"> + <validator type="empty_field" message="You must supply an evidenceDnbs file"/> + </param> + </when> + </conditional> + + <!--form field to select chromosomal region--> + <conditional name="region"> + <param name="selectregion" type="select" label="Do you want to extract a specific genomic region?"> + <option value="no" selected="true">no</option> + <option value="yes">yes</option> + </param> + + <when value="yes"> + <param name="coordinates" type="text" label="Enter genomic coordinates to avoid converting the entire file (chr,from,to)" size="40" help="Specify the region as a half-open interval chr,from,to (e.g. 
chrX,15203639,15412498)"/> + </when> + </conditional> + + <!--form field to select duplicate handling--> + <param name="duplicates" type="select" label="Keep local duplicates of DNB mappings (default no)" help="All the output SAM records will be marked as 'not primary' if this option is used."> + <option value="" selected="true">no</option> + <option value="--keep-duplicates">yes</option> + </param> + + <!--form field to generate mate sequence--> + <param name="mates" type="select" label="Generate mate sequence (R2) and score (Q2) tags (default no)"> + <option value="" selected="true">no</option> + <option value="--add-mate-sequence">yes</option> + </param> + + <!--form field to generate interval ids--> + <param name="intervals" type="select" label="Generate interval id (ZI:I) and allele id (ZA:I) tags (default no)"> + <option value="" selected="true">no</option> + <option value="--add-allele-id">yes</option> + </param> + + <!--form field to skip not mapped reads--> + <param name="skip" type="select" label="Skip not mapped records (default no)"> + <option value="" selected="true">no</option> + <option value="--skip-not-mapped">yes</option> + </param> + + <!--form field to mate unique single arm mappings (SV candidates)--> + <param name="svcandidates" type="select" label="Mate unique single arm mappings in SAM including those on different strands and chromosomes (default no)"> + <option value="" selected="true">no</option> + <option value="--mate-sv-candidates">yes</option> + </param> + + <!--form field to add mate info for inconsistent mappings only--> + <param name="unmapped" type="select" label="Generate mate sequence and score tags for inconsistent mappings only (default no)"> + <option value="" selected="true">no</option> + <option value="--add-unmapped-mate-info">yes</option> + </param> + + <!--form field to report primary mappings only--> + <param name="primary" type="select" label="Use primary mappings only (default no)" help="Report only the best mappings"> + <option value="" selected="true">no</option> + <option 
value="--primary-mappings-only">yes</option> + </param> + + <param name="range" type="integer" value="1300" label="Maximum distance between consistent mates (default 1300)"> + <validator type="empty_field" message="You must enter a value, the default is 1300" /> + </param> + </inputs> + + <stdio> + <regex match="failed" source="stderr" level="fatal"/> + <regex match="error" source="stderr" level="fatal"/> + <regex match="Export the sequence:" source="stderr" level="warning" description="Finished:" /> + </stdio> + + <help> + +**What it does** + +This tool uses cgatools evidence2sam to convert Complete Genomics evidence mappings to SAM format + +**cgatools 1.6.0 Documentation** + +Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf + +Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf + +**Command line reference**:: + + COMMAND NAME + evidence2sam - Converts CGI variant evidence data into SAM format. + + DESCRIPTION + The evidence2sam converter takes as input evidence mapping files + (evidenceDnbs-*) and generates one SAM file as an output. The output is + sent into stdout by default. By default, all the evidence mapping records + from the input are converted into a pair of corresponding SAM records - one + record for each HalfDNB. The negative gaps in CGI mappings are represented + using GS/GQ/GC tags. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + -e [ --evidence-dnbs ] arg + Input evidence dnbs file. + + -s [ --reference ] arg + Reference file. + + -o [ --output ] arg (=STDOUT) + The output SAM file (may be omitted for stdout). + + -r [ --extract-genomic-region ] arg + defines a region as a half-open interval 'chr,from,to'. + + --keep-duplicates + Keep local duplicates of DNB mappings.All the output SAM records will + be marked as not primary if this option is used. 
+ + --add-allele-id + Generate interval id and allele id tags. + + --skip-not-mapped + Skip not mapped records + + --add-mate-sequence + Generate mate sequence and score tags. + + --mate-sv-candidates + Inconsistent mappings are normally converted as single arm mappings + with no mate information provided. If the option is used map2sam will + mate unique single arm mappings in SAM including those on different + stands and chromosomes. To distinguish these "artificially" mated + records a tag "XS:i:1" is used. The MAPQ provided for these records is + a single arm mapping weight. + + --add-unmapped-mate-info + works like add-mate-sequence, but is applied to inconsistent mappings + only + + --primary-mappings-only + report only the best mappings + + --consistent-mapping-range arg (=1300) + limit the maximum distance between consistent mates + + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/join.xml Thu Sep 27 13:37:59 2012 -0400
@@ -0,0 +1,241 @@
+<tool id="cg_join" name="join(beta) 1.6" version="1.0.1">
+<!--
+This tool creates a GUI for the join function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+updated 8-14-2012 by bcrain@completegenomics.com
+-->
+
+  <description>two tsv files based on equal fields or overlapping regions.</description>
+
+  <command>
+<!-- print version of cgatools to STDOUT-->
+cgatools | head -1;
+
+<!-- print command lines to STDOUT (same option list as the real call below; keep the two in sync)-->
+echo "cgatools join --beta
+--input $inputA
+--input $inputB
+--output $output
+--output-mode $outmode
+$dump
+--select $col
+#for $m in $matches <!--get all matched columns-->
+--match ${m.match}
+#end for
+#if $range_overlap.range == 'yes'
+#for $o in $range_overlap.overlaps <!--get all overlapped columns-->
+--overlap ${o.overlap}
+#end for
+--overlap-mode $range_overlap.overlapmode
+--overlap-fraction-A $range_overlap.fractionA
+--boundary-uncertainty-A $range_overlap.boundaryA
+--overlap-fraction-B $range_overlap.fractionB
+--boundary-uncertainty-B $range_overlap.boundaryB
+#end if
+";
+
+<!-- execute cgatools-->
+cgatools join --beta
+--input $inputA
+--input $inputB
+--output $output
+--output-mode $outmode
+$dump
+--select $col
+#for $m in $matches <!--get all matched columns-->
+  --match ${m.match}
+#end for
+#if $range_overlap.range == 'yes'
+  #for $o in $range_overlap.overlaps <!--get all overlapped columns-->
+    --overlap ${o.overlap}
+  #end for
+  --overlap-mode $range_overlap.overlapmode
+  --overlap-fraction-A $range_overlap.fractionA
+  --boundary-uncertainty-A $range_overlap.boundaryA
+  --overlap-fraction-B $range_overlap.fractionB
+  --boundary-uncertainty-B $range_overlap.boundaryB
+#end if
+  </command>
+
+  <outputs>
+    <data format="tabular" name="output" label="${tool.name} output"/>
+  </outputs>
+
+  <inputs>
+    <!--form field to select input file A; its dbkey is checked against column 1 (dbkey) of cg_crr_files.loc, whose columns are value, dbkey, name, path-->
+    <param name="inputA" type="data" format="tabular" label="Select input file A ">
+      <validator type="dataset_ok_validator" />
+      <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+       metadata_name="dbkey" metadata_column="1"
+       message="cgatools is not currently available for this build."/>
+    </param>
+
+    <!--form field to select input file B; same dbkey check as for file A-->
+    <param name="inputB" type="data" format="tabular" label="Select input file B ">
+      <validator type="dataset_ok_validator" />
+      <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+       metadata_name="dbkey" metadata_column="1"
+       message="cgatools is not currently available for this build."/>
+    </param>
+
+    <!--form field to specify columns to print (passed to the select option)-->
+    <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1">
+      <validator type="empty_field" message="You must specify columns to print, the default is A.*,B.*"/>
+    </param>
+
+    <!--form field to select output-mode-->
+    <param name="outmode" type="select" label="Select output mode">
+      <option value="full" selected="true">full (1 line for each match of records in A and B)</option>
+      <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option>
+      <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option>
+    </param>
+
+    <!--form field to select dumping mode; the option value is the literal command line flag (empty = flag omitted)-->
+    <param name="dump" type="select" label="Select records to print">
+      <option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
+      <option value="">print only records of A that are matched in B</option>
+    </param>
+
+    <!--form field to specify columns to match; at least one exact match column is required-->
+    <repeat name="matches" title="Exact match column" min="1">
+      <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome">
+        <validator type="empty_field" message="You must specify columns to match"/>
+      </param>
+    </repeat>
+
+    <!--form field to select range overlaps; the overlap options are only emitted when range is 'yes'-->
+    <conditional name="range_overlap">
+      <param name="range" type="select" label="Do you want to match columns by overlapping range?">
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+
+      <when value="yes">
+        <!--form field to specify columns to overlap-->
+        <repeat name="overlaps" title="Range column">
+          <param name="overlap" type="text" size="40" label="Enter column[,column]:column[,column]" help="Enter range_start_from_A[,range_stop_from_A]:range_start_from_B[,range_stop_from_B], e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/>
+        </repeat>
+
+        <!--form field to select overlap-mode; comparison signs are entity-escaped so the file stays well-formed XML-->
+        <param name="overlapmode" type="select" label="Select overlap mode">
+          <option value="strict" selected="true">strict (overlap if A.begin&lt;B.end and B.begin&lt;A.end)</option>
+          <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin&lt;B.end and B.begin&lt;A.end, or if A.begin&lt;=B.end and B.begin&lt;=A.end and either A or B has zero length.)</option>
+        </param>
+
+        <!--form fields to enter overlap options; the overlap fractions are fractional values, hence type float-->
+        <param name="fractionA" type="float" value="0" label="Minimum fraction of A region overlap (default 0)" />
+        <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering (default 0)" help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length &gt;= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/>
+
+        <param name="fractionB" type="float" value="0" label="Minimum fraction of B region overlap (default 0)" />
+        <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering (default 0)" help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length &gt;= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <help>
+
+**What it does**
+
+This tool joins two tab-delimited files based on equal fields or overlapping regions.
+
+**cgatools 1.6.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+    COMMAND NAME
+        join - Joins two tab-delimited files based on equal fields or overlapping regions.
+
+    DESCRIPTION
+        Joins two tab-delimited files based on equal fields or overlapping regions.
+        By default, an output record is produced for each match found between file
+        A and file B, but output format can be controlled by the --output-mode
+        parameter.
+
+    OPTIONS
+        -h [ --help ]
+            Print this help message.
+
+        --beta
+            This is a beta command. To run this command, you must pass the --beta
+            flag.
+
+        --input arg
+            File name to use as input (may be passed in as arguments at the end of
+            the command), or omitted for stdin). There must be exactly two input
+            files to join. If only one file is specified by name, file A is taken
+            to be stdin and file B is the named file. File B is read fully into
+            memory, and file A is streamed. File A's columns appear first in the
+            output.
+
+        --output arg (=STDOUT)
+            The output file name (may be omitted for stdout).
+
+        --match arg
+            A match specification, which is a column from A and a column from B
+            separated by a colon.
+
+        --overlap arg
+            Overlap specification. An overlap specification consists of a range
+            definition for files A and B, separated by a colon. A range definition
+            may be two columns, in which case they are interpreted as the beginning
+            and end of the range. Or it may be one column, in which case the range
+            is defined as the 1-base range starting at the given value. The records
+            from the two files must overlap in order to be considered for output.
+            Two ranges are considered to overlap if the overlap is at least one
+            base long, or if one of the ranges is length 0 and the ranges overlap
+            or abut. For example, "begin,end:offset" will match wherever end-begin
+            &gt; 0, begin&lt;offset+1, and end&gt;offset, or wherever end-begin = 0,
+            begin&lt;=offset+1, and end&gt;=offset.
+
+
+        -m [ --output-mode ] arg (=full)
+            Output mode, one of the following:
+              full        Print an output record for each match found between
+                          file A and file B.
+              compact     Print at most one record for each record of file A,
+                          joining the file B values by a semicolon and
+                          suppressing repeated B values and empty B values.
+              compact-pct Same as compact, but for each distinct B value,
+                          annotate with the percentage of the A record that is
+                          overlapped by B records with that B value. Percentage
+                          is rounded up to nearest integer.
+
+        --overlap-mode arg (=strict)
+            Overlap mode, one of the following:
+              strict                Range A and B overlap if A.begin &lt; B.end and
+                                    B.begin &lt; A.end.
+              allow-abutting-points Range A and B overlap they meet the strict
+                                    requirements, or if A.begin &lt;= B.end and
+                                    B.begin &lt;= A.end and either A or B has zero
+                                    length.
+
+        --select arg (=A.*,B.*)
+            Set of fields to select for output.
+
+        -a [ --always-dump ]
+            Dump every record of A, even if there are no matches with file B.
+
+        --overlap-fraction-A arg (=0)
+            Minimum fraction of A region overlap for filtering output.
+
+        --boundary-uncertainty-A arg (=0)
+            Boundary uncertainty for overlap filtering. Specifically, records
+            failing the following predicate are filtered away: overlap &gt;=
+            overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
+
+        --overlap-fraction-B arg (=0)
+            Minimum fraction of B region overlap for filtering output.
+
+        --boundary-uncertainty-B arg (=0)
+            Boundary uncertainty for overlap filtering. Specifically, records
+            failing the following predicate are filtered away: overlap &gt;=
+            overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
+
+    SUPPORTED FORMAT_VERSION
+        Any
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/junctiondiff.xml Thu Sep 27 13:37:59 2012 -0400
@@ -0,0 +1,181 @@
+<tool id="cg_junctiondiff" name="junctiondiff(beta) 1.6" version="1.0.1">
+<!--
+This tool creates a GUI for the junctiondiff function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+updated 8-14-2012 by bcrain@completegenomics.com
+-->
+
+  <description>reports difference between junction calls</description>
+
+  <command>
+<!-- print version of cgatools to STDOUT-->
+cgatools | head -1;
+
+<!-- print command lines to STDOUT (same option list as the real call below; keep the two in sync)-->
+echo "cgatools junctiondiff --beta
+--reference $crr.fields.path
+--junctionsA $data_sources.inputA
+--junctionsB $data_sources.inputB
+--scoreThresholdA $scoreA
+--scoreThresholdB $scoreB
+--distance $distance
+--minlength $minlength
+--output-prefix cg_
+$stat
+";
+
+<!-- execute cgatools, then rename the diff file to the fixed name that the outputs section collects via from_work_dir-->
+cgatools junctiondiff --beta
+--reference $crr.fields.path
+--junctionsA $data_sources.inputA
+--junctionsB $data_sources.inputB
+--scoreThresholdA $scoreA
+--scoreThresholdB $scoreB
+--distance $distance
+--minlength $minlength
+--output-prefix cg_
+$stat
+;
+mv cg_diff-*tsv cg_diff.tsv
+  </command>
+
+  <outputs>
+    <data format="tabular" name="output1" from_work_dir="cg_diff.tsv" label="${tool.name} diff"/>
+    <data format="tabular" name="output2" from_work_dir="cg_report.tsv" label="${tool.name} report">
+      <filter>(stat == '--statout')</filter>
+    </data>
+  </outputs>
+
+  <inputs>
+    <!--form field to select crr file-->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+    <!--conditional to select junction file input (Galaxy datasets or paths outside Galaxy)-->
+    <conditional name="data_sources">
+      <param name="data_source" type="select" label="Where are the input junction files?">
+        <option value="in" selected="true">imported into Galaxy</option>
+        <option value="out">located outside Galaxy (data on server or mounted drive)</option>
+      </param>
+
+      <!--form field to select junction files; these are plain tabular datasets-->
+      <when value="in">
+        <param name="inputA" type="data" format="tabular" label="Junction file A">
+          <validator type="dataset_ok_validator" />
+          <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+           metadata_name="dbkey" metadata_column="1"
+           message="cgatools is not currently available for this build."/>
+        </param>
+        <param name="inputB" type="data" format="tabular" label="Junction file B">
+          <validator type="dataset_ok_validator" />
+          <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+           metadata_name="dbkey" metadata_column="1"
+           message="cgatools is not currently available for this build."/>
+        </param>
+      </when>
+
+      <!--form field to enter external input files; same param names as the "in" branch so the command template is shared-->
+      <when value="out">
+        <param name="inputA" type="text" label="Junction file A (/path/junction_file)" size="40" help="e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv">
+          <validator type="empty_field" message="You must supply a junction file"/>
+        </param>
+        <param name="inputB" type="text" label="Junction file B (/path/junction_file)" size="40" help="e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv">
+          <validator type="empty_field" message="You must supply a junction file"/>
+        </param>
+      </when>
+    </conditional>
+
+    <!--form field to select stats output; the option value is the literal command line flag and drives the output2 filter-->
+    <param name="stat" type="select" label="Print input file stats">
+      <option value="">no</option>
+      <option value="--statout">yes</option>
+    </param>
+
+    <!--other parameters; defaults mirror the cgatools defaults listed in the help below-->
+    <param name="scoreA" type="integer" label="Score threshold value for input file A (default 10)" value="10">
+      <validator type="empty_field" message="You must enter a value, the default is 10" />
+    </param>
+    <param name="scoreB" type="integer" label="Score threshold value for input file B (default 0)" value="0">
+      <validator type="empty_field" message="You must enter a value, the default is 0" />
+    </param>
+    <param name="distance" type="integer" label="Max distance between coordinates of potentially compatible junctions (default 200)" value="200">
+      <validator type="empty_field" message="You must enter a value, the default is 200" />
+    </param>
+    <param name="minlength" type="integer" label="Minimum deletion junction length to be included into the difference file (default 500)" value="500">
+      <validator type="empty_field" message="You must enter a value, the default is 500" />
+    </param>
+  </inputs>
+
+
+  <help>
+
+**What it does**
+
+This tool uses cgatools junctiondiff to report difference between junction calls of two Complete Genomics junctions files
+
+**cgatools 1.6.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+    COMMAND NAME
+        junctiondiff - Reports difference between junction calls of Complete Genomics junctions files.
+
+    DESCRIPTION
+        junctiondiff takes two junction files A and B as input and produces the
+        following output:
+          - "diff-inputFileName" - the junctions from an input file A that are not
+            present in input file B.
+          - "report.txt" - a brief summary report (if --statout is used)
+
+        Two junctions are considered equivalent if:
+          - they come from different files
+          - left and right positions of one junction are not more than "--distance"
+            bases apart from the corresponding positions of another junction
+          - the junction scores are equal or above the scoreThreshold
+          - they are on the same strands
+
+    OPTIONS
+        -h [ --help ]
+            Print this help message.
+
+        --beta
+            This is a beta command. To run this command, you must pass the --beta
+            flag.
+
+        -s [ --reference ] arg
+            Reference file.
+
+        -a [ --junctionsA ] arg
+            input junction file A.
+
+        -b [ --junctionsB ] arg
+            input junction file B.
+
+        -A [ --scoreThresholdA ] arg (=10)
+            score threshold value for the input file A.
+
+        -B [ --scoreThresholdB ] arg (=0)
+            score threshold value for the input file B.
+
+        -d [ --distance ] arg (=200)
+            Max distance between coordinates of potentially compatible junctions.
+
+        -l [ --minlength ] arg (=500)
+            Minimum deletion junction length to be included into the difference
+            file.
+
+        -o [ --output-prefix ] arg
+            The path prefix for all the output reports.
+
+        -S [ --statout ]
+            (Debug) Report various input file statistics. Experimental feature.
+
+    SUPPORTED FORMAT_VERSION
+        1.5 or later
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/listtestvariants.xml Thu Sep 27 13:37:59 2012 -0400
@@ -0,0 +1,264 @@
+<tool id="cg_listtestvariants" name="listvariants(beta)-testvariants(beta) 1.6" version="1.0.1">
+<!--
+This tool creates a GUI for the listvariants and testvariants functions of cgatools from Complete Genomics, Inc.
+to be run consecutively with the same input files.
+written 6-18-2012 by bcrain@completegenomics.com
+updated 8-14-2012 by bcrain@completegenomics.com
+-->
+
+  <description>performs listvariants and testvariants consecutively</description>
+
+  <command>
+<!-- print version of cgatools to STDOUT-->
+cgatools | head -1;
+
+<!-- print command lines to STDOUT (same option lists as the real calls below; keep them in sync)-->
+echo "cgatools listvariants --beta
+--reference ${crr.fields.path}
+--output $output1
+#if $include_list.listing == "yes"
+--variant-listing $include_list.list
+#end if
+$longvar
+--variants
+#if $data_sources.data_source == "in" <!--data in galaxy-->
+#for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
+${v.input}
+#end for
+#else <!--data outside galaxy: "list" is defined directly under data_sources, not under file_types-->
+`cat $data_sources.list`
+#end if
+";
+echo "cgatools testvariants --beta
+--reference ${crr.fields.path}
+--output $output2
+--input $output1
+--variants
+#if $data_sources.data_source == "in" <!--data in galaxy-->
+#for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
+${v.input}
+#end for
+#else <!--data outside galaxy-->
+`cat $data_sources.list`
+#end if
+";
+
+<!-- execute cgatools: listvariants writes output1, which testvariants then reads as its input-->
+cgatools listvariants
+--beta
+--reference ${crr.fields.path}
+--output $output1
+#if $include_list.listing == "yes"
+  --variant-listing $include_list.list
+#end if
+$longvar
+--variants
+#if $data_sources.data_source == "in" <!--data in galaxy-->
+  #for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
+    ${v.input}
+  #end for
+#else <!--data outside galaxy-->
+  `cat $data_sources.list`
+#end if
+;
+
+cgatools testvariants
+--beta
+--reference ${crr.fields.path}
+--output $output2
+--input $output1
+--variants
+#if $data_sources.data_source == "in" <!--data in galaxy-->
+  #for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
+    ${v.input}
+  #end for
+#else <!--data outside galaxy-->
+  `cat $data_sources.list`
+#end if
+  </command>
+
+  <outputs>
+    <data format="tabular" name="output1" label="listvariants 1.6 output"/>
+    <data format="tabular" name="output2" label="testvariants 1.6 output"/>
+  </outputs>
+
+  <inputs>
+    <!--form field to select crr file-->
+    <param name="crr" type="select" label="Reference genome (.crr file)">
+      <options from_data_table="cg_crr_files" />
+    </param>
+
+    <!--form field to select long variants option; the option value is the literal command line flag-->
+    <param name="longvar" type="select" label="List long variants?">
+      <option value="" selected="true">no</option>
+      <option value="--list-long-variants">yes</option>
+    </param>
+
+    <!--form fields to include existing variant list-->
+    <conditional name="include_list">
+      <param name="listing" type="select" label="Include variant listing?">
+        <option value="no" selected="true">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="yes">
+        <param name="list" type="data" format="tabular" label="Variant listing">
+          <validator type="dataset_ok_validator" />
+          <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+           metadata_name="dbkey" metadata_column="1"
+           message="cgatools is not currently available for this build."/>
+        </param>
+      </when>
+    </conditional>
+
+    <!--conditional to select data in/outside galaxy-->
+    <conditional name="data_sources">
+      <param name="data_source" type="select" label="Where are the input var files?">
+        <option value="in" selected="true">imported into Galaxy</option>
+        <option value="out">located outside Galaxy (data on server or mounted drive)</option>
+      </param>
+
+      <when value="in">
+        <!--conditional to select input file type-->
+        <conditional name="file_types">
+          <param name="file_type" type="select" label="Select the input file type">
+            <option value="var" selected="true">var files</option>
+            <option value="mastervar">mastervar files</option>
+          </param>
+
+          <!--form field to select variant files-->
+          <when value="var">
+            <repeat name="files" title="Var file" min="1">
+              <param name="input" type="data" format="cg_var" label="Dataset">
+                <validator type="dataset_ok_validator" />
+                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+                 metadata_name="dbkey" metadata_column="1"
+                 message="cgatools is not currently available for this build."/>
+              </param>
+            </repeat>
+          </when>
+
+          <!--form field to select masterVar files; at least one file is required, as for var files-->
+          <when value="mastervar">
+            <repeat name="files" title="masterVarVariant file" min="1">
+              <param name="input" type="data" format="cg_mastervar" label="Dataset">
+                <validator type="dataset_ok_validator" />
+                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
+                 metadata_name="dbkey" metadata_column="1"
+                 message="cgatools is not currently available for this build."/>
+              </param>
+            </repeat>
+          </when>
+        </conditional>
+      </when>
+
+      <when value="out">
+        <!--form field to enter list file; referenced in the command as data_sources.list-->
+        <param name="list" type="text" label="Enter file containing list of var or masterVar files (/path/file)" size="200" help="This file should contain a list of var or masterVar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), the var or masterVar files can be compressed (gz, bz2).">
+          <validator type="empty_field" message="You must supply a file containing a list of var or masterVar files"/>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <help>
+
+**What it does**
+
+This tool uses the cgatools listvariants and testvariants to test variant or mastervar files for the presence of variants.
+
+**cgatools 1.6.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+    COMMAND NAME
+        listvariants - Lists the variants present in a variant file.
+
+    DESCRIPTION
+        Lists all called variants present in the specified variant files, in a
+        format suitable for processing by the testvariants command. The output is a
+        tab-delimited file consisting of the following columns:
+
+        variantId  Sequential id assigned to each variant.
+        chromosome The chromosome of the variant.
+        begin      0-based reference offset of the beginning of the variant.
+        end        0-based reference offset of the end of the variant.
+        varType    The varType as extracted from the variant file.
+        reference  The reference sequence.
+        alleleSeq  The variant allele sequence as extracted from the variant
+                   file.
+        xRef       The xRef as extrated from the variant file.
+
+    OPTIONS
+        -h [ --help ]
+            Print this help message.
+
+        --beta
+            This is a beta command. To run this command, you must pass the --beta
+            flag.
+
+        --reference arg
+            The reference crr file.
+
+        --output arg (=STDOUT)
+            The output file (may be omitted for stdout).
+
+        --variants arg
+            The input variant files (may be positional args).
+
+        --variant-listing arg
+            The output of another listvariants run, to be merged in to produce the
+            output of this run.
+
+        --list-long-variants
+            In addition to listing short variants, list longer variants as well
+            (10's of bases) by concatenating nearby calls.
+
+    SUPPORTED FORMAT_VERSION
+        0.3 or later
+
+
+
+    COMMAND NAME
+        testvariants - Tests variant files for presence of variants.
+
+    DESCRIPTION
+        Tests variant files for presence of variants. The output is a tab-delimited
+        file consisting of the columns of the input variants file, plus a column
+        for each assembly results file that contains a character code for each
+        allele. The character codes have meaning as follows:
+
+          0 This allele of this genome is consistent with the reference at this
+            locus but inconsistent with the variant.
+          1 This allele of this genome has the input variant at this locus.
+          N This allele of this genome has no-calls but is consistent with the
+            input variant.
+
+    OPTIONS
+        -h [ --help ]
+            Print this help message.
+
+        --beta
+            This is a beta command. To run this command, you must pass the --beta
+            flag.
+
+        --reference arg
+            The reference crr file.
+
+        --input arg (=STDIN)
+            The input variants to test for.
+
+        --output arg (=STDOUT)
+            The output file (may be omitted for stdout).
+
+        --variants arg
+            The input variant files (may be passed in as arguments at the end of
+            the command).
+
+    SUPPORTED FORMAT_VERSION
+        0.3 or later
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/listvariants.xml Thu Sep 27 13:37:59 2012 -0400
@@ -0,0 +1,192 @@
+<tool id="cg_listvariant" name="listvariants(beta) 1.6" version="1.0.1">
+<!--
+Galaxy front end for the listvariants function of cgatools from Complete Genomics, Inc.
+written 6-18-2012 by bcrain@completegenomics.com
+updated 8-13-2012 by bcrain@completegenomics.com
+-->
+
+    <description>lists all called variants</description>
+
+    <command>
+<!-- first line of the cgatools banner (its version) goes to STDOUT-->
+cgatools | head -1;
+
+<!-- echo the assembled command line to STDOUT for the job log-->
+echo "cgatools listvariants --beta
+--reference ${crr.fields.path}
+--output $output
+#if $include_list.listing == "yes"
+--variant-listing $include_list.list
+#end if
+$longvar
+--variants
+#if $data_sources.data_source == "in" <!--datasets from the Galaxy history-->
+#for $v in $data_sources.file_types.files <!--one entry per selected var/mastervar dataset-->
+${v.input}
+#end for
+#else <!--paths read from a list file outside Galaxy-->
+`cat $data_sources.list`
+#end if
+";
+
+<!-- the actual cgatools call, with the same options as echoed above-->
+cgatools listvariants --beta
+--reference ${crr.fields.path}
+--output $output
+#if $include_list.listing == "yes"
+    --variant-listing $include_list.list
+#end if
+$longvar
+--variants
+#if $data_sources.data_source == "in" <!--datasets from the Galaxy history-->
+    #for $v in $data_sources.file_types.files <!--one entry per selected var/mastervar dataset-->
+        ${v.input}
+    #end for
+#else <!--paths read from a list file outside Galaxy-->
+    `cat $data_sources.list`
+#end if
+    </command>
+
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} output"/>
+    </outputs>
+
+    <inputs>
+        <!--reference genome, chosen from the cg_crr_files data table-->
+        <param type="select" name="crr" label="Reference genome (.crr file)">
+            <options from_data_table="cg_crr_files" />
+        </param>
+
+        <!--optional flag: the selected value is inserted into the command line as is-->
+        <param type="select" name="longvar" label="List long variants?">
+            <option selected="true" value="">no</option>
+            <option value="--list-long-variants">yes</option>
+        </param>
+
+        <!--optionally merge in the output of an earlier listvariants run-->
+        <conditional name="include_list">
+            <param type="select" name="listing" label="Include variant listing (existing listvariants output)?">
+                <option selected="true" value="no">no</option>
+                <option value="yes">yes</option>
+            </param>
+            <when value="yes">
+                <param type="data" name="list" format="tabular" label="Variant listing">
+                    <validator type="dataset_ok_validator" />
+                    <validator type="dataset_metadata_in_file" metadata_name="dbkey"
+                     filename="cg_crr_files.loc" metadata_column="1"
+                     message="cgatools is not currently available for this build."/>
+                </param>
+            </when>
+        </conditional>
+
+        <!--input files are either Galaxy datasets or paths listed in an external file-->
+        <conditional name="data_sources">
+            <param type="select" name="data_source" label="Where are the input files?">
+                <option selected="true" value="in">imported into Galaxy</option>
+                <option value="out">located outside Galaxy (data on server or mounted drive)</option>
+            </param>
+
+            <when value="in">
+                <!--choose between var and masterVar datasets-->
+                <conditional name="file_types">
+                    <param type="select" name="file_type" label="Select the input file type">
+                        <option selected="true" value="var">var files</option>
+                        <option value="mastervar">mastervar files</option>
+                    </param>
+
+                    <!--one or more var datasets-->
+                    <when value="var">
+                        <repeat min="1" name="files" title="Var file">
+                            <param type="data" name="input" format="cg_var" label="Dataset">
+                                <validator type="dataset_ok_validator" />
+                                <validator type="dataset_metadata_in_file" metadata_name="dbkey"
+                                 filename="cg_crr_files.loc" metadata_column="1"
+                                 message="cgatools is not currently available for this build."/>
+                            </param>
+                        </repeat>
+                    </when>
+
+                    <!--one or more masterVar datasets-->
+                    <when value="mastervar">
+                        <repeat min="1" name="files" title="MasterVar file">
+                            <param type="data" name="input" format="cg_mastervar" label="Dataset">
+                                <validator type="dataset_ok_validator" />
+                                <validator type="dataset_metadata_in_file" metadata_name="dbkey"
+                                 filename="cg_crr_files.loc" metadata_column="1"
+                                 message="cgatools is not currently available for this build."/>
+                            </param>
+                        </repeat>
+                    </when>
+                </conditional>
+            </when>
+
+            <!--path to a text file naming the var/masterVar files, one per line-->
+            <when value="out">
+                <param type="text" name="list" size="200" label="Enter file containing list of var or masterVar files (/path/file)" help="This file should contain a list of var or masterVar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), var or masterVar files can be compressed (gz, bz2).">
+                    <validator type="empty_field" message="You must supply a file containing a list of var or masterVar files"/>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+    <help>
+
+**What it does**
+
+This tool uses the cgatools listvariants to list all called variants present in the var or mastervar files.
+
+**cgatools 1.6.0 Documentation**
+
+Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
+
+Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
+
+**Command line reference**::
+
+    COMMAND NAME
+        listvariants - Lists the variants present in a variant file.
+
+    DESCRIPTION
+        Lists all called variants present in the specified variant files, in a
+        format suitable for processing by the testvariants command. The output is a
+        tab-delimited file consisting of the following columns:
+
+        variantId  Sequential id assigned to each variant.
+        chromosome The chromosome of the variant.
+        begin      0-based reference offset of the beginning of the variant.
+        end        0-based reference offset of the end of the variant.
+        varType    The varType as extracted from the variant file.
+        reference  The reference sequence.
+        alleleSeq  The variant allele sequence as extracted from the variant
+                   file.
+        xRef       The xRef as extrated from the variant file.
+
+    OPTIONS
+        -h [ --help ]
+            Print this help message.
+
+        --beta
+            This is a beta command. To run this command, you must pass the --beta
+            flag.
+
+        --reference arg
+            The reference crr file.
+
+        --output arg (=STDOUT)
+            The output file (may be omitted for stdout).
+
+        --variants arg
+            The input variant files (may be positional args).
+
+        --variant-listing arg
+            The output of another listvariants run, to be merged in to produce the
+            output of this run.
+
+        --list-long-variants
+            In addition to listing short variants, list longer variants as well
+            (10's of bases) by concatenating nearby calls.
+
+    SUPPORTED FORMAT_VERSION
+        0.3 or later
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/mkvcf.xml Thu Sep 27 13:37:59 2012 -0400 @@ -0,0 +1,1001 @@ +<tool id="cg_mkvcf" name="mkvcf(beta) 1.6" version="1.0.0"> +<!-- +This tool creates a GUI for the mkvcf function of cgatools from Complete Genomics, Inc. +written 7-31-2012 by bcrain@completegenomics.com +--> + + <description>converts to vcf</description> + + <command interpreter="perl"> + <!--run wrapper script--> + mkvcf_wrapper.pl + --reference $crr.fields.path + --output $output + --genomes $count.genomes + --source $count.sources.source + --datasource $count.sources.data_sources.data_source + #if $count.sources.data_sources.data_source=="in" + #for $m in $count.sources.data_sources.files + --input $m.input + #end for + #else + --input $count.sources.data_sources.input + #end if + #if $count.sources.source=="masterVar" or $count.sources.source=="masterVar,CNV" + $count.sources.nocalls + --calibration $count.sources.calibration + #else if $count.sources.source=="SV" + --jctscore $count.sources.jctscore + --jctside $count.sources.jctside + --jctdistance $count.sources.jctdistance + --jctlength $count.sources.jctlength + $count.sources.jctpriority + $count.sources.jcttumor + #else if $count.sources.source=="masterVar,CNV,SV" or $count.sources.source=="masterVar,CNV,SV,MEI" + $count.sources.nocalls + --calibration $count.sources.calibration + --jctscore $count.sources.jctscore + --jctside $count.sources.jctside + --jctdistance $count.sources.jctdistance + --jctlength $count.sources.jctlength + $count.sources.jctpriority + $count.sources.jcttumor + #end if + --fields $count.sources.fields + </command> + + <outputs> + <data format="vcf" name="output" label="${tool.name} output"/> + </outputs> + + <inputs> + <!--form field to select crr file--> + <param name="crr" type="select" label="Reference genome (.crr file)"> + <options from_data_table="cg_crr_files" /> + </param> + + <!--select number of 
genomes - determines which input sources to show--> + <conditional name="count"> + <param name="genomes" type="select" label="Select the number of genomes to add to the vcf file" help="Note: multi-genome vcfs (2 or more genomes) can only be generated for format version 2.0 and up"> + <option value="1" selected="true">1 - allowed data sources are masterVar, CNV, SV, MEI</option> + <option value="2">2 - allowed data sources are masterVar, CNV, SV (format v2.x)</option> + <option value="3">3 or more - allowed data sources are masterVar, CNV (format v2.x)</option> + </param> + + <when value="1"> + <!--form field to select input sources--> + <conditional name="sources"> + <param name="source" type="select" label="Data sources to be included for this genome"> + <option value="masterVar,CNV,SV,MEI" selected="true">masterVar + CNV + SV + MEI</option> + <option value="masterVar">masterVar</option> + <option value="CNV">CNV</option> + <option value="SV">SV</option> + <option value="MEI">MEI</option> + </param> + + <when value="masterVar,CNV,SV,MEI"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the genome root directory"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form fields junction threshold options--> + <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 10" /> + </param> + <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 70" /> + </param> + <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 200" /> + </param> + <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 500" /> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?"> + <option value="" selected="true">no</option> + <option 
value="--jctpriority">yes</option> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?"> + <option value="" selected="true">no</option> + <option value="--jcttumor">yes</option> + </param> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="GL">GL - Genotype likelihoods</option> + <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in 
reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment including this interval</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + <option value="SVTYPE">SVTYPE - Type of structural variation</option> + <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option> + <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option> + <option value="MATEID">MATEID - ID of mate breakend</option> + <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option> + <option value="CGA_BNDGO">CGA_BNDGO - Transcript 
name and strand of genes containing mate breakend</option> + <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option> + <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option> + <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option> + <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option> + <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option> + <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option> + <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion anchored 5'</option> + <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion anchored 3'</option> + <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option> + <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option> + <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option> + <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option> + </param> + </when> + + <when value="masterVar"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where is the input file?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="in"> + <!--form field to select mastervar files--> + <repeat name="files" title="MasterVar file" min="1" max="1"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" 
metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="Genome root directory or masterVar file" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2)."> + <validator type="empty_field" message="You must supply the genome root directory or masterVar file"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. 
Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="GL">GL - Genotype likelihoods</option> + <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic 
comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + </param> + </when> + + <when value="CNV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the genome root directory"/> + </param> + </when> + </conditional> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic 
called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + </param> + </when> + + <when value="SV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="in"> + <!--form field to select SV file--> + <repeat name="files" title="SV file" min="1" max="1"> + <param name="input" type="data" format="tabular" label="Dataset"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="Genome root directory or SV file" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/SVfile (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv)."> + <validator type="empty_field" message="You must supply the genome root directory or SV file"/> + </param> + </when> + </conditional> + + <!--form fields junction threshold options--> + <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 10" /> + </param> + <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 70" /> + </param> + <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 200" /> + </param> + <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 500" /> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?"> + <option value="" selected="true">no</option> + <option value="--jctpriority">yes</option> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?"> + <option value="" selected="true">no</option> + <option value="--jcttumor">yes</option> + </param> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" 
selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="NS">NS - Number of samples</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="SVTYPE">SVTYPE - Type of structural variation</option> + <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option> + <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option> + <option value="MATEID">MATEID - ID of mate breakend</option> + <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option> + <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option> + <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option> + <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option> + <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option> + <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option> + </param> + </when> + + <when value="MEI"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy</option> + </param> + + <when value="out"> + <!--form field to select outside list of genome directories or mastervar files--> + <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the genome root directory"/> + </param> + </when> + </conditional> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option> + <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option> + <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion, anchored 5'</option> + <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion, anchored 3'</option> + <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option> + <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option> + <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option> + <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option> + </param> + </when> + + </conditional> + </when> + + <when value="2"> + <!--form field to select input sources--> + <conditional name="sources"> + <param name="source" type="select" label="Data sources to be included for each genome"> + <option value="masterVar,CNV,SV" selected="true">masterVar + CNV + SV</option> + <option value="masterVar">masterVar</option> + <option value="CNV">CNV</option> + <option value="SV">SV</option> + </param> + + <when value="masterVar,CNV,SV"> + <!--conditional to select inputs--> + 
<conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01). For normal/tumor comparisons list the baseline genome first."> + <validator type="empty_field" message="You must supply the list of genome root directories"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. 
Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form fields junction threshold options--> + <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 10" /> + </param> + <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 70" /> + </param> + <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 200" /> + </param> + <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 500" /> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?"> + <option value="" selected="true">no</option> + <option value="--jctpriority">yes</option> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?"> + <option value="" selected="true">no</option> + <option value="--jcttumor">yes</option> + </param> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total 
number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="GL">GL - Genotype likelihoods</option> + <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for 
segment</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + <option value="SVTYPE">SVTYPE - Type of structural variation</option> + <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option> + <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option> + <option value="MATEID">MATEID - ID of mate breakend</option> + <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option> + <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option> + <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option> + <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option> + <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option> + <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option> + </param> + </when> + + <when value="masterVar"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside 
Galaxy (data on server or mounted drive)</option> + </param> + + <when value="in"> + <!--form field to select input files--> + <repeat name="files" title="MasterVar file" min="1" max="2"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="dataset_ok_validator"/> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories or masterVar files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2)."> + <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. 
Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="GL">GL - Genotype likelihoods</option> + <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic 
comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + </param> + </when> + + <when value="CNV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the list of genome root directories"/> + </param> + </when> + </conditional> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called 
level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + </param> + </when> + + <when value="SV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="in"> + <!--form field to select mastervar files--> + <repeat name="files" title="SV files" min="1" max="2"> + <param name="input" type="data" format="tabular" label="Dataset"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories or SV files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of SV files, one per line in the format /path/SVfile (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv)."> + <validator type="empty_field" message="You must supply the list of genome root directories or SV files"/> + </param> + </when> + </conditional> + + <!--form fields junction threshold options--> + <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 10" /> + </param> + <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 70" /> + </param> + <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 200" /> + </param> + <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)"> + <validator type="empty_field" message="You must enter a value, for the default value enter 500" /> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?"> + <option value="" selected="true">no</option> + <option value="--jctpriority">yes</option> + </param> + + <!--form field to select junction confidence in tumors--> + <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?"> + <option value="" selected="true">no</option> + <option value="--jcttumor">yes</option> + </param> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option 
value="all" selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="SVTYPE">SVTYPE - Type of structural variation</option> + <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option> + <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option> + <option value="MATEID">MATEID - ID of mate breakend</option> + <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option> + <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option> + <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option> + <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option> + <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option> + <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option> + </param> + </when> + </conditional> + </when> + + <when value="3"> + <!--form field to select input sources--> + <conditional name="sources"> + <param name="source" type="select" label="Data sources to be included for each genome"> + <option value="masterVar,CNV" selected="true">masterVar + CNV</option> + <option value="masterVar">masterVar</option> + <option value="CNV">CNV</option> + </param> + + <when value="masterVar,CNV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to select outside list of genome directories or mastervar files--> + <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). 
This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the list of genome root directories"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option 
value="GL">GL - Genotype likelihoods</option> + <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - 
Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + </param> + </when> + + <when value="masterVar"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="in" selected="true">imported into Galaxy</option> + <option value="out">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="in"> + <!--form field to select mastervar files--> + <repeat name="files" title="MasterVar files" min="1"> + <param name="input" type="data" format="cg_mastervar" label="Dataset"> + <validator type="dataset_ok_validator" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="1" + message="cgatools is not currently available for this build."/> + </param> + </repeat> + </when> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories or masterVar files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2)."> + <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/> + </param> + </when> + </conditional> + + <!--form field to select no-calls--> + <param name="nocalls" type="select" label="Include no-calls?"> + <option value="" selected="true">no</option> + <option value="--nocalls">yes</option> + </param> + + <!--form field to enter calibration directory--> + <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="NS">NS - Number of samples</option> + <option value="AN">AN - Total number of alleles in called genotypes</option> + <option value="AC">AC - Allele count in genotypes</option> + <option value="CGA_XR">CGA_XR - External database reference</option> + <option value="CGA_FI">CGA_FI - Functional impact</option> + <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option> + <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option> + <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option> + <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option> + <option value="GT">GT - Genotype</option> + <option value="PS">PS - Phase set</option> + <option value="FT">FT - Sample genotype filters</option> + <option value="GL">GL - Genotype likelihoods</option> 
+ <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option> + <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option> + <option value="SS">SS - Somatic status</option> + <option value="HQ">HQ - Haplotype quality</option> + <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option> + <option value="GQ">GQ - Genotype quality</option> + <option value="DP">DP - Total read depth</option> + <option value="AD">AD - Allelic depths</option> + <option value="CGA_RDP">CGA_RDP - Read depth in reference</option> + <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option> + <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option> + <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option> + <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option> + <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option> + <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option> + </param> + </when> + + <when value="CNV"> + <!--conditional to select inputs--> + <conditional name="data_sources"> + <param name="data_source" type="select" label="Where are the input files?"> + <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option> + </param> + + <when value="out"> + <!--form field to enter input file--> + <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. 
/harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01)."> + <validator type="empty_field" message="You must supply the list of genome root directories"/> + </param> + </when> + </conditional> + + <!--form field to select field names to include in vcf--> + <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names."> + <option value="all" selected="true">-- all (default) --</option> + <option value="GT">GT - Genotype</option> + <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option> + <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option> + <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option> + <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option> + <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option> + <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option> + <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option> + <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option> + <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option> + <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option> + <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option> + <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option> + <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option> + </param> + </when> + </conditional> + </when> + </conditional> + </inputs> + + <help> + +**What it does** + +This tool uses cgatools mkvcf to convert Complete Genomics masterVar files, including CNV, SV and/or MEI data, to vcf format version. 
+ +**cgatools 1.6.0 Documentation** + +Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf + +Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf + +**Command line reference**:: + + COMMAND NAME + mkvcf - Converts var file(s) or masterVar file(s) to VCF. + + DESCRIPTION + Converts var file(s) or masterVar file(s) to VCF. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --reference arg + The reference crr file. + + --output arg (=STDOUT) + The output file (may be omitted for stdout). + + --field-names arg (=GT,PS,NS,AN,AC,SS,FT,CGA_XR,CGA_FI,GQ,HQ,EHQ,CGA_CEHQ,GL, + CGA_CEGL,DP,AD,CGA_RDP,CGA_ODP,CGA_OAD,CGA_ORDP,CGA_PFAM,CGA_MIRB,CGA_RPT, + CGA_SDO,CGA_SOMC,CGA_SOMR,CGA_SOMS,CGA_GP,CGA_NP,CGA_CP,CGA_PS,CGA_CT, + CGA_TS,CGA_CL,CGA_LS,CGA_SCL,CGA_SLS,CGA_LAF,CGA_LLAF,CGA_ULAF,CGA_IS, + CGA_IDC,CGA_IDCL,CGA_IDCR,CGA_RDC,CGA_NBET,CGA_ETS,CGA_KES,CGA_BF, + CGA_MEDEL,MATEID,SVTYPE,CGA_BNDG,CGA_BNDGO,CGA_BNDMPC,CGA_BNDPOS,CGA_BNDDEF, + CGA_BNDP) + Comma-separated list of field names. By default, all fields are + included, but you may override this option to ensure only a subset of + the fields is included in the VCF output. For a description of each + field, see the cgatools user guide. + + --source-names arg (=masterVar,CNV,SV,MEI) + Comma-separated list of source names. The following source names are + available: + masterVar - Includes records extracted from the masterVar file. + CNV - Includes CNV-related records. + SV - Includes records derived from junctions files. + MEI - Includes records describing mobile element insertions. + Some of these source types are only available for more recent pipeline + versions, and some of these source types do not support multi-genome + VCFs. 
For more information about which source types are available for + which versions of the Complete Genomics pipeline software, see the + cgatools user guide. + + --genome-root arg + For each genome to include in the VCF, the genome root directory, for + example /data/GS00118-DNA_A01; this directory is expected to contain + the ASM and LIB subdirectories, for example. You must supply this + option for each genome in the VCF, unless you are using + --source-names=masterVar and you have specified the --master-var option + for each genome in the VCF. + + --master-var arg + For each genome to include in the VCF, the masterVar file. If + genome-roots parameter is given, this parameter defaults to the + masterVar in the given genome-root. + + --include-no-calls + Small variants VCF records include loci that have no + reference-inconsistent calls. + + --calibration-root arg + The directory containing calibration data. For example, there should + exist a file calibration-root/version0.0.0/metrics.tsv. This option is only + required if CGA_CEHQ or CGA_CEGL are included in the --field-names + parameter. + + --junction-file arg + For each genome to include in the VCF, the junctions file. If + genome-roots parameter is given, this parameter defaults to the + respective junctions file in the export directory. + + --junction-score-threshold arg (=10) + Junction score thresholds (discordant mate pair count). + + --junction-side-length-threshold arg (=70) + Junction side length threshold. + + --junction-distance-tolerance arg (=200) + Distance tolerance for junction compatibility. + + --junction-length-threshold arg (=500) + Length threshold for compatible junctions. + + --junction-normal-priority + Normal junction priority for vcf output. + + --junction-tumor-hc + use high confidence junctions for tumors. + + + SUPPORTED FORMAT_VERSION + 0.3 or later + </help> +</tool>
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
$| = 1; # set autoflush to screen (keeps our output ordered before cgatools' output)

# cgatools/tools/cgatools_1.6/mkvcf_wrapper.pl
#
# This is a wrapper for the cgatools mkvcf function to run cgatools mkvcf in Galaxy.
# written 8-10-2012 by bcrain@completegenomics.com
#
# Command line options:
#   --reference FILE     reference crr file (required)
#   --output FILE        VCF output file (required)
#   --source NAMES       comma-separated source names, e.g. masterVar, SV (required)
#   --datasource in|out  'in'  = every --input is a data file passed on directly,
#                        'out' = --input is a path on the server (required)
#   --input PATH         may be repeated; for datasource 'out' only the first one
#                        is used (a file/directory when --genomes is 1, otherwise
#                        a text file listing files/directories)
#   --genomes N          number of genomes selected in the Galaxy form
#   --fields NAMES       'all' or a comma-separated list of field names
#   --nocalls, --calibration DIR, --jctscore N, --jctside N, --jctdistance N,
#   --jctlength N, --jctpriority, --jcttumor
#                        passed on as the corresponding cgatools mkvcf options
#
# The cgatools version line and the assembled command are printed to STDOUT,
# then cgatools is executed. The wrapper exits with the exit status of cgatools.

#print join("\n", @ARGV), "\n";
my %opt;
GetOptions(
    \%opt,
    "reference=s", "output=s", "input=s@", "genomes=i", "source=s",
    "datasource=s", "fields=s", "nocalls", "calibration:s",
    "jctscore=i", "jctside=i", "jctdistance=i", "jctlength=i",
    "jctpriority", "jcttumor",
) or die "there is an error in the command line options.\n";

foreach my $required (qw(reference output source datasource))
{
    defined $opt{$required} or die "missing required option --$required.\n";
}

my @inputs  = $opt{input} ? @{ $opt{input} } : ();
my $genomes = defined $opt{genomes} ? $opt{genomes} : 0;

# The command is kept as a list of separate arguments so that it can be run
# without a shell: paths with spaces or shell metacharacters are passed intact.
my @command = (
    'cgatools', 'mkvcf', '--beta',
    '--reference',    $opt{reference},
    '--output',       $opt{output},
    '--source-names', $opt{source},
);

if ($opt{datasource} eq 'in')
{
    # Files imported into Galaxy: the source type alone decides the option name.
    my %flag_for = (masterVar => '--master-var', SV => '--junction-file');
    foreach my $file (@inputs)
    {
        exists $flag_for{ $opt{source} }
            or die "there is an error in the logic: wrong source $opt{source} for datasource $opt{datasource}.\n";
        push @command, $flag_for{ $opt{source} }, $file;
    }
}
elsif ($opt{datasource} eq 'out')
{
    @inputs or die "missing required option --input.\n";

    if ($genomes == 1)
    {
        # A single genome: --input is the file or genome root directory itself.
        push @command, outside_input_args($inputs[0], $opt{source});
    }
    else
    {
        # Several genomes: --input is a text file listing files/directories.
        my $list_file = $inputs[0];
        -T $list_file or die "$list_file is not a valid file.\n";
        open(my $list_fh, '<', $list_file) or die "cannot read $list_file: $!\n";
        # split ' ' splits on whitespace and ignores leading whitespace/blank lines
        my @paths = map { split ' ', $_ } <$list_fh>;
        close($list_fh);

        my $count = 0;
        foreach my $path (@paths)
        {
            $count++;
            ($genomes == 2 and $count > 2)
                and die "The number of inputs in your list file cannot be greater than the number of genomes selected.\n";
            push @command, outside_input_args($path, $opt{source});
        }
    }
}
else
{
    die "there is an error in the logic: wrong datasource $opt{datasource}.\n";
}

if ($opt{calibration})
{
    (-r "$opt{calibration}/0.0.0/metrics.tsv" or -r "$opt{calibration}/version0.0.0/metrics.tsv")
        or die "This folder does not contain the calibration data\n";
    push @command, '--calibration-root', $opt{calibration};
}

my $fields = defined $opt{fields} ? $opt{fields} : 'all';
push @command, '--field-names', $fields unless $fields eq 'all';
push @command, '--include-no-calls' if $opt{nocalls};

# Thresholds are tested with defined() so that an explicit value of 0 is passed
# on to cgatools instead of being silently replaced by the cgatools default.
my @threshold_options = (
    [ jctscore    => '--junction-score-threshold' ],
    [ jctside     => '--junction-side-length-threshold' ],
    [ jctdistance => '--junction-distance-tolerance' ],
    [ jctlength   => '--junction-length-threshold' ],
);
foreach my $threshold (@threshold_options)
{
    my ($name, $flag) = @$threshold;
    push @command, $flag, $opt{$name} if defined $opt{$name};
}

push @command, '--junction-normal-priority' if $opt{jctpriority};
push @command, '--junction-tumor-hc'        if $opt{jcttumor};

my $version = `cgatools | head -1`;
$version = '' unless defined $version;
print "$version\n";
print "@command \n";

# List form of system() runs cgatools directly, without a shell.
system(@command);
if ($? == -1)
{
    die "failed to execute cgatools: $!\n";
}
elsif ($? & 127)
{
    die sprintf("cgatools died with signal %d\n", $? & 127);
}
exit($? >> 8);

# Classifies one path given for datasource 'out' and returns the cgatools
# option name followed by the path.
#   - path contains 'masterVar' and the source is 'masterVar': --master-var
#   - path contains 'Junctions' and the source is 'SV':        --junction-file
#   - anything else is treated as a genome root directory:      --genome-root
# Dies if the file or directory does not exist.
sub outside_input_args
{
    my ($path, $source) = @_;

    if ($path =~ m/masterVar/ and $source eq 'masterVar')
    {
        -f $path or die "$path is not a valid file.\n";
        return ('--master-var', $path);
    }
    if ($path =~ m/Junctions/ and $source eq 'SV')
    {
        -f $path or die "$path is not a valid file.\n";
        return ('--junction-file', $path);
    }

    $path =~ s/\/$//; # drop a trailing slash from the directory name
    -d $path or die "$path is not a valid directory.\n";
    return ('--genome-root', $path);
}
<tool id="cg_snpdiff" name="snpdiff 1.6" version="1.0.1">
<!--
File: cgatools/tools/cgatools_1.6/snpdiff.xml
This tool creates a GUI for the snpdiff function of cgatools from Complete Genomics, Inc.
written 6-18-2012 by bcrain@completegenomics.com
updated 8-13-2012 by bcrain@completegenomics.com
-->

  <description>compares snp calls to var or masterVar file.</description> <!--adds description in toolbar-->

  <!--
  The command prints the cgatools version line and an echo of the command line
  to STDOUT, then runs cgatools snpdiff.
  The report list is built from the three report selects (each is either a
  report name or empty) by echoing them and turning the separating blanks into
  commas with tr. The previous sed pattern ' *' also matches the empty string,
  so it inserted a comma between every character of the report names.
  -->
  <command> <!--run executable-->
<!-- print version of cgatools to STDOUT-->
cgatools | head -1;

<!-- print command lines to STDOUT-->
echo "cgatools snpdiff
--reference $crr.fields.path
--variants $data_sources.varfile
--genotypes $genotype
--output-prefix cg_
--reports `echo ${report1} ${report2} ${report3} | tr -s ' ' ','`
";

<!-- execute cgatools-->
cgatools snpdiff
--reference $crr.fields.path
--variants $data_sources.varfile
--genotypes $genotype
--output-prefix cg_
--reports `echo ${report1} ${report2} ${report3} | tr -s ' ' ','`

  </command>

  <!--each output is collected from the working directory (prefix cg_) and only kept when its report was requested-->
  <outputs>
    <data format="tabular" name="output1" from_work_dir="cg_Output.tsv" label="${tool.name} Output">
      <filter>(report1 == 'Output')</filter>
    </data>
    <data format="tabular" name="output2" from_work_dir="cg_Verbose.tsv" label="${tool.name} Verbose">
      <filter>(report2 == 'Verbose')</filter>
    </data>
    <data format="tabular" name="output3" from_work_dir="cg_Stats.tsv" label="${tool.name} Stats">
      <filter>(report3 == 'Stats')</filter>
    </data>
  </outputs>

  <inputs>
    <!--form field to select crr file-->
    <param name="crr" type="select" label="Reference genome (.crr file)">
      <options from_data_table="cg_crr_files" />
    </param>

    <!--conditional to select data in/outside galaxy-->
    <conditional name="data_sources">
      <param name="data_source" type="select" label="Where is the input var or masterVar file?">
        <option value="in" selected="true">imported into Galaxy</option>
        <option value="out">located outside Galaxy (data on server or mounted drive)</option>
      </param>

      <!--form field to select input file-->
      <when value="in">
        <param name="varfile" type="data" format="cg_var,cg_mastervar" label="Var or masterVar file">
          <validator type="dataset_ok_validator" />
          <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
           metadata_name="dbkey" metadata_column="1"
           message="cgatools is not currently available for this build."/>
        </param>
      </when>

      <!--form field to enter input file-->
      <when value="out">
        <param name="varfile" type="text" label="Var or masterVar file (/path/file)" size="40" help="Var or masterVar file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2">
          <validator type="empty_field" message="You must supply a var or masterVar file"/>
        </param>
      </when>
    </conditional>

    <!--param to select genotypes file input-->
    <param name="genotype" type="data" format="tabular" label="Genotypes file with SNP calls" help="The genotypes file is a tab-delimited file with at least the following columns: Chromosome, Offset0Based, Genotypes (Optional)">
      <validator type="dataset_ok_validator" />
      <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
       metadata_name="dbkey" metadata_column="1"
       message="cgatools is not currently available for this build."/>
      <!--<validator type="expression" message="Dataset does not match selected build.">value.dbkey == $crr.fields.dbkey</validator>-->
    </param>

    <!--params to select reports; the value is the report name, or empty when the report is not wanted-->
    <param name="report1" type="select" label="Create report Output">
      <option value="">no</option>
      <option value="Output" selected="true">yes</option>
    </param>
    <param name="report2" type="select" label="Create report Verbose">
      <option value="">no</option>
      <option value="Verbose">yes</option>
    </param>
    <param name="report3" type="select" label="Create report Stats">
      <option value="">no</option>
      <option value="Stats">yes</option>
    </param>
  </inputs>

  <help>

**What it does**

This tool compares snp calls to a Complete Genomics variant file.

**cgatools 1.6.0 Documentation**

Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf

Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf

**Command line reference**::

        COMMAND NAME
            snpdiff - Compares snp calls to a Complete Genomics variant file.

        DESCRIPTION
            Compares the snp calls in the "genotypes" file to the calls in a Complete
            Genomics variant file. The genotypes file is a tab-delimited file with at
            least the following columns (additional columns may be given):

                Chromosome      (Required) The name of the chromosome.
                Offset0Based    (Required) The 0-based offset in the chromosome.
                GenotypesStrand (Optional) The strand of the calls in the Genotypes
                                column (+ or -, defaults to +).
                Genotypes       (Optional) The calls, one per allele. The following
                                calls are recognized:
                                A,C,G,T A called base.
                                N       A no-call.
                                -       A deleted base.
                                .       A non-snp variation.

            The output is a tab-delimited file consisting of the columns of the
            original genotypes file, plus the following additional columns:

                Reference         The reference base at the given position.
                VariantFile       The calls made by the variant file, one per allele.
                                  The character codes are the same as is described for
                                  the Genotypes column.
                DiscordantAlleles (Only if Genotypes is present) The number of
                                  Genotypes alleles that are discordant with calls in
                                  the VariantFile. If the VariantFile is described as
                                  haploid at the given position but the Genotypes is
                                  diploid, then each genotype allele is compared
                                  against the haploid call of the VariantFile.
                NoCallAlleles     (Only if Genotypes is present) The number of
                                  Genotypes alleles that were no-called by the
                                  VariantFile. If the VariantFile is described as
                                  haploid at the given position but the Genotypes is
                                  diploid, then a VariantFile no-call is counted twice.

            The verbose output is a tab-delimited file consisting of the columns of the
            original genotypes file, plus the following additional columns:

                Reference   The reference base at the given position.
                VariantFile The call made by the variant file for one allele (there is
                            a line in this file for each allele). The character codes
                            are the same as is described for the Genotypes column.
                [CALLS]     The rest of the columns are pasted in from the VariantFile,
                            describing the variant file line used to make the call.

            The stats output is a comma-separated file with several tables describing
            the results of the snp comparison, for each diploid genotype. The tables
            all describe the comparison result (column headers) versus the genotype
            classification (row labels) in different ways. The "Locus classification"
            tables have the most detailed match classifications, while the "Locus
            concordance" tables roll these match classifications up into "discordance"
            and "no-call". A locus is considered discordant if it is discordant for
            either allele. A locus is considered no-call if it is concordant for both
            alleles but has a no-call on either allele. The "Allele concordance"
            describes the comparison result on a per-allele basis.

        OPTIONS
            -h [ --help ]
                Print this help message.

            --reference arg
                The input crr file.

            --variants arg
                The input variant file.

            --genotypes arg
                The input genotypes file.

            --output-prefix arg
                The path prefix for all output reports.

            --reports arg (=Output,Verbose,Stats)
                Comma-separated list of reports to generate. A report is one of:
                    Output  The output genotypes file.
                    Verbose The verbose output file.
                    Stats   The stats output file.

        SUPPORTED FORMAT_VERSION
            0.3 or later
  </help>
</tool>
<tool id="cg_testvariants" name="testvariants(beta) 1.6" version="1.0.1">
<!--
File: cgatools/tools/cgatools_1.6/testvariants.xml
This tool creates a GUI for the testvariants function of cgatools from Complete Genomics, Inc.
written 6-18-2012 by bcrain@completegenomics.com
updated 8-14-2012 by bcrain@completegenomics.com
-->

  <description>test for the presence of variants</description>

  <!--
  The command prints the cgatools version line and an echo of the command line
  to STDOUT, then runs cgatools testvariants with the beta flag.
  The variant files are either the datasets of the "files" repeat (data in
  Galaxy) or the contents of the user-supplied list file, expanded with cat
  (data outside Galaxy).
  -->
  <command>
<!-- print version of cgatools to STDOUT-->
cgatools | head -1;

<!-- print command lines to STDOUT-->
echo "cgatools testvariants --beta
--reference ${crr.fields.path}
--output $output
--input $listing
--variants
#if $data_sources.data_source == "in" <!--data in galaxy-->
#for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
${v.input}
#end for
#else <!--data outside galaxy-->
`cat $data_sources.list`
#end if
";

<!-- execute cgatools-->
cgatools testvariants
--beta
--reference ${crr.fields.path}
--output $output
--input $listing
--variants
#if $data_sources.data_source == "in" <!--data in galaxy-->
  #for $v in $data_sources.file_types.files <!--get each var/mastervar file-->
    ${v.input}
  #end for
#else <!--data outside galaxy-->
  `cat $data_sources.list`
#end if
  </command>

  <!--single tabular output, written by cgatools to the path given as the output option-->
  <outputs>
    <data format="tabular" name="output" label="${tool.name} output"/>
  </outputs>

  <inputs>
    <!--form field to select crr file-->
    <param name="crr" type="select" label="Reference genome (.crr file)">
      <options from_data_table="cg_crr_files" />
    </param>

    <!--form fields to select variant list-->
    <param name="listing" type="data" format="tabular" label="Select variant list (output of listvariants)">
      <validator type="dataset_ok_validator" />
      <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
       metadata_name="dbkey" metadata_column="1"
       message="cgatools is not currently available for this build."/>
    </param>

    <!--conditional to select data in/outside galaxy-->
    <conditional name="data_sources">
      <param name="data_source" type="select" label="Where are the input files?">
        <option value="in" selected="true">imported into Galaxy</option>
        <option value="out">located outside Galaxy (data on server or mounted drive)</option>
      </param>

      <when value="in">
        <!--conditional to select input file type; both branches define a repeat named "files" so the command can loop over it either way-->
        <conditional name="file_types">
          <param name="file_type" type="select" label="Select the input file type">
            <option value="var" selected="true">var files</option>
            <option value="mastervar">masterVar files</option>
          </param>

          <!--form field to select variant files-->
          <when value="var">
            <repeat name="files" title="Var file" min="1">
              <param name="input" type="data" format="cg_var" label="Dataset">
                <validator type="dataset_ok_validator" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
                 metadata_name="dbkey" metadata_column="1"
                 message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>

          <!--form field to select masterVar files-->
          <when value="mastervar">
            <repeat name="files" title="MasterVar file" min="1">
              <param name="input" type="data" format="cg_mastervar" label="Dataset">
                <validator type="dataset_ok_validator" />
                <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
                 metadata_name="dbkey" metadata_column="1"
                 message="cgatools is not currently available for this build."/>
              </param>
            </repeat>
          </when>
        </conditional>
      </when>

      <!--form field to enter list file-->
      <when value="out">
        <param name="list" type="text" label="Enter file containing list of var or masterVar files (/path/file)" size="200" help="This file should contain a list of var or masterVar files, one per line in the format /path/varfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2), the var or masterVar files can be compressed (gz, bz2).">
          <validator type="empty_field" message="You must supply a file containing a list of var or masterVar files"/>
        </param>
      </when>
    </conditional>
  </inputs>

  <help>

**What it does**

This tool uses the cgatools testvariants to test var or masterVar files for the presence of variants.

**cgatools 1.6.0 Documentation**

Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf

Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf

**Command line reference**::

        COMMAND NAME
            testvariants - Tests variant files for presence of variants.

        DESCRIPTION
            Tests variant files for presence of variants. The output is a tab-delimited
            file consisting of the columns of the input variants file, plus a column
            for each assembly results file that contains a character code for each
            allele. The character codes have meaning as follows:

                0 This allele of this genome is consistent with the reference at this
                  locus but inconsistent with the variant.
                1 This allele of this genome has the input variant at this locus.
                N This allele of this genome has no-calls but is consistent with the
                  input variant.

        OPTIONS
            -h [ --help ]
                Print this help message.

            --beta
                This is a beta command. To run this command, you must pass the --beta
                flag.

            --reference arg
                The reference crr file.

            --input arg (=STDIN)
                The input variants to test for.

            --output arg (=STDOUT)
                The output file (may be omitted for stdout).

            --variants arg
                The input variant files (may be passed in as arguments at the end of
                the command).

        SUPPORTED FORMAT_VERSION
            0.3 or later
  </help>
</tool>
<tool id="cg_varfilter" name="varfilter(beta) 1.6" version="1.0.1">
<!--
File: cgatools/tools/cgatools_1.6/varfilter.xml
This tool creates a GUI for the varfilter function of cgatools from Complete Genomics, Inc.
The function is called via a Perl script varfilter_wrapper.pl, designed to generate the correctly formatted filters to append to the input file on the command line.
written 6-18-2012 by bcrain@completegenomics.com
updated 8-13-2012 by bcrain@completegenomics.com
-->

  <description>copies input file, applying filters.</description> <!--adds description in toolbar-->

  <!--
  The wrapper receives the reference, output and input paths once, followed by
  one group of five filter options per entry of the "filters" repeat.
  The two score fields are text fields that may be empty, in which case the
  option is passed without a value.
  -->
  <command interpreter="perl">
    <!--run wrapper script-->
    varfilter_wrapper.pl
    --reference $crr.fields.path
    --output $output
    #if $data_sources.data_source == "in" <!--data in galaxy-->
      --input $data_sources.file_types.input
    #else <!--data outside galaxy-->
      --input $data_sources.input
    #end if
    #for $f in $filters
      --zygosity $f.zygosity
      --vartype $f.vartype
      --varscorevaf $f.varscorevaf
      --varscoreeaf $f.varscoreeaf
      --varquality $f.varquality
    #end for
  </command>

  <outputs>
    <data format="cg_var" name="output" label="${tool.name} output"/>
  </outputs>

  <inputs>
    <!--form field to select crr file-->
    <param name="crr" type="select" label="Reference genome (.crr file)">
      <options from_data_table="cg_crr_files" />
    </param>

    <!--conditional to select data in/outside galaxy-->
    <conditional name="data_sources">
      <param name="data_source" type="select" label="Where is the input file?">
        <option value="in" selected="true">imported into Galaxy</option>
        <option value="out">located outside Galaxy (data on server or mounted drive)</option>
      </param>

      <when value="in">
        <!--conditional to select input file type-->
        <conditional name="file_types">
          <param name="file_type" type="select" label="Select the input file type">
            <option value="var" selected="true">var file</option>
            <option value="mastervar">masterVar file</option>
          </param>

          <!--form field to select var file-->
          <when value="var">
            <param name="input" type="data" format="cg_var" label="Var file">
              <validator type="dataset_ok_validator" />
              <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
               metadata_name="dbkey" metadata_column="1"
               message="cgatools is not currently available for this build."/>
            </param>
          </when>

          <!--form field to select masterVar file-->
          <when value="mastervar">
            <param name="input" type="data" format="cg_mastervar" label="MasterVar file">
              <validator type="dataset_ok_validator" />
              <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
               metadata_name="dbkey" metadata_column="1"
               message="cgatools is not currently available for this build."/>
            </param>
          </when>
        </conditional>
      </when>

      <!--form field to enter the path of a single var or masterVar file-->
      <when value="out">
        <param name="input" type="text" label="Var or masterVar file (/path/file)" size="200" help="The var or masterVar file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2">
          <validator type="empty_field" message="You must supply var or masterVar file"/>
        </param>
      </when>
    </conditional>


    <!-- formfields to add filters; the value NA means that no selector is added for that field -->
    <repeat name="filters" title="Filter" min="1">
      <param name="zygosity" type="select" label="Filter out call (set to no-call) IF locus IS">
        <option value="NA">-- keep all loci --</option>
        <option value="hom">homozygous</option>
        <option value="het">heterozygous</option>
      </param>

      <param name="vartype" type="select" label="AND varType IS">
        <option value="NA">-- keep all varTypes --</option>
        <option value="snp">snp</option>
        <option value="ins">ins</option>
        <option value="del">del</option>
        <option value="sub">sub</option>
        <option value="ref">ref</option>
      </param>

      <param name="varscorevaf" type="text" label="AND varScoreVAF IS LESS THAN (integer)" value=""/>
      <param name="varscoreeaf" type="text" label="AND varScoreEAF IS LESS THAN (integer)" value=""/>

      <!--
      The values are spelled as in the varQuality example of the help below
      (VQHIGH, VQLOW). The former value VQHigh did not match that spelling.
      NOTE(review): presumably cgatools compares this value case-sensitively; confirm.
      -->
      <param name="varquality" type="select" label="AND varQuality IS NOT">
        <option value="NA">-- keep all varQuality --</option>
        <option value="VQHIGH">VQHIGH</option>
        <option value="VQLOW">VQLOW</option>
      </param>
    </repeat>
  </inputs>

  <help>

**What it does**

This tool uses cgatools varfilter to copy input var file or masterVar file to output, applying specified filters. Loci that are filtered out are set to no-call.

**cgatools 1.6.0 Documentation**

Userguide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf

Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf

**Command line reference**::

        COMMAND NAME
            varfilter - Copies input var file or masterVar file to output, applying
            specified filters.

        DESCRIPTION
            Copies input var file or masterVar file to output, applying specified
            filters (which are available to all cgatools commands that read a var file
            or masterVar file as input). Filters are specified by appending the filter
            specification to the var file name on the command line. For example:

            /path/to/var.tsv.bz2#varQuality!=VQHIGH

            The preceding example filters out any calls marked as VQLOW. The filter
            specification follows the "#" sign, and consists of a list of filters to
            apply, separated by a comma. Each filter is a colon-separated list of call
            selectors. Any scored call that passes all the colon-separated call
            selectors for one or more of the comma-separated filters is turned into a
            no-call. The following call selectors are available:

                hom             Selects only calls in homozygous loci.
                het             Selects any scored call not selected by the hom selector.
                varType=XX      Selects calls whose varType is XX.
                varScoreVAF&lt;XX  Selects calls whose varScoreVAF &lt; XX.
                varScoreEAF&lt;XX  Selects calls whose varScoreEAF &lt; XX.
                varQuality!=XX  Selects calls whose varQuality is not XX.

            Here is an example that filters homozygous SNPs with varScoreVAF &lt; 25 and
            heterozygous insertions with varScoreEAF &lt; 50:


            '/path/to/var.tsv.bz2#hom:varType=snp:varScoreVAF&lt;25,het:varType=ins:varScoreEAF&lt;50'


        OPTIONS
            -h [ --help ]
                Print this help message.

            --beta
                This is a beta command. To run this command, you must pass the --beta flag.

            --reference arg
                The reference crr file.

            --input arg
                The input var file or masterVar file (typically with filters specified).

            --output arg (=STDOUT)
                The output file (may be omitted for stdout).

        SUPPORTED FORMAT_VERSION
            0.3 or later
  </help>
</tool>
#!/usr/bin/perl
# File: cgatools/tools/cgatools_1.6/varfilter_wrapper.pl
#
# This is a wrapper for the cgatools varfilter function to run cgatools varfilter in Galaxy.
# The wrapper generates the filter(s) in the correct format to be used with the input file.
# written 6-1-2012 by bcrain@completegenomics.com
#
# Options:
#   --reference FILE   reference crr file (required)
#   --input FILE       var or masterVar file to filter (required)
#   --output FILE      file to write the filtered copy to (required)
# The remaining options are repeatable; the i-th occurrence of each option
# describes the i-th filter:
#   --zygosity VALUE      zygosity selector (e.g. hom, het), or NA for none
#   --vartype VALUE       varType to select, or NA for none
#   --varscorevaf [INT]   select calls with varScoreVAF < INT (missing or 0: none)
#   --varscoreeaf [INT]   select calls with varScoreEAF < INT (missing or 0: none)
#   --varquality VALUE    select calls whose varQuality is not VALUE, or NA for none
#
# The filters are appended to the input file name as
#   FILE#selector:selector,selector:selector,...
# and cgatools varfilter --beta is run on the result. The exit status of
# cgatools becomes the exit status of this script.
use strict;
use warnings;
use Getopt::Long;

$| = 1;    # set autoflush to screen

# True when a select-type option carries a real value, i.e. it is defined,
# non-empty and not the 'NA' placeholder.
sub has_value {
    my ($value) = @_;
    return defined $value && $value ne '' && $value ne 'NA';
}

# Build one filter (a colon-separated list of call selectors) from the five
# option values that describe it. Returns '' when no selector is set.
sub build_filter {
    my ($zygosity, $vartype, $score_vaf, $score_eaf, $quality) = @_;

    my @selectors;
    push @selectors, $zygosity                    if has_value($zygosity);
    push @selectors, 'varType=' . $vartype        if has_value($vartype);
    # The score options are parsed as integers; 0 (also the value of an
    # option given without an argument) means "no score selector".
    push @selectors, 'varScoreVAF<' . $score_vaf  if $score_vaf;
    push @selectors, 'varScoreEAF<' . $score_eaf  if $score_eaf;
    push @selectors, 'varQuality!=' . $quality    if has_value($quality);

    return join ':', @selectors;
}

my ($reference, $input, $output);
my (@zygosity, @vartype, @varscorevaf, @varscoreeaf, @varquality);

#print join("\n", @ARGV), "\n";
# The score options take an OPTIONAL integer (":i") so that an option passed
# without a value still adds an element (0) and the five lists stay aligned.
GetOptions(
    'reference=s'    => \$reference,
    'input=s'        => \$input,
    'output=s'       => \$output,
    'zygosity=s@'    => \@zygosity,
    'vartype=s@'     => \@vartype,
    'varscorevaf:i@' => \@varscorevaf,
    'varscoreeaf:i@' => \@varscoreeaf,
    'varquality=s@'  => \@varquality,
) or die "varfilter_wrapper.pl: invalid command line options\n";

defined $reference && defined $input && defined $output
    or die "varfilter_wrapper.pl: --reference, --input and --output are required\n";

# One filter per --zygosity occurrence; filters without any selector are dropped.
my @filters;
for my $i (0 .. $#zygosity) {
    my $filter = build_filter(
        $zygosity[$i], $vartype[$i],
        $varscorevaf[$i], $varscoreeaf[$i],
        $varquality[$i],
    );
    push @filters, $filter if $filter ne '';
}
my $append = @filters ? '#' . join(',', @filters) : '';

# Report the command line and the cgatools version on STDOUT.
my $version = `cgatools | head -1`;
$version = '' unless defined $version;
print "cgatools varfilter
--beta
--reference $reference
--output $output
--input '${input}${append}'\n";
print "$version\n";

# Run cgatools without a shell: the file names and the filter specification
# (which contains '#' and '<') are passed as literal arguments, so no quoting
# is needed and nothing in them can be interpreted by a shell.
my @command = (
    'cgatools', 'varfilter', '--beta',
    '--reference', $reference,
    '--output',    $output,
    '--input',     $input . $append,
);
my $status = system(@command);

if ($status == -1) {
    die "varfilter_wrapper.pl: failed to execute cgatools: $!\n";
}
if ($status & 127) {
    die sprintf("varfilter_wrapper.pl: cgatools died with signal %d\n", $status & 127);
}
exit($status >> 8);