Mercurial > repos > blankenberg > naive_variant_detector
changeset 1:55b4460cd0ce
Add naive variant detector tool.
author | Daniel Blankenberg <dan@bx.psu.edu> |
---|---|
date | Tue, 14 May 2013 10:14:52 -0400 |
parents | 4f99c0ee5d2c |
children | 790104f4918e |
files | dependency_configs/tool_dependencies.xml test-data/fake_phiX174_reads_1.bam test-data/fake_phiX174_reads_1_test_out_1.vcf test-data/phiX174.fasta tool-data/sam_fa_indices.loc.sample tool_data_table_conf.xml.sample tools/naive_variant_detector.py tools/naive_variant_detector.xml |
diffstat | 8 files changed, 384 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dependency_configs/tool_dependencies.xml Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,11 @@ +<tool_dependency> + <package name="numpy" version="1.7.1"> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb" /> + </package> + <package name="pyBamParser" version="0.0.1"> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="pybamparser" owner="blankenberg" changeset_revision="f513cc9b881c" /> + </package> + <package name="pyBamTools" version="0.0.1"> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="pybamtools" owner="blankenberg" changeset_revision="03f1db7f814d" /> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fake_phiX174_reads_1_test_out_1.vcf Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,51 @@ +##fileformat=VCFv4.1 +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##FORMAT=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##FORMAT=<ID=NC,Number=.,Type=String,Description="Nucleotide and indel counts"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A Fake phiX Sample +phiX174 1411 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=1, +phiX174 1412 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=3, +phiX174 1413 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=5, +phiX174 1414 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=6, +phiX174 1415 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=7, +phiX174 1416 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=8, +phiX174 1417 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=9, +phiX174 1418 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=10, +phiX174 1419 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1420 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1421 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1422 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=10, +phiX174 1423 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1424 . C A . . AC=7;AF=0.7 GT:AC:AF:NC 1/0:7:0.7:A=7,C=3, +phiX174 1425 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1426 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=10, +phiX174 1427 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1428 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1429 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1430 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1431 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1432 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=10, +phiX174 1433 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1434 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1435 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1436 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1437 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1438 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1439 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=10, +phiX174 1440 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1441 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=10, +phiX174 1442 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1443 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=10, +phiX174 1444 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1445 . C . . . AC=;AF= GT:AC:AF:NC 0/0:::C=10, +phiX174 1446 . C T . . AC=3;AF=0.3 GT:AC:AF:NC 0/1:3:0.3:C=7,T=3, +phiX174 1447 . T A . . AC=2;AF=0.222222222222 GT:AC:AF:NC 0/0:2:0.222222222222:A=2,T=7, +phiX174 1448 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=7, +phiX174 1449 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=5, +phiX174 1450 . T . . . AC=;AF= GT:AC:AF:NC 0/0:::T=4, +phiX174 1451 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=3, +phiX174 1452 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::A=2, +phiX174 1453 . G . . . AC=;AF= GT:AC:AF:NC 0/0:::G=1,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX174.fasta Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sam_fa_indices.loc.sample Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_indices.loc +#file has this format (white space characters are TAB characters): +# +#index <seq> <location> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/sam/, +#then the sam_fa_indices.loc entry would look like this: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +# +#and your /depot/data2/galaxy/sam/ directory +#would contain hg18.fa and hg18.fa.fai files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai +# +#Your sam_fa_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Location of SAMTools indexes and other files --> + <table name="sam_fa_indexes" comment_char="#"> + <columns>line_type, value, path</columns> + <file path="tool-data/sam_fa_indices.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/naive_variant_detector.py Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,63 @@ +#Dan Blankenberg +import sys +import optparse + +from pyBamParser.bam import Reader +from pyBamTools.genotyping.naive import VCFReadGroupGenotyper + +def main(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-b', '--bam', dest='bam_file', action='append', type="string", default=[], help='BAM filename, optionally index filename. Multiple allowed.' ) + parser.add_option( '-i', '--index', dest='index_file', action='append', type="string", default=[], help='optionally index filename. Multiple allowed.' ) + parser.add_option( '-o', '--output_vcf_filename', dest='output_vcf_filename', action='store', default = None, type="string", help='Output VCF filename' ) + parser.add_option( '-r', '--reference_genome_filename', dest='reference_genome_filename', action='store', default = None, type="string", help='Input reference file' ) + parser.add_option( '-v', '--variants_only', dest='variants_only', action='store_true', default = False, help='Report only sites with a possible variant allele.' ) + parser.add_option( '-s', '--use_strand', dest='use_strand', action='store_true', default = False, help='Report counts by strand' ) + parser.add_option( '-p', '--ploidy', dest='ploidy', action='store', type="int", default=2, help='Ploidy. Default=2.' ) + parser.add_option( '-d', '--min_support_depth', dest='min_support_depth', action='store', type="int", default=0, help='Minimum number of reads needed to consider a REF/ALT. Default=0.' ) + parser.add_option( '-q', '--min_base_quality', dest='min_base_quality', action='store', type="int", default=None, help='Minimum base quality.' ) + parser.add_option( '-m', '--min_mapping_quality', dest='min_mapping_quality', action='store', type="int", default=None, help='Minimum mapping.' ) + parser.add_option( '-t', '--coverage_dtype', dest='coverage_dtype', action='store', type="string", default='uint8', help='dtype to use for coverage array' ) + parser.add_option( '--region', dest='region', action='append', type="string", default=[], help='region' ) + (options, args) = parser.parse_args() + + if len( options.bam_file ) == 0: + print >>sys.stderr, 'You must provide at least one bam (-b) file.' + parser.print_help( sys.stderr ) + sys.exit( 1 ) + if options.index_file: + assert len( options.index_file ) == len( options.bam_file ), "If you provide a name for an index file, you must provide the index name for all bam files." + bam_files = zip( options.bam_file, options.index_file ) + else: + bam_files = [ ( x, ) for x in options.bam_file ] + if not options.reference_genome_filename: + print >> sys.stderr, "Warning: Reference file has not been specified. Providing a reference genome is highly recommended." + if options.output_vcf_filename: + out = open( options.output_vcf_filename, 'wb' ) + else: + out = sys.stdout + + regions = [] + if options.region: + for region in options.region: + region_split = region.split( ":" ) + region = region_split.pop( 0 ) + if region_split: + region_split = filter( bool, region_split[0].split( '-' ) ) + if region_split: + if len( region_split ) != 2: + print >> sys.stderr, "You must specify both a start and an end, or only a chromosome when specifying regions." + cleanup_before_exit( tmp_dir ) + sys.exit( 1 ) + region = tuple( [ region ] + map( int, region_split ) ) + regions.append( region ) + + coverage = VCFReadGroupGenotyper( map( lambda x: Reader( *x ), bam_files ), options.reference_genome_filename, dtype=options.coverage_dtype, + min_support_depth=options.min_support_depth, min_base_quality=options.min_base_quality, min_mapping_quality=options.min_mapping_quality, + restrict_regions=regions, use_strand=options.use_strand ) + for line in coverage.iter_vcf( ploidy=options.ploidy, variants_only=options.variants_only ): + out.write( "%s\n" % line ) + out.close() + +if __name__ == "__main__": main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/naive_variant_detector.xml Tue May 14 10:14:52 2013 -0400 @@ -0,0 +1,145 @@ +<tool id="naive_variant_detector" name="Naive Variant Detector" version="0.0.1"> + <description>on BAM files</description> + <requirements> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="0.0.1">pyBamParser</requirement> + <requirement type="package" version="0.0.1">pyBamTools</requirement> + </requirements> + <stdio> + <exit_code range="1:" err_level="fatal" /> + <exit_code range=":-1" err_level="fatal" /> + </stdio> + <command interpreter="python">naive_variant_detector.py + -o "${output_vcf}" + + #for $input_bam in $reference_source.input_bams: + -b "${input_bam.input_bam}" + -i "${input_bam.input_bam.metadata.bam_index}" + #end for + + #if $reference_source.reference_source_selector != "history": + -r "${reference_source.ref_file.fields.path}" + #elif $reference_source.ref_file: + -r "${reference_source.ref_file}" + #end if + + #for $region in $regions: + --region "${region.chromosome}:${region.start}-${region.end}" + #end for + + ${variants_only} + + ${use_strand} + + --ploidy "${$ploidy}" + + --min_support_depth "${min_support_depth}" + + #if str($min_base_quality): + --min_base_quality "${min_base_quality}" + #end if + + #if str($min_mapping_quality): + --min_mapping_quality "${min_mapping_quality}" + #end if + + --coverage_dtype "${coverage_dtype}" + + </command> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <repeat name="input_bams" title="BAM file" min="1" > + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + </repeat> + <param name="ref_file" type="select" label="Using reference genome" > + <options from_data_table="sam_fa_indexes"> + <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> <!-- FIX ME!!!! --> + <repeat name="input_bams" title="BAM file" min="1" > + <param name="input_bam" type="data" format="bam" label="BAM file" > + </param> + </repeat> + <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" /> + </when> + </conditional> + + <repeat name="regions" title="Restrict to regions" min="0" > + <param name="chromosome" type="text" value="" optional="False" label="Chromosome" /> + <param name="start" type="integer" value="" optional="True" label="Start" /> + <param name="end" type="integer" value="" optional="True" label="End" /> + </repeat> + + <!-- TODO: enhance filtering --> + <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" /> + <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" /> + <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" /> + + + <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" /> + <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with with possible alternate alleles"/> + + <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/> + + <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM"> + <option value="uint8" selected="True">uint8</option> + <option value="uint16">uint16</option> + <option value="uint32">uint32</option> + <option value="uint64">uint64</option> + </param> + + </inputs> + <outputs> + <data format="vcf" name="output_vcf" /> + </outputs> + <help> +**What it does** + +This tool is a naive variant detector. + +------ + +**Inputs** + +Accepts one or more BAM input files. + + +**Outputs** + +The output is in VCF format. + +------ + +**Citation** + +If you use this tool, please cite Blankenberg D, et al. *In preparation.* + + </help> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="input_bam" value="test-data/fake_phiX174_reads_1.bam" ftype="bam" /> + <param name="ref_file" value="test-data/phiX174.fasta" ftype="fasta" /> + <param name="min_support_depth" value="0" /> + <param name="min_base_quality" value="" /> + <param name="min_mapping_quality" value="" /> + <param name="ploidy" value="2" /> + <param name="variants_only" value="False" /> + <param name="use_strand" value="False" /> + <param name="coverage_dtype" value="uint8" /> + <output name="output_vcf" file="test-data/fake_phiX174_reads_1_test_out_1.vcf" compare="contains" /> + </test> + </tests> + +</tool>