changeset 1:55b4460cd0ce

Add naive variant detector tool.
author Daniel Blankenberg <dan@bx.psu.edu>
date Tue, 14 May 2013 10:14:52 -0400
parents 4f99c0ee5d2c
children 790104f4918e
files dependency_configs/tool_dependencies.xml test-data/fake_phiX174_reads_1.bam test-data/fake_phiX174_reads_1_test_out_1.vcf test-data/phiX174.fasta tool-data/sam_fa_indices.loc.sample tool_data_table_conf.xml.sample tools/naive_variant_detector.py tools/naive_variant_detector.xml
diffstat 8 files changed, 384 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dependency_configs/tool_dependencies.xml	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,11 @@
+<tool_dependency>
+    <package name="numpy" version="1.7.1">
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb" />
+    </package>
+    <package name="pyBamParser" version="0.0.1">
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="pybamparser" owner="blankenberg" changeset_revision="f513cc9b881c" />
+    </package>
+    <package name="pyBamTools" version="0.0.1">
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="pybamtools" owner="blankenberg" changeset_revision="03f1db7f814d" />
+    </package>
+</tool_dependency>
Binary file test-data/fake_phiX174_reads_1.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fake_phiX174_reads_1_test_out_1.vcf	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,51 @@
+##fileformat=VCFv4.1
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##FORMAT=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##FORMAT=<ID=NC,Number=.,Type=String,Description="Nucleotide and indel counts">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	A Fake phiX Sample
+phiX174	1411	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=1,
+phiX174	1412	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=3,
+phiX174	1413	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=5,
+phiX174	1414	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=6,
+phiX174	1415	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=7,
+phiX174	1416	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=8,
+phiX174	1417	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=9,
+phiX174	1418	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=10,
+phiX174	1419	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1420	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1421	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1422	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=10,
+phiX174	1423	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1424	.	C	A	.	.	AC=7;AF=0.7	GT:AC:AF:NC	1/0:7:0.7:A=7,C=3,
+phiX174	1425	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1426	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=10,
+phiX174	1427	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1428	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1429	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1430	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1431	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1432	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=10,
+phiX174	1433	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1434	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1435	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1436	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1437	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1438	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1439	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=10,
+phiX174	1440	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1441	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=10,
+phiX174	1442	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1443	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=10,
+phiX174	1444	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1445	.	C	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::C=10,
+phiX174	1446	.	C	T	.	.	AC=3;AF=0.3	GT:AC:AF:NC	0/1:3:0.3:C=7,T=3,
+phiX174	1447	.	T	A	.	.	AC=2;AF=0.222222222222	GT:AC:AF:NC	0/0:2:0.222222222222:A=2,T=7,
+phiX174	1448	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=7,
+phiX174	1449	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=5,
+phiX174	1450	.	T	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::T=4,
+phiX174	1451	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=3,
+phiX174	1452	.	A	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::A=2,
+phiX174	1453	.	G	.	.	.	AC=;AF=	GT:AC:AF:NC	0/0:::G=1,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174.fasta	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/sam_fa_indices.loc.sample	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,28 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a sam_fa_indices.loc file 
+#similar to this one (store it in this directory) that points to 
+#the directories in which those files are stored. The sam_fa_indices.loc 
+#file has this format (white space characters are TAB characters):
+#
+#index	<seq>	<location>
+#
+#So, for example, if you had hg18 indexed stored in 
+#/depot/data2/galaxy/sam/, 
+#then the sam_fa_indices.loc entry would look like this:
+#
+#index	hg18	/depot/data2/galaxy/sam/hg18.fa
+#
+#and your /depot/data2/galaxy/sam/ directory
+#would contain hg18.fa and hg18.fa.fai files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.fa
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.fa.fai
+#
+#Your sam_fa_indices.loc file should include an entry per line for 
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#index	hg18	/depot/data2/galaxy/sam/hg18.fa
+#index	hg19	/depot/data2/galaxy/sam/hg19.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of SAMTools indexes and other files -->
+    <table name="sam_fa_indexes" comment_char="#">
+        <columns>line_type, value, path</columns>
+        <file path="tool-data/sam_fa_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/naive_variant_detector.py	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,63 @@
+#Dan Blankenberg
+import sys
+import optparse
+
+from pyBamParser.bam import Reader
+from pyBamTools.genotyping.naive import VCFReadGroupGenotyper
+
+def main():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-b', '--bam', dest='bam_file', action='append', type="string", default=[], help='BAM filename, optionally index filename. Multiple allowed.' )
+    parser.add_option( '-i', '--index', dest='index_file', action='append', type="string", default=[], help='optionally index filename. Multiple allowed.' )
+    parser.add_option( '-o', '--output_vcf_filename', dest='output_vcf_filename', action='store', default = None, type="string", help='Output VCF filename' )
+    parser.add_option( '-r', '--reference_genome_filename', dest='reference_genome_filename', action='store', default = None, type="string", help='Input reference file' )
+    parser.add_option( '-v', '--variants_only', dest='variants_only', action='store_true', default = False, help='Report only sites with a possible variant allele.' )
+    parser.add_option( '-s', '--use_strand', dest='use_strand', action='store_true', default = False, help='Report counts by strand' )
+    parser.add_option( '-p', '--ploidy', dest='ploidy', action='store', type="int", default=2, help='Ploidy. Default=2.' )
+    parser.add_option( '-d', '--min_support_depth', dest='min_support_depth', action='store', type="int", default=0, help='Minimum number of reads needed to consider a REF/ALT. Default=0.' )
+    parser.add_option( '-q', '--min_base_quality', dest='min_base_quality', action='store', type="int", default=None, help='Minimum base quality.' )
+    parser.add_option( '-m', '--min_mapping_quality', dest='min_mapping_quality', action='store', type="int", default=None, help='Minimum mapping.' )
+    parser.add_option( '-t', '--coverage_dtype', dest='coverage_dtype', action='store', type="string", default='uint8', help='dtype to use for coverage array' )
+    parser.add_option( '--region', dest='region', action='append', type="string", default=[], help='region' )
+    (options, args) = parser.parse_args()
+    
+    if len( options.bam_file ) == 0:
+        print >>sys.stderr, 'You must provide at least one bam (-b) file.'
+        parser.print_help( sys.stderr )
+        sys.exit( 1 )
+    if options.index_file:
+        assert len( options.index_file ) == len( options.bam_file ), "If you provide a name for an index file, you must provide the index name for all bam files."
+        bam_files = zip( options.bam_file, options.index_file )
+    else:
+        bam_files = [ ( x, ) for x in options.bam_file ]
+    if not options.reference_genome_filename:
+        print >> sys.stderr, "Warning: Reference file has not been specified. Providing a reference genome is highly recommended."
+    if options.output_vcf_filename:
+        out = open( options.output_vcf_filename, 'wb' )
+    else:
+        out = sys.stdout
+    
+    regions = []
+    if options.region:
+        for region in options.region:
+            region_split = region.split( ":" )
+            region = region_split.pop( 0 )
+            if region_split:
+                region_split = filter( bool, region_split[0].split( '-' ) )
+                if region_split:
+                    if len( region_split ) != 2:
+                        print >> sys.stderr, "You must specify both a start and an end, or only a chromosome when specifying regions."
+                        cleanup_before_exit( tmp_dir )
+                        sys.exit( 1 )
+                    region = tuple( [ region ] + map( int, region_split ) )
+            regions.append( region )
+    
+    coverage = VCFReadGroupGenotyper( map( lambda x: Reader( *x ), bam_files ), options.reference_genome_filename, dtype=options.coverage_dtype,
+                                               min_support_depth=options.min_support_depth, min_base_quality=options.min_base_quality, min_mapping_quality=options.min_mapping_quality,
+                                               restrict_regions=regions, use_strand=options.use_strand )
+    for line in coverage.iter_vcf( ploidy=options.ploidy, variants_only=options.variants_only ):
+        out.write( "%s\n" % line )
+    out.close()
+
+if __name__ == "__main__": main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/naive_variant_detector.xml	Tue May 14 10:14:52 2013 -0400
@@ -0,0 +1,145 @@
+<tool id="naive_variant_detector" name="Naive Variant Detector" version="0.0.1">
+  <description>on BAM files</description>
+  <requirements>
+    <requirement type="package" version="1.7.1">numpy</requirement>
+    <requirement type="package" version="0.0.1">pyBamParser</requirement>
+    <requirement type="package" version="0.0.1">pyBamTools</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" err_level="fatal" />
+    <exit_code range=":-1" err_level="fatal" />
+  </stdio>
+  <command interpreter="python">naive_variant_detector.py
+      -o "${output_vcf}"
+     
+     #for $input_bam in $reference_source.input_bams:
+         -b "${input_bam.input_bam}"
+         -i "${input_bam.input_bam.metadata.bam_index}"
+     #end for
+     
+     #if $reference_source.reference_source_selector != "history":
+         -r "${reference_source.ref_file.fields.path}"
+     #elif $reference_source.ref_file:
+         -r "${reference_source.ref_file}"
+     #end if
+     
+     #for $region in $regions:
+         --region "${region.chromosome}:${region.start}-${region.end}"
+     #end for
+     
+     ${variants_only}
+     
+     ${use_strand}
+     
+     --ploidy "${$ploidy}"
+     
+     --min_support_depth "${min_support_depth}"
+     
+     #if str($min_base_quality):
+         --min_base_quality "${min_base_quality}"
+     #end if
+     
+     #if str($min_mapping_quality):
+         --min_mapping_quality "${min_mapping_quality}"
+     #end if
+     
+     --coverage_dtype "${coverage_dtype}"
+     
+  </command>
+  <inputs>
+    <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
+        <option value="cached">Locally cached</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <repeat name="input_bams" title="BAM file" min="1" >
+            <param name="input_bam" type="data" format="bam" label="BAM file">
+              <validator type="unspecified_build" />
+              <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
+            </param>
+        </repeat>
+        <param name="ref_file" type="select" label="Using reference genome" >
+          <options from_data_table="sam_fa_indexes">
+            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> 
+          </options>
+          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> <!-- FIX ME!!!! -->
+        <repeat name="input_bams" title="BAM file" min="1" >
+            <param name="input_bam" type="data" format="bam" label="BAM file" >
+            </param>
+        </repeat>
+        <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" />
+      </when>
+    </conditional>
+
+    <repeat name="regions" title="Restrict to regions" min="0" >
+        <param name="chromosome" type="text" value="" optional="False" label="Chromosome" />
+        <param name="start" type="integer" value="" optional="True" label="Start" />
+        <param name="end" type="integer" value="" optional="True" label="End" />
+    </repeat>
+
+    <!-- TODO: enhance filtering -->
+    <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" />
+    <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" />
+    <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" />
+    
+        
+    <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" />
+    <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with with possible alternate alleles"/>
+    
+    <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/>
+    
+    <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM">
+      <option value="uint8" selected="True">uint8</option>
+      <option value="uint16">uint16</option>
+      <option value="uint32">uint32</option>
+      <option value="uint64">uint64</option>
+    </param>
+    
+  </inputs>
+  <outputs>
+    <data format="vcf" name="output_vcf" />
+  </outputs>
+  <help>
+**What it does**
+
+This tool is a naive variant detector. 
+
+------
+
+**Inputs**
+
+Accepts one or more BAM input files.
+
+
+**Outputs**
+
+The output is in VCF format.
+
+------
+
+**Citation**
+
+If you use this tool, please cite Blankenberg D, et al. *In preparation.*
+
+  </help>
+  <tests>
+      <test>
+          <param name="reference_source_selector" value="history" />
+          <param name="input_bam" value="test-data/fake_phiX174_reads_1.bam" ftype="bam" /> 
+          <param name="ref_file" value="test-data/phiX174.fasta" ftype="fasta" />
+          <param name="min_support_depth" value="0" />
+          <param name="min_base_quality" value="" />
+          <param name="min_mapping_quality" value="" />
+          <param name="ploidy" value="2" />
+          <param name="variants_only" value="False" />
+          <param name="use_strand" value="False" />
+          <param name="coverage_dtype" value="uint8" />
+          <output name="output_vcf" file="test-data/fake_phiX174_reads_1_test_out_1.vcf" compare="contains" />
+      </test>
+  </tests>
+  
+</tool>