Mercurial > repos > devteam > bam_to_sam
changeset 4:3bade5114f21 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:53:40 -0400 |
parents | ad3c355b8249 |
children | 4ffbc8cedb42 |
files | bam_to_sam.py bam_to_sam.xml macros.xml test-data/bam_to_sam_out1.sam test-data/bam_to_sam_out2.sam test-data/bam_to_sam_out3.sam tool_dependencies.xml |
diffstat | 7 files changed, 131 insertions(+), 211 deletions(-) [+] |
line wrap: on
line diff
--- a/bam_to_sam.py Wed Mar 04 22:28:17 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -#!/usr/bin/env python -""" -Converts BAM data to sorted SAM data. -usage: bam_to_sam.py [options] - --input1: SAM file to be converted - --output1: output dataset in bam format -""" - -import optparse, os, sys, subprocess, tempfile, shutil -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -#from galaxy import util - -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' ) - ( options, args ) = parser.parse_args() - - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = None - for line in open( tmp_stdout.name, 'rb' ): - if line.lower().find( 'version' ) >= 0: - stdout = line.strip() - break - if stdout: - sys.stdout.write( 'Samtools %s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Samtools version\n' ) - - tmp_dir = tempfile.mkdtemp( dir='.' ) - - try: - # exit if input file empty - if os.path.getsize( options.input1 ) == 0: - raise Exception, 'Initial BAM file empty' - # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command - # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted - # into memory ( controlled by option -m ). 
- tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name - tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name - tmp_sorted_aligns_file.close() - command = 'samtools sort %s %s' % ( options.input1, tmp_sorted_aligns_file_base ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - # exit if sorted BAM file empty - if os.path.getsize( tmp_sorted_aligns_file_name) == 0: - raise Exception, 'Intermediate sorted BAM file empty' - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error sorting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - - - try: - # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ). 
- if options.header: - view_options = "-h" - else: - view_options = "" - command = 'samtools view %s -o %s %s' % ( view_options, options.output1, tmp_sorted_aligns_file_name ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - # check that there are results in the output file - if os.path.getsize( options.output1 ) > 0: - sys.stdout.write( 'BAM file converted to SAM' ) - else: - stop_err( 'The output file is empty, there may be an error with your input file.' ) - -if __name__=="__main__": __main__()
--- a/bam_to_sam.xml Wed Mar 04 22:28:17 2015 -0500 +++ b/bam_to_sam.xml Tue Oct 13 12:53:40 2015 -0400 @@ -1,63 +1,54 @@ -<tool id="bam_to_sam" name="BAM-to-SAM" version="1.0.4"> - <requirements> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - <description>converts BAM format to SAM format</description> - <command interpreter="python"> - bam_to_sam.py - --input1=$input1 - --output1=$output1 - $header - </command> - <inputs> - <param name="input1" type="data" format="bam" label="BAM File to Convert" /> - <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" /> - </inputs> - <outputs> - <data format="sam" name="output1" label="${tool.name} on ${on_string}: converted SAM" /> - </outputs> - <tests> - <test> - <!-- - Bam-to-Sam command: - samtools view -o bam_to_sam_out1.sam test-data/bam_to_sam_in1.bam - bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam - --> - <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" /> - <param name="header" value="" /> - <output name="output1" file="bam_to_sam_out1.sam" sorted="True" /> - </test> - <test> - <!-- - Bam-to-Sam command: - samtools view -o bam_to_sam_out2.sam test-data/bam_to_sam_in2.bam - bam_to_sam_in2.bam can be created from bam_to_sam_in2.sam - --> - <param name="input1" value="bam_to_sam_in2.bam" ftype="bam" /> - <param name="header" value="" /> - <output name="output1" file="bam_to_sam_out2.sam" sorted="True" /> - </test> - <test> - <!-- - Bam-to-Sam command: - samtools view -h -o bam_to_sam_out3.sam test-data/bam_to_sam_in1.bam - bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam - --> - <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" /> - <param name="header" value="--header" /> - <output name="output1" file="bam_to_sam_out3.sam" sorted="True" lines_diff="6" /><!-- header param not working in func tests so won't produce correct 6-line header (fine in browser) --> - </test> - 
</tests> - <help> +<tool id="bam_to_sam" name="BAM-to-SAM" version="2.0"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"></expand> + <expand macro="version_command"></expand> + <expand macro="stdio"></expand> + <description>convert BAM to SAM</description> + <command> +<![CDATA[ + samtools view -o "${output1}" ${header} "${input1}" +]]> + </command> + <inputs> + <param format="bam" label="BAM File to Convert" name="input1" type="data" /> + <param name="header" label="Header options" type="select" help="Allows to choose between seeing the entire dataset with the header, header only, or data only."> + <option value="-h">Include header in SAM output (-h)</option> + <option value="-H">Print header only (-H)</option> + <option value="">Exclude header</option> + </param> + </inputs> + <outputs> + <data format="sam" label="${tool.name} on ${on_string}: converted SAM" name="output1" /> + </outputs> + <tests> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in1.bam" /> + <param name="header" value="-h" /> + <output file="bam_to_sam_out1.sam" name="output1" sorted="True" /> + </test> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in1.bam" /> + <param name="header" value="-H" /> + <output file="bam_to_sam_out2.sam" name="output1" sorted="True" /> + </test> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in1.bam" /> + <param name="header" value="" /> + <output file="bam_to_sam_out3.sam" name="output1" sorted="True" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** -This tool uses the SAMTools_ toolkit to produce a SAM file from a BAM file. +Converts BAM dataset to SAM using ``samtools view`` command:: -.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + samtools view -o [OUTPUT SAM] [-h|-H] [INPUT BAM] +]]> </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btp352</citation> - </citations> + <expand macro="citations"></expand> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Oct 13 12:53:40 2015 -0400 @@ -0,0 +1,70 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + <yield/> + </requirements> + </xml> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @misc{SAM_def, + title={Definition of SAM/BAM format}, + url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} + </citation> + <citation type="doi">10.1093/bioinformatics/btp352</citation> + <citation type="doi">10.1093/bioinformatics/btr076</citation> + <citation type="doi">10.1093/bioinformatics/btr509</citation> + <citation type="bibtex"> + @misc{Danecek_et_al, + Author={Danecek, P., Schiffels, S., Durbin, R.}, + title={Multiallelic calling model in bcftools (-m)}, + url = {http://samtools.github.io/bcftools/call-m.pdf},} + </citation> + <citation type="bibtex"> + @misc{Durbin_VCQC, + Author={Durbin, R.}, + title={Segregation based metric for variant call QC}, + url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} + </citation> + <citation type="bibtex"> + @misc{Li_SamMath, + Author={Li, H.}, + title={Mathematical Notes on SAMtools Algorithms}, + url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} + </citation> + <citation type="bibtex"> + @misc{SamTools_github, + title={SAMTools GitHub page}, + url = {https://github.com/samtools/samtools},} + </citation> + </citations> + </xml> + <xml name="version_command"> + <version_command>samtools --version | head -n 1 | awk '{ print $2 }'</version_command> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </xml> + <token name="@no-chrom-options@"> +----- + +.. class:: warningmark + +**No options available? 
How to re-detect metadata** + +If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: + +1. Click on the **pencil** icon adjacent to the dataset in the history +2. A new menu will appear in the center pane of the interface +3. Click **Datatype** tab +4. Set **New Type** to **BAM** +5. Click **Save** + +The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. + + </token> + +</macros>
--- a/test-data/bam_to_sam_out1.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out1.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,3 +1,9 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
--- a/test-data/bam_to_sam_out2.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out2.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,18 +1,6 @@ -bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 -bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 -bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0
--- a/test-data/bam_to_sam_out3.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out3.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,9 +1,3 @@ -@HD VN:1.0 SO:coordinate -@SQ SN:chr1 LN:101 -@SQ SN:chr7 LN:404 -@SQ SN:chr8 LN:202 -@RG ID:0 SM:Hi,Mom! -@PG ID:1 PN:Hey! VN:2.0 both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
--- a/tool_dependencies.xml Wed Mar 04 22:28:17 2015 -0500 +++ b/tool_dependencies.xml Tue Oct 13 12:53:40 2015 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="samtools" version="0.1.19"> - <repository changeset_revision="786e3000ca58" name="package_samtools_0_1_19" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <package name="samtools" version="1.2"> + <repository changeset_revision="192f00129358" name="package_samtools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>