# HG changeset patch # User devteam # Date 1444755220 14400 # Node ID 3bade5114f215bca9d3aa5fb9cd7ea580fe76d41 # Parent ad3c355b82491fef2a4997276db1fc40eae941fa planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 diff -r ad3c355b8249 -r 3bade5114f21 bam_to_sam.py --- a/bam_to_sam.py Wed Mar 04 22:28:17 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -#!/usr/bin/env python -""" -Converts BAM data to sorted SAM data. -usage: bam_to_sam.py [options] - --input1: SAM file to be converted - --output1: output dataset in bam format -""" - -import optparse, os, sys, subprocess, tempfile, shutil -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -#from galaxy import util - -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' ) - ( options, args ) = parser.parse_args() - - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = None - for line in open( tmp_stdout.name, 'rb' ): - if line.lower().find( 'version' ) >= 0: - stdout = line.strip() - break - if stdout: - sys.stdout.write( 'Samtools %s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Samtools version\n' ) - - tmp_dir = tempfile.mkdtemp( dir='.' 
) - - try: - # exit if input file empty - if os.path.getsize( options.input1 ) == 0: - raise Exception, 'Initial BAM file empty' - # Sort alignments by leftmost coordinates. File .bam will be created. This command - # may also create temporary files .%d.bam when the whole alignment cannot be fitted - # into memory ( controlled by option -m ). - tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name - tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name - tmp_sorted_aligns_file.close() - command = 'samtools sort %s %s' % ( options.input1, tmp_sorted_aligns_file_base ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - # exit if sorted BAM file empty - if os.path.getsize( tmp_sorted_aligns_file_name) == 0: - raise Exception, 'Intermediate sorted BAM file empty' - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error sorting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - - - try: - # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ). 
- if options.header: - view_options = "-h" - else: - view_options = "" - command = 'samtools view %s -o %s %s' % ( view_options, options.output1, tmp_sorted_aligns_file_name ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - # check that there are results in the output file - if os.path.getsize( options.output1 ) > 0: - sys.stdout.write( 'BAM file converted to SAM' ) - else: - stop_err( 'The output file is empty, there may be an error with your input file.' 
) - -if __name__=="__main__": __main__() diff -r ad3c355b8249 -r 3bade5114f21 bam_to_sam.xml --- a/bam_to_sam.xml Wed Mar 04 22:28:17 2015 -0500 +++ b/bam_to_sam.xml Tue Oct 13 12:53:40 2015 -0400 @@ -1,63 +1,54 @@ - - - samtools - - converts BAM format to SAM format - - bam_to_sam.py - --input1=$input1 - --output1=$output1 - $header - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + macros.xml + + + + + convert BAM to SAM + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - 10.1093/bioinformatics/btp352 - + diff -r ad3c355b8249 -r 3bade5114f21 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Oct 13 12:53:40 2015 -0400 @@ -0,0 +1,70 @@ + + + + samtools + + + + + + + @misc{SAM_def, + title={Definition of SAM/BAM format}, + url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} + + 10.1093/bioinformatics/btp352 + 10.1093/bioinformatics/btr076 + 10.1093/bioinformatics/btr509 + + @misc{Danecek_et_al, + Author={Danecek, P., Schiffels, S., Durbin, R.}, + title={Multiallelic calling model in bcftools (-m)}, + url = {http://samtools.github.io/bcftools/call-m.pdf},} + + + @misc{Durbin_VCQC, + Author={Durbin, R.}, + title={Segregation based metric for variant call QC}, + url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} + + + @misc{Li_SamMath, + Author={Li, H.}, + title={Mathematical Notes on SAMtools Algorithms}, + url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} + + + @misc{SamTools_github, + title={SAMTools GitHub page}, + url = {https://github.com/samtools/samtools},} + + + + + samtools --version | head -n 1 | awk '{ print $2 }' + + + + + + + +----- + +.. class:: warningmark + +**No options available? How to re-detect metadata** + +If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: + +1. 
Click on the **pencil** icon adjacent to the dataset in the history +2. A new menu will appear in the center pane of the interface +3. Click **Datatype** tab +4. Set **New Type** to **BAM** +5. Click **Save** + +The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. + + + + diff -r ad3c355b8249 -r 3bade5114f21 test-data/bam_to_sam_out1.sam --- a/test-data/bam_to_sam_out1.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out1.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,3 +1,9 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 diff -r ad3c355b8249 -r 3bade5114f21 test-data/bam_to_sam_out2.sam --- a/test-data/bam_to_sam_out2.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out2.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,18 +1,6 @@ -bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 
-bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 -bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 -bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 -bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 -bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 -bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 diff -r ad3c355b8249 -r 3bade5114f21 test-data/bam_to_sam_out3.sam --- a/test-data/bam_to_sam_out3.sam Wed Mar 04 22:28:17 2015 -0500 +++ b/test-data/bam_to_sam_out3.sam Tue Oct 13 12:53:40 2015 -0400 @@ -1,9 +1,3 @@ -@HD VN:1.0 SO:coordinate -@SQ SN:chr1 LN:101 -@SQ SN:chr7 LN:404 -@SQ SN:chr8 LN:202 -@RG ID:0 SM:Hi,Mom! -@PG ID:1 PN:Hey! 
VN:2.0 both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 diff -r ad3c355b8249 -r 3bade5114f21 tool_dependencies.xml --- a/tool_dependencies.xml Wed Mar 04 22:28:17 2015 -0500 +++ b/tool_dependencies.xml Tue Oct 13 12:53:40 2015 -0400 @@ -1,6 +1,6 @@ - - + +