Mercurial > repos > devteam > sam_pileup
changeset 2:37df866ccf1a draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam_pileup commit 8301d37348be25a038b3c63b049b1178d05f5003"
author | devteam |
---|---|
date | Thu, 06 Feb 2020 12:11:11 +0000 |
parents | efd77ef84903 |
children | |
files | sam_pileup.py sam_pileup.xml tool_dependencies.xml |
diffstat | 3 files changed, 128 insertions(+), 285 deletions(-) [+] |
line wrap: on
line diff
--- a/sam_pileup.py Tue Oct 13 12:30:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Creates a pileup file from a bam file and a reference.
-
-usage: %prog [options]
-   -p, --input1=p: bam file
-   -o, --output1=o: Output pileup
-   -R, --ref=R: Reference file type
-   -n, --ownFile=n: User-supplied fasta reference file
-   -b, --bamIndex=b: BAM index file
-   -g, --index=g: Path of the indexed reference genome
-   -s, --lastCol=s: Print the mapping quality as the last column
-   -i, --indels=i: Only output lines containing indels
-   -M, --mapCap=M: Cap mapping quality
-   -c, --consensus=c: Call the consensus sequence using MAQ consensu model
-   -T, --theta=T: Theta paramter (error dependency coefficient)
-   -N, --hapNum=N: Number of haplotypes in sample
-   -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
-   -I, --phredProb=I: Phred probability of an indel in sequencing/prep
-
-"""
-
-import os, shutil, subprocess, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-    #prepare file names
-    tmpDir = tempfile.mkdtemp()
-    tmpf0 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf0_name = tmpf0.name
-    tmpf0.close()
-    tmpf0bam_name = '%s.bam' % tmpf0_name
-    tmpf0bambai_name = '%s.bam.bai' % tmpf0_name
-    tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf1_name = tmpf1.name
-    tmpf1.close()
-    #link bam and bam index to working directory (can't move because need to leave original)
-    os.symlink( options.input1, tmpf0bam_name )
-    os.symlink( options.bamIndex, tmpf0bambai_name )
-    #get parameters for pileup command
-    if options.lastCol == 'yes':
-        lastCol = '-s'
-    else:
-        lastCol = ''
-    if options.indels == 'yes':
-        indels = '-i'
-    else:
-        indels = ''
-    opts = '%s %s -M %s' % ( lastCol, indels, options.mapCap )
-    if options.consensus == 'yes':
-        opts += ' -c -T %s -N %s -r %s -I %s' % ( options.theta, options.hapNum, options.fraction, options.phredProb )
-    #prepare basic pileup command
-    cmd = 'samtools pileup %s -f %s %s > %s'
-    try:
-        # have to nest try-except in try-finally to handle 2.4
-        try:
-            #index reference if necessary and prepare pileup command
-            if options.ref == 'indexed':
-                if not os.path.exists( "%s.fai" % options.index ):
-                    raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index
-                cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 )
-            elif options.ref == 'history':
-                os.symlink( options.ownFile, tmpf1_name )
-                cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
-                tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                buffsize = 1048576
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                #did index succeed?
-                if returncode != 0:
-                    raise Exception, 'Error creating index file\n' + stderr
-                cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 )
-            #perform pileup command
-            tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            #did it succeed?
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err( 'Error running Samtools pileup tool\n' + str( e ) )
-    finally:
-        #clean up temp files
-        if os.path.exists( tmpDir ):
-            shutil.rmtree( tmpDir )
-    # check that there are results in the output file
-    if os.path.getsize( options.output1 ) > 0:
-        sys.stdout.write( 'Converted BAM to pileup' )
-    else:
-        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
-
-if __name__ == "__main__" : __main__()
--- a/sam_pileup.xml Tue Oct 13 12:30:34 2015 -0400 +++ b/sam_pileup.xml Thu Feb 06 12:11:11 2020 +0000 @@ -1,124 +1,126 @@ -<tool id="sam_pileup" name="Generate pileup" version="1.1.2"> - <description>from BAM dataset</description> - <requirements> - <requirement type="package" version="0.1.16">samtools</requirement> - </requirements> - <command interpreter="python"> - sam_pileup.py - --input1=$input1 - --output=$output1 - --ref=$refOrHistory.reference - #if $refOrHistory.reference == "history": - --ownFile=$refOrHistory.ownFile - #else: - --index=${refOrHistory.index.fields.path} - #end if - --bamIndex=${input1.metadata.bam_index} - --lastCol=$lastCol - --indels=$indels - --mapCap=$mapCap - --consensus=$c.consensus - #if $c.consensus == "yes": - --theta=$c.theta - --hapNum=$c.hapNum - --fraction=$c.fraction - --phredProb=$c.phredProb - #else: - --theta="None" - --hapNum="None" - --fraction="None" - --phredProb="None" - #end if - </command> - <inputs> - <conditional name="refOrHistory"> - <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." 
/> - - </param> - <param name="index" type="select" label="Using reference genome"> - <options from_data_table="fasta_indexes"> - <filter type="data_meta" ref="input1" key="dbkey" column="1" /> - <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> - </options> +<tool id="sam_pileup" name="Generate pileup" version="1.1.3" profile="16.04"> + <description>from BAM dataset</description> + <requirements> + <requirement type="package" version="0.1.16">samtools</requirement> + </requirements> + <command><![CDATA[ +ln -s '$input1' input1.bam && +ln -s '${input1.metadata.bam_index}' 'input1.bam.bai' && +#if $refOrHistory.reference == 'history': + ln -s '$refOrHistory.ownFile' reference.fasta && + samtools faidx reference.fasta && +#end if +samtools pileup +#if $lastCol == 'yes': + -s +#end if +#if $indels == 'yes': + -i +#end if +-M $mapCap +#if $c.consensus == 'yes': + -c + -T $c.theta + -N $c.hapNum + -r $c.fraction + -I $c.phredProb +#end if +-f +#if $refOrHistory.reference == 'indexed': + '${refOrHistory.index.fields.path}' +#else: + reference.fasta +#end if +input1.bam +> '$output1' + ]]></command> + <inputs> + <conditional name="refOrHistory"> + <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." 
/> + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when> + <when value="history"> + <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" /> + <param name="ownFile" argument="-f" type="data" format="fasta" label="Select a reference genome" /> + </when> + </conditional> + <param name="lastCol" argument="-s" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient"> + <option value="no">Do not print the mapping quality as the last column</option> + <option value="yes">Print the mapping quality as the last column</option> </param> - </when> - <when value="history"> - <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" /> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" /> - </when> - </conditional> - <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient"> - <option value="no">Do not print the mapping quality as the last column</option> - <option value="yes">Print the mapping quality as the last column</option> - </param> - <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels"> - <option value="no">Print all lines</option> - <option value="yes">Print only lines containing indels</option> - </param> - <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" /> - <conditional name="c"> - <param name="consensus" type="select" 
label="Call consensus according to MAQ model?"> - <option selected="true" value="no">No</option> - <option value="yes">Yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" /> - <param name="hapNum" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" /> - <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" /> - <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="tabular" name="output1" label="${tool.name} on ${on_string}: converted pileup" /> - </outputs> - <tests> - <test> - <!-- - Bam to pileup command: - samtools faidx chr_m.fasta - samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup - chr_m.fasta is the prefix of the index - --> - <param name="reference" value="history" /> - <param name="input1" value="sam_pileup_in1.bam" ftype="bam" /> - <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" /> - <param name="lastCol" value="no" /> - <param name="indels" value="no" /> - <param name="mapCap" value="60" /> - <param name="consensus" value="no" /> - <output name="output1" file="sam_pileup_out1.pileup" /> - </test> - <test> - <!-- - Bam to pileup command: - samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup - chr_m.fasta is the prefix of the index - --> - <param name="reference" value="indexed" /> - <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" /> - <param name="index" value="chr_m" /> - <param name="lastCol" value="no" /> - <param name="indels" value="no" /> - <param name="mapCap" value="60" /> - <param name="consensus" 
value="yes" /> - <param name="theta" value="0.85" /> - <param name="hapNum" value="2" /> - <param name="fraction" value="0.001" /> - <param name="phredProb" value="40" /> - <output name="output1" file="sam_pileup_out2.pileup" /> - </test> - </tests> - <help> - + <param name="indels" argument="-i" type="select" label="Whether or not to print only output pileup lines containing indels"> + <option value="no">Print all lines</option> + <option value="yes">Print only lines containing indels</option> + </param> + <param name="mapCap" argument="-M" type="integer" value="60" label="Where to cap mapping quality" /> + <conditional name="c"> + <param name="consensus" argument="-c" type="select" label="Call consensus according to MAQ model?"> + <option selected="true" value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="theta" argument="-T" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" /> + <param name="hapNum" argument="-N" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" /> + <param name="fraction" argument="-r" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" /> + <param name="phredProb" argument="-I" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output1" format="tabular" label="${tool.name} on ${on_string}: converted pileup" /> + </outputs> + <tests> + <test> + <!-- + Bam to pileup command: + samtools faidx chr_m.fasta + samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup + chr_m.fasta is the prefix of the index + --> + <param name="reference" value="history" /> + <param name="input1" value="sam_pileup_in1.bam" ftype="bam" /> + <param name="ownFile" value="chr_m.fasta" ftype="fasta" 
dbkey="equCab2" /> + <param name="lastCol" value="no" /> + <param name="indels" value="no" /> + <param name="mapCap" value="60" /> + <param name="consensus" value="no" /> + <output name="output1" file="sam_pileup_out1.pileup" /> + </test> + <!-- + <test> + Bam to pileup command: + samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup + chr_m.fasta is the prefix of the index + <param name="reference" value="indexed" /> + <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" /> + <param name="index" value="chr_m" /> + <param name="lastCol" value="no" /> + <param name="indels" value="no" /> + <param name="mapCap" value="60" /> + <param name="consensus" value="yes" /> + <param name="theta" value="0.85" /> + <param name="hapNum" value="2" /> + <param name="fraction" value="0.001" /> + <param name="phredProb" value="40" /> + <output name="output1" file="sam_pileup_out2.pileup" /> + </test> + --> + </tests> + <help><![CDATA[ **What it does** Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If *Call consensus according to MAQ model?* option is set to **No**, the tool produces simple pileup. If the option is set to **Yes**, a ten column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below. @@ -141,7 +143,7 @@ chrM 413 G 4 ..t, IIIH chrM 414 C 4 ...a III2 chrM 415 C 4 TTTt III7 - + where:: Column Definition @@ -152,7 +154,7 @@ 4 Coverage (# reads aligning over that position) 5 Bases within reads where (see Galaxy wiki for more info) 6 Quality values (phred33 scale, see Galaxy wiki for more) - + **Ten column pileup** The `ten-column` (consensus_) pileup incorporates additional consensus information generated with *-c* option of *samtools pileup* command:: @@ -182,15 +184,8 @@ .. 
_consensus: http://samtools.sourceforge.net/cns0.shtml - ------- - -**Citation** - -For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ - - - </help> + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btp352</citation> + </citations> </tool> - -
--- a/tool_dependencies.xml Tue Oct 13 12:30:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="samtools" version="0.1.16">
-        <repository changeset_revision="cac9b701b6f8" name="package_samtools_0_1_16" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>