Mercurial > repos > devteam > sam_pileup

--- a/sam_pileup.py	Tue Oct 13 12:30:34 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Creates a pileup file from a bam file and a reference.
-
-usage: %prog [options]
-   -p, --input1=p: bam file
-   -o, --output1=o: Output pileup
-   -R, --ref=R: Reference file type
-   -n, --ownFile=n: User-supplied fasta reference file
-   -b, --bamIndex=b: BAM index file
-   -g, --index=g: Path of the indexed reference genome
-   -s, --lastCol=s: Print the mapping quality as the last column
-   -i, --indels=i: Only output lines containing indels
-   -M, --mapCap=M: Cap mapping quality
-   -c, --consensus=c: Call the consensus sequence using MAQ consensu model
-   -T, --theta=T: Theta paramter (error dependency coefficient)
-   -N, --hapNum=N: Number of haplotypes in sample
-   -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
-   -I, --phredProb=I: Phred probability of an indel in sequencing/prep
-
-"""
-
-import os, shutil, subprocess, sys, tempfile
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    options, args = doc_optparse.parse( __doc__ )
-    # output version # of tool
-    try:
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-    #prepare file names
-    tmpDir = tempfile.mkdtemp()
-    tmpf0 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf0_name = tmpf0.name
-    tmpf0.close()
-    tmpf0bam_name = '%s.bam' % tmpf0_name
-    tmpf0bambai_name = '%s.bam.bai' % tmpf0_name
-    tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
-    tmpf1_name = tmpf1.name
-    tmpf1.close()
-    #link bam and bam index to working directory (can't move because need to leave original)
-    os.symlink( options.input1, tmpf0bam_name )
-    os.symlink( options.bamIndex, tmpf0bambai_name )
-    #get parameters for pileup command
-    if options.lastCol == 'yes':
-        lastCol = '-s'
-    else:
-        lastCol = ''
-    if options.indels == 'yes':
-        indels = '-i'
-    else:
-        indels = ''
-    opts = '%s %s -M %s' % ( lastCol, indels, options.mapCap )
-    if options.consensus == 'yes':
-        opts += ' -c -T %s -N %s -r %s -I %s' % ( options.theta, options.hapNum, options.fraction, options.phredProb )
-    #prepare basic pileup command
-    cmd = 'samtools pileup %s -f %s %s > %s'
-    try:
-        # have to nest try-except in try-finally to handle 2.4
-        try:
-            #index reference if necessary and prepare pileup command
-            if options.ref == 'indexed':
-                if not os.path.exists( "%s.fai" % options.index ):
-                    raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index
-                cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 )
-            elif options.ref == 'history':
-                os.symlink( options.ownFile, tmpf1_name )
-                cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
-                tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-                tmp_stderr = open( tmp, 'wb' )
-                proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-                returncode = proc.wait()
-                tmp_stderr.close()
-                # get stderr, allowing for case where it's very large
-                tmp_stderr = open( tmp, 'rb' )
-                stderr = ''
-                buffsize = 1048576
-                try:
-                    while True:
-                        stderr += tmp_stderr.read( buffsize )
-                        if not stderr or len( stderr ) % buffsize != 0:
-                            break
-                except OverflowError:
-                    pass
-                tmp_stderr.close()
-                #did index succeed?
-                if returncode != 0:
-                    raise Exception, 'Error creating index file\n' + stderr
-                cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 )
-            #perform pileup command
-            tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            #did it succeed?
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err( 'Error running Samtools pileup tool\n' + str( e ) )
-    finally:
-        #clean up temp files
-        if os.path.exists( tmpDir ):
-            shutil.rmtree( tmpDir )
-    # check that there are results in the output file
-    if os.path.getsize( options.output1 ) > 0:
-        sys.stdout.write( 'Converted BAM to pileup' )
-    else:
-        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
-
-if __name__ == "__main__" : __main__()
--- a/sam_pileup.xml	Tue Oct 13 12:30:34 2015 -0400
+++ b/sam_pileup.xml	Thu Feb 06 12:11:11 2020 +0000
@@ -1,124 +1,126 @@
-<tool id="sam_pileup" name="Generate pileup" version="1.1.2">
-  <description>from BAM dataset</description>
-  <requirements>
-    <requirement type="package" version="0.1.16">samtools</requirement>
-  </requirements>
-  <command interpreter="python">
-    sam_pileup.py
-      --input1=$input1
-      --output=$output1
-      --ref=$refOrHistory.reference
-      #if $refOrHistory.reference == "history":
-        --ownFile=$refOrHistory.ownFile
-      #else:
-        --index=${refOrHistory.index.fields.path}
-      #end if
-       --bamIndex=${input1.metadata.bam_index}
-       --lastCol=$lastCol
-       --indels=$indels
-       --mapCap=$mapCap
-       --consensus=$c.consensus
-      #if $c.consensus == "yes":
-        --theta=$c.theta
-        --hapNum=$c.hapNum
-        --fraction=$c.fraction
-        --phredProb=$c.phredProb
-       #else:
-        --theta="None"
-        --hapNum="None"
-        --fraction="None"
-        --phredProb="None"
-      #end if
-  </command>
-  <inputs>
-    <conditional name="refOrHistory">
-      <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
-           <validator type="unspecified_build" />
-           <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
-
-        </param>
-        <param name="index" type="select" label="Using reference genome">
-          <options from_data_table="fasta_indexes">
-            <filter type="data_meta" ref="input1" key="dbkey" column="1" />
-            <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
-          </options>
+<tool id="sam_pileup" name="Generate pileup" version="1.1.3" profile="16.04">
+    <description>from BAM dataset</description>
+    <requirements>
+        <requirement type="package" version="0.1.16">samtools</requirement>
+    </requirements>
+    <command><![CDATA[
+ln -s '$input1' input1.bam &&
+ln -s '${input1.metadata.bam_index}' 'input1.bam.bai' &&
+#if $refOrHistory.reference == 'history':
+    ln -s '$refOrHistory.ownFile' reference.fasta &&
+    samtools faidx reference.fasta &&
+#end if
+samtools pileup
+#if $lastCol == 'yes':
+    -s
+#end if
+#if $indels == 'yes':
+    -i
+#end if
+-M $mapCap
+#if $c.consensus == 'yes':
+    -c
+    -T $c.theta
+    -N $c.hapNum
+    -r $c.fraction
+    -I $c.phredProb
+#end if
+-f
+#if $refOrHistory.reference == 'indexed':
+    '${refOrHistory.index.fields.path}'
+#else:
+    reference.fasta
+#end if
+input1.bam
+> '$output1'
+    ]]></command>
+    <inputs>
+        <conditional name="refOrHistory">
+            <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
+                <option value="indexed">Use a built-in index</option>
+                <option value="history">Use one from the history</option>
+            </param>
+            <when value="indexed">
+                <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
+                    <validator type="unspecified_build" />
+                    <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
+                </param>
+                <param name="index" type="select" label="Using reference genome">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input1" key="dbkey" column="1" />
+                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
+                <param name="ownFile" argument="-f" type="data" format="fasta" label="Select a reference genome" />
+            </when>
+        </conditional>
+        <param name="lastCol" argument="-s" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
+            <option value="no">Do not print the mapping quality as the last column</option>
+            <option value="yes">Print the mapping quality as the last column</option>
         </param>
-      </when>
-      <when value="history">
-        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
-      </when>
-    </conditional>
-    <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
-      <option value="no">Do not print the mapping quality as the last column</option>
-      <option value="yes">Print the mapping quality as the last column</option>
-    </param>
-    <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels">
-      <option value="no">Print all lines</option>
-      <option value="yes">Print only lines containing indels</option>
-    </param>
-    <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
-    <conditional name="c">
-      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
-        <option selected="true" value="no">No</option>
-        <option value="yes">Yes</option>
-      </param>
-      <when value="no" />
-      <when value="yes">
-        <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
-        <param name="hapNum" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
-        <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
-        <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output1" label="${tool.name} on ${on_string}: converted pileup" />
-  </outputs>
-  <tests>
-    <test>
-      <!--
-      Bam to pileup command:
-      samtools faidx chr_m.fasta
-      samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
-      chr_m.fasta is the prefix of the index
-      -->
-      <param name="reference" value="history" />
-      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
-      <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
-      <param name="lastCol" value="no" />
-      <param name="indels" value="no" />
-      <param name="mapCap" value="60" />
-      <param name="consensus" value="no" />
-      <output name="output1" file="sam_pileup_out1.pileup" />
-    </test>
-    <test>
-      <!--
-      Bam to pileup command:
-      samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
-      chr_m.fasta is the prefix of the index
-      -->
-      <param name="reference" value="indexed" />
-      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
-      <param name="index" value="chr_m" />
-      <param name="lastCol" value="no" />
-      <param name="indels" value="no" />
-      <param name="mapCap" value="60" />
-      <param name="consensus" value="yes" />
-      <param name="theta" value="0.85" />
-      <param name="hapNum" value="2" />
-      <param name="fraction" value="0.001" />
-      <param name="phredProb" value="40" />
-      <output name="output1" file="sam_pileup_out2.pileup" />
-    </test>
-  </tests>
-  <help>
-
+        <param name="indels" argument="-i" type="select" label="Whether or not to print only output pileup lines containing indels">
+            <option value="no">Print all lines</option>
+            <option value="yes">Print only lines containing indels</option>
+        </param>
+        <param name="mapCap" argument="-M" type="integer" value="60" label="Where to cap mapping quality" />
+        <conditional name="c">
+            <param name="consensus" argument="-c" type="select" label="Call consensus according to MAQ model?">
+                <option selected="true" value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <param name="theta" argument="-T" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
+                <param name="hapNum" argument="-N" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
+                <param name="fraction" argument="-r" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
+                <param name="phredProb" argument="-I" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${tool.name} on ${on_string}: converted pileup" />
+    </outputs>
+    <tests>
+        <test>
+            <!--
+            Bam to pileup command:
+            samtools faidx chr_m.fasta
+            samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
+            chr_m.fasta is the prefix of the index
+            -->
+            <param name="reference" value="history" />
+            <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
+            <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
+            <param name="lastCol" value="no" />
+            <param name="indels" value="no" />
+            <param name="mapCap" value="60" />
+            <param name="consensus" value="no" />
+            <output name="output1" file="sam_pileup_out1.pileup" />
+        </test>
+        <!--
+        <test>
+            Bam to pileup command:
+            samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
+            chr_m.fasta is the prefix of the index
+            <param name="reference" value="indexed" />
+            <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
+            <param name="index" value="chr_m" />
+            <param name="lastCol" value="no" />
+            <param name="indels" value="no" />
+            <param name="mapCap" value="60" />
+            <param name="consensus" value="yes" />
+            <param name="theta" value="0.85" />
+            <param name="hapNum" value="2" />
+            <param name="fraction" value="0.001" />
+            <param name="phredProb" value="40" />
+            <output name="output1" file="sam_pileup_out2.pileup" />
+        </test>
+        -->
+    </tests>
+    <help><![CDATA[
 **What it does**

 Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If *Call consensus according to MAQ model?* option is set to **No**, the tool produces simple pileup. If the option is set to **Yes**, a ten column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.
@@ -141,7 +143,7 @@
  chrM  413  G  4     ..t,     IIIH
  chrM  414  C  4     ...a     III2
  chrM  415  C  4     TTTt     III7
-
+
 where::

   Column Definition
@@ -152,7 +154,7 @@
        4 Coverage (# reads aligning over that position)
        5 Bases within reads where (see Galaxy wiki for more info)
        6 Quality values (phred33 scale, see Galaxy wiki for more)
-
+
 **Ten column pileup**

 The `ten-column` (consensus_) pileup incorporates additional consensus information generated with *-c* option of *samtools pileup* command::
@@ -182,15 +184,8 @@


 .. _consensus: http://samtools.sourceforge.net/cns0.shtml
-
-------
-
-**Citation**
-
-For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_
-
-
-  </help>
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp352</citation>
+    </citations>
 </tool>
-
-
--- a/tool_dependencies.xml	Tue Oct 13 12:30:34 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="samtools" version="0.1.16">
-        <repository changeset_revision="cac9b701b6f8" name="package_samtools_0_1_16" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>