changeset 0:bc71417f212c

Uploaded
author g2cmnty@test-web1.g2.bx.psu.edu
date Fri, 17 Jun 2011 15:25:25 -0400
parents
children 2f3ae18f13ed
files bam_to_bigwig/README.txt bam_to_bigwig/bam_to_bigwig.xml bam_to_bigwig/bam_to_wiggle.py
diffstat 3 files changed, 166 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig/README.txt	Fri Jun 17 15:25:25 2011 -0400
@@ -0,0 +1,18 @@
+Convert a BAM file into a BigWig coverage file. This can be used directly from 
+Galaxy for display at UCSC. The advantage over standard Wiggle format is that 
+the data is stored in a compressed format and can be retrieved by genome
+region. This allows you to view regions of arbitrarily large Wiggle file data
+at UCSC while avoiding the upload costs.
+
+The latest version of the bam_to_wiggle.py script is available at:
+
+https://github.com/chapmanb/bcbb/blob/master/nextgen/scripts/bam_to_wiggle.py
+
+Place the script in the same directory as the XML configuration file, or
+provide a soft link to it.
+
+This requires:
+
+Python2, version 2.6 or better
+pysam (http://code.google.com/p/pysam/)
+wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig/bam_to_bigwig.xml	Fri Jun 17 15:25:25 2011 -0400
@@ -0,0 +1,25 @@
+<tool id="bam_to_bigwig" name="BAM to BigWig" version="0.0.2">
+  <description>Calculates coverage from a BAM alignment file</description>
+  <command interpreter="python">bam_to_wiggle.py $align --outfile=$out</command>
+  <inputs>
+    <param format="bam" name="align" type="data" label="BAM alignment file"/>
+  </inputs>
+  <outputs>
+    <data format="bigwig" name="out" />
+  </outputs>
+
+<help>
+**What it does**
+
+Creates a coverage file in BigWig format, given a BAM alignment file. 
+
+**Input**
+
+A BAM alignment file. This needs to have the genome database build used in alignment annotated. If your file has '?' for the database build, click on the pencil icon to edit the alignment attributes, and specify the organism used to align against.
+
+**Output**
+
+BigWig files can be loaded directly from Galaxy into the UCSC browser. They can be loaded incrementally by UCSC, so a single file can be used to represent the entire genome without having to upload the entire thing as a custom track.
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig/bam_to_wiggle.py	Fri Jun 17 15:25:25 2011 -0400
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Convert BAM files to BigWig file format in a specified region.
+
+Usage:
+    bam_to_wiggle.py <BAM file> [<YAML config>]
+    [--outfile=<output file name>
+     --chrom=<chrom>
+     --start=<start>
+     --end=<end>]
+
+chrom start and end are optional, in which case they default to everything.
+
+The config file is in YAML format and specifies the location of the wigToBigWig
+program from UCSC:
+
+program:
+  ucsc_bigwig: wigToBigWig
+
+If not specified, these will be assumed to be present in the system path.
+
+The script requires:
+    pysam (http://code.google.com/p/pysam/)
+    wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
+If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
+"""
+import os
+import sys
+import subprocess
+import tempfile
+from optparse import OptionParser
+from contextlib import contextmanager, closing
+
+import pysam
+
+def main(bam_file, config_file=None, chrom='all', start=0, end=None,
+         outfile=None):
+    if config_file:
+        import yaml
+        with open(config_file) as in_handle:
+            config = yaml.load(in_handle)
+    else:
+        config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
+    if outfile is None:
+        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
+    if start > 0:
+        start = int(start) - 1
+    if end is not None:
+        end = int(end)
+    regions = [(chrom, start, end)]
+    if os.path.abspath(bam_file) == os.path.abspath(outfile):
+        sys.stderr.write("Bad arguments, input and output files are the same.\n")
+        sys.exit(1)
+    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
+        #Use a temp file to avoid any possiblity of not having write permission
+        out_handle = tempfile.NamedTemporaryFile(delete=False)
+        wig_file = out_handle.name
+        with closing(out_handle):
+            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
+        try:
+            if wig_valid:
+                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
+        finally:
+            os.remove(wig_file)
+
+@contextmanager
+def indexed_bam(bam_file, config):
+    if not os.path.exists(bam_file + ".bai"):
+        pysam.index(bam_file)
+    sam_reader = pysam.Samfile(bam_file, "rb")
+    yield sam_reader
+    sam_reader.close()
+
+def write_bam_track(bam_file, regions, config, out_handle):
+    out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
+        "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
+        "visibility=full",
+        ]))
+    is_valid = False
+    with indexed_bam(bam_file, config) as work_bam:
+        sizes = zip(work_bam.references, work_bam.lengths)
+        if len(regions) == 1 and regions[0][0] == "all":
+            regions = [(name, 0, length) for name, length in sizes]
+        for chrom, start, end in regions:
+            if end is None and chrom in work_bam.references:
+                end = work_bam.lengths[work_bam.references.index(chrom)]
+            assert end is not None, "Could not find %s in header" % chrom
+            out_handle.write("variableStep chrom=%s\n" % chrom)
+            for col in work_bam.pileup(chrom, start, end):
+                out_handle.write("%s %s\n" % (col.pos+1, col.n))
+                is_valid = True
+    return sizes, is_valid
+
+def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
+    if not bw_file:
+        bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
+    size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
+    with open(size_file, "w") as out_handle:
+        for chrom, size in chr_sizes:
+            out_handle.write("%s\t%s\n" % (chrom, size))
+    try:
+        cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
+        subprocess.check_call(cl)
+    finally:
+        os.remove(size_file)
+    return bw_file
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-o", "--outfile", dest="outfile")
+    parser.add_option("-c", "--chrom", dest="chrom")
+    parser.add_option("-s", "--start", dest="start")
+    parser.add_option("-e", "--end", dest="end")
+    (options, args) = parser.parse_args()
+    if len(args) not in [1, 2]:
+        print "Incorrect arguments"
+        print __doc__
+        sys.exit()
+    kwargs = dict(
+        outfile=options.outfile,
+        chrom=options.chrom or 'all',
+        start=options.start or 0,
+        end=options.end)
+    main(*args, **kwargs)