changeset 2:15889783e759 draft

Fix bugs in tool operation and update dependencies.
author pjbriggs
date Thu, 12 Feb 2015 08:29:07 -0500
parents d0986d2be693
children 881a48588832
files README.rst macs21_wrapper.py macs21_wrapper.xml tool_dependencies.xml
diffstat 4 files changed, 244 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Feb 12 08:29:07 2015 -0500
@@ -0,0 +1,98 @@
+MACS21: Model-based Analysis of ChIP-Seq (MACS 2.1.0)
+=====================================================
+
+Galaxy tool wrapper for the MACS 2.1.0 ChIP-seq peak calling program. MACS has been
+developed by Tao Liu:
+https://github.com/taoliu/MACS/
+
+The reference for MACS is:
+
+- Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers
+  RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol.
+  2008;9(9):R137
+
+Automated installation
+======================
+
+Installation via the Galaxy Tool Shed will take care of installing the tool wrapper and
+the MACS 2.1.0 program.
+
+Manual Installation
+===================
+
+There are two files to install:
+
+- ``macs21_wrapper.xml`` (the Galaxy tool definition)
+- ``macs21_wrapper.py`` (the Python script wrapper)
+
+The suggested location is in a ``tools/macs21/`` folder. You will then
+need to modify the ``tool_conf.xml`` file to tell Galaxy to offer the tool
+by adding the line:
+
+    <tool file="macs21/macs21_wrapper.xml" />
+
+You will also need to install MACS 2.1.0:
+
+- https://pypi.python.org/pypi/MACS2
+
+and ensure that it's on your Galaxy user's ``PATH`` when running the tool.
+
+If you want to run the functional tests, copy the sample test files under
+Galaxy's ``test-data/`` directory. Then:
+
+    ./run_tests.sh -id macs2_1_peakcalling
+
+(However there are no tests defined at present.)
+
+History
+=======
+
+This tool is based on the ``modencode-dcc`` MACS2 tool developed by Ziru Zhou
+(ziruzhou@gmail.com), specifically the ``16:14f378e35191`` revision of the
+tool at
+
+- http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 
+
+It has been substantially modified both to adapt it to MACS 2.1.0, and to
+re-implement the internal workings of the tool to conform with current practices
+in invoking commands from Galaxy.
+
+========== ======================================================================
+Version    Changes
+---------- ----------------------------------------------------------------------
+2.1.0-2    - Add option to create bigWig file from bedGraphs; fix bug with -B
+             option.
+2.1.0-1    - Initial version
+========== ======================================================================
+
+
+Developers
+==========
+
+This tool is developed on the following GitHub repository:
+https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/macs21
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
+the ``package_macs21_wrapper.sh`` script.
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/macs21_wrapper.py	Thu Jan 29 11:11:21 2015 -0500
+++ b/macs21_wrapper.py	Thu Feb 12 08:29:07 2015 -0500
@@ -53,6 +53,65 @@
             fp.write( '\t'.join( fields ) )
     fp.close()
 
+def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file,
+                              chrom_sizes,working_dir=None):
+    """Convert a bedGraph file to bigWig using the UCSC tools
+
+    Runs the equivalent of (commands are run in 'working_dir'):
+
+    $ fetchChromSizes mm9 > mm9.chrom.sizes
+    $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped
+    $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw
+
+    Binaries: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/
+
+    fetchChromSizes is only run if 'chrom_sizes' doesn't exist; the
+    genome build is then taken from its basename (text before the
+    first '.'). Exits with status 1 if the sizes file or the clipped
+    bedGraph is missing. NB 'working_dir' is always passed to
+    os.path.join, so the default of None will not work.
+    """
+    print "Generating bigWig from bedGraph..."
+    # Fetch the chromosome sizes if the supplied file doesn't exist
+    if not os.path.exists(chrom_sizes):
+        # Genome build is the sizes file basename up to the first '.'
+        chrom_sizes = os.path.basename(chrom_sizes)
+        genome_build = chrom_sizes.split('.')[0]
+        print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build
+        # Run fetchChromSizes, capturing stdout as the new sizes file
+        chrom_sizes = os.path.join(working_dir,chrom_sizes)
+        stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr")
+        cmd = "fetchChromSizes %s" % genome_build
+        print "Running %s" % cmd
+        proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
+                                stdout=open(chrom_sizes,'wb'),
+                                stderr=open(stderr_file,'wb'))
+        proc.wait()
+        # Echo stderr from fetchChromSizes to stdout, for information only
+        for line in open(stderr_file,'r'):
+            print line.strip()
+        # Check for the sizes file (NB the stdout redirect above creates it)
+        if not os.path.exists(chrom_sizes):
+            sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build)
+            sys.exit(1)
+    # Run bedClip (output name is relative, so it lands in working_dir)
+    treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file)
+    cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped)
+    print "Running %s" % cmd
+    proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+    proc.wait()
+    # Check that clipped file exists
+    treat_clipped = os.path.join(working_dir,treat_clipped)
+    if not os.path.exists(treat_clipped):
+        sys.stderr.write("Failed to create clipped bed file")
+        sys.exit(1)
+    # Run bedGraphToBigWig (exit status is not checked)
+    cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes,
+                                         bigwig_file)
+    print "Running %s" % cmd
+    proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+    proc.wait()
+
 if __name__ == "__main__":
 
     # Echo the command line
@@ -67,10 +126,14 @@
     output_narrowpeaks = None
     output_treat_pileup = None
     output_lambda_bedgraph = None
+    output_bigwig = None
     output_xls_to_interval_peaks_file = None
     output_peaks = None
     output_bdgcmp = None
 
+    # Other initialisations
+    chrom_sizes_file = None
+
     # Build the MACS 2.1 command line
     # Initial arguments are always the same: command & input ChIP-seq file name
     cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])]
@@ -85,6 +148,9 @@
             # Replace whitespace in name with underscores
             experiment_name = '_'.join(arg.split('=')[1].split())
             cmdline.append("--name=%s" % experiment_name)
+        elif arg.startswith('--length='):
+            # Extract chromosome size file
+            chrom_sizes_file = arg.split('=')[1]
         elif arg.startswith('--output-'):
             # Handle destinations for output files
             arg0,filen = arg.split('=')
@@ -104,6 +170,8 @@
                 output_treat_pileup = filen
             elif  arg0 == '--output-lambda-bedgraph':
                 output_lambda_bedgraph = filen
+            elif  arg0 == '--output-bigwig':
+                output_bigwig = filen
             elif  arg0 == '--output-xls-to-interval':
                 output_xls_to_interval_peaks_file = filen
             elif  arg0 == '--output-peaks':
@@ -142,6 +210,13 @@
         if os.path.exists(peaks_xls_file):
             convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file,
                                     header='peaks file')
+
+    # Create bigWig from bedGraph, if requested
+    if output_bigwig is not None:
+        treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name)
+        if os.path.exists(treat_bedgraph_file):
+            make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig,
+                                      chrom_sizes_file,working_dir)
         
     # Move MACS2 output files from working dir to their final destinations
     move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)
--- a/macs21_wrapper.xml	Thu Jan 29 11:11:21 2015 -0500
+++ b/macs21_wrapper.xml	Thu Feb 12 08:29:07 2015 -0500
@@ -1,7 +1,7 @@
-<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-1">
+<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-2">
   <requirements>
     <requirement type="package" version="2.7">python</requirement>
-    <requirement type="package" version="1.8.1">numpy</requirement>
+    <requirement type="package" version="1.9">numpy</requirement>
     <requirement type="package" version="2.1.0.20140616">macs2</requirement>
   </requirements>
   <description>Model-based Analysis of ChIP-Seq</description>
@@ -49,13 +49,15 @@
           --pvalue=$major_command.pq_options.pvalue
        #end if
        ##
+       ## Bedgraph options
+       #if $major_command.bdg_options.bdg == True
+          -B $major_command.bdg_options.spmr
+       #end if
+       ##
        ## Advanced options
        #if str($major_command.advanced_options.advanced_options_selector) == 'on'
           --mfold $major_command.advanced_options.mfoldlo $major_command.advanced_options.mfoldhi
           $major_command.advanced_options.nolambda
-	  #if $major_command.bdg_options.bdg == True
-             -B $major_command.bdg_options.spmr
-	  #end if
           $major_command.advanced_options.call_summits
           #if str($major_command.advanced_options.keep_duplicates.keep_dup) == ''
              --keep-dup $major_command.advanced_options.keep_duplicates.maximum_tags
@@ -64,7 +66,7 @@
           #end if
        #else
           ## Defaults if advanced options not set
-          --mfold 5 50 --keep-dup 1
+          --mfold 10 30 --keep-dup 1
        #end if
        ##
        ## Output files
@@ -82,8 +84,12 @@
        ##
        ## Bedgraph outputs
        #if str($major_command.bdg_options.bdg) == 'True'
-          --output-pileup $output_treat_pileup_file 
+          --output-pileup=$output_treat_pileup_file 
           --output-lambda-bedgraph=$output_lambda_bedgraph_file
+	  #if str($major_command.bdg_options.make_bigwig) == 'True'
+             --output-bigwig=$output_bigwig_file
+             --length=$GALAXY_DATA_INDEX_DIR/shared/ucsc/chrom/${major_command.input_chipseq_file1.dbkey}.len
+	  #end if
        #end if
        ##
        ## XLS/interval output
@@ -153,13 +159,17 @@
 
 	<conditional name="bdg_options">
 	  <param name="bdg"
-		 label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph"
+		 label="Save treatment and control lambda pileups in bedGraph"
 		 type="boolean" truevalue="-B" falsevalue="" checked="False" />
 	  <when value="-B">
 	    <param name="spmr"
 		   type="boolean" truevalue="--SPMR" falsevalue="" checked="False"
 		   label="Save signal per million reads for fragment pileup profiles"
 		   help="(--SPMR)" />
+	    <param name="make_bigwig" type="boolean" checked="True"
+		   truevalue="True" falsevalue=""
+		   label="Also generate bigWig file from bedGraph"
+		   help="bigWig file can used in subsequent analyses e.g. CEAS" />
 	  </when>
 	  <when value="">
 	    <!-- Display nothing -->
@@ -300,6 +310,12 @@
       <filter>major_command['bdg_options']['bdg'] is True</filter>
       <filter>major_command['major_command_selector'] == 'callpeak'</filter>
     </data>
+    <data name="output_bigwig_file" format="bigwig"
+	  label="${tool.name}: callpeak on ${on_string} (treat pileup: bigWig)">
+      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
+      <filter>major_command['bdg_options']['bdg'] is True</filter>
+      <filter>major_command['bdg_options']['make_bigwig'] is True</filter>
+    </data>
     <!--bdgcmp output-->
     <data name="output_bdgcmp_file" format="bdg"
 	  label="${tool.name}: bdgcmp on ${on_string} (bdg)">
--- a/tool_dependencies.xml	Thu Jan 29 11:11:21 2015 -0500
+++ b/tool_dependencies.xml	Thu Feb 12 08:29:07 2015 -0500
@@ -1,23 +1,60 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="python" version="2.7">
-    <repository changeset_revision="2c52c900e56a" name="package_python_2_7" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+  <!-- Dependencies from local toolshed
+  <package name="numpy" version="1.9">
+    <repository name="package_numpy_1_9" prior_installation_required="True" owner="pjbriggs" />
+  </package>
+  -->
+  <!-- Dependencies from main/test toolsheds -->
+  <package name="numpy" version="1.9">
+    <repository changeset_revision="266529386609" name="package_numpy_1_9" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
   </package>
-  <package name="numpy" version="1.8.1">
-    <repository changeset_revision="0f9f634dec8a" name="package_numpy_1_8" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+  <!-- Subset of UCSC tools -->
+  <package name="ucsc_tools" version="1.0">
+      <install version="1.0">
+            <actions>
+	      <!-- fetchChromSizes -->
+              <action type="download_binary">
+                <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/fetchChromSizes</url_template>
+              </action>
+              <action type="chmod">
+                <file mode="755">$INSTALL_DIR/fetchChromSizes</file>
+              </action>
+	      <!-- bedClip -->
+              <action type="download_binary">
+                <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedClip</url_template>
+              </action>
+              <action type="chmod">
+                <file mode="755">$INSTALL_DIR/bedClip</file>
+              </action>
+	      <!-- bedGraphToBigWig -->
+              <action type="download_binary">
+                <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedGraphToBigWig</url_template>
+              </action>
+              <action type="chmod">
+                <file mode="755">$INSTALL_DIR/bedGraphToBigWig</file>
+              </action>
+	    </actions>
+      </install>
   </package>
+  <!-- MACS 2.1.0 -->
   <package name="macs2" version="2.1.0.20140616">
       <install version="1.0">
             <actions>
               <action type="download_by_url">https://pypi.python.org/packages/source/M/MACS2/MACS2-2.1.0.20140616.tar.gz</action>
-              <action type="set_environment_for_install">
-		<repository changeset_revision="2c52c900e56a" name="package_python_2_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
-		  <package name="python" version="2.7" />
-		</repository>
-		<repository changeset_revision="0f9f634dec8a" name="package_numpy_1_8" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
-		  <package name="numpy" version="1.8.1" />
+	      <!-- Install environment for local toolshed
+		<repository name="package_numpy_1_9" owner="pjbriggs">
+		  <package name="python_numpy" version="1.9" />
 		</repository>
 	      </action>
+	      -->
+	      <!-- Install environment for main & test toolsheds -->
+              <action type="set_environment_for_install">
+		<repository changeset_revision="266529386609" name="package_numpy_1_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
+		  <package name="numpy" version="1.9" />
+		</repository>
+	      </action>
+	      --&gt;
               <action type="make_directory">$INSTALL_DIR/lib/python</action>
               <action type="shell_command">
                 export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &amp;&amp;