Mercurial > repos > pjbriggs > macs21
changeset 2:15889783e759 draft
Fix bugs in tool operation and update dependencies.
author | pjbriggs |
---|---|
date | Thu, 12 Feb 2015 08:29:07 -0500 |
parents | d0986d2be693 |
children | 881a48588832 |
files | README.rst macs21_wrapper.py macs21_wrapper.xml tool_dependencies.xml |
diffstat | 4 files changed, 244 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Thu Feb 12 08:29:07 2015 -0500 @@ -0,0 +1,98 @@ +MACS21: Model-based Analysis of ChIP-Seq (MACS 2.1.0) +===================================================== + +Galaxy tool wrapper for the MACS 2.1.0 ChIP-seq peak calling program. MACS has been +developed by Tao Liu +https://github.com/taoliu/MACS/ + +The reference for MACS is: + +- Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers + RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. + 2008;9(9):R137 + +Automated installation +====================== + +Installation via the Galaxy Tool Shed will take care of installing the tool wrapper and +the MACS 2.1.0 program. + +Manual Installation +=================== + +There are two files to install: + +- ``macs21_wrapper.xml`` (the Galaxy tool definition) +- ``macs21_wrapper.py`` (the Python script wrapper) + +The suggested location is in a ``tools/macs21/`` folder. You will then +need to modify the ``tool_conf.xml`` file to tell Galaxy to offer the tool +by adding the line: + + <tool file="macs21/macs21_wrapper.xml" /> + +You will also need to install MACS 2.1.0: + +- https://pypi.python.org/pypi/MACS2 + +and ensure that it's on your Galaxy user's ``PATH`` when running the tool. + +If you want to run the functional tests, copy the +sample test files under Galaxy's ``test-data/`` directory. Then: + + ./run_tests.sh -id macs2_wrapper + +(However there are no tests defined at present.) + +History +======= + +This tool is based on the ``modencode-dcc`` MACS2 tool developed by Ziru Zhou +(ziruzhou@gmail.com), specifically the ``16:14f378e35191`` revision of the +tool at + +- http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 + +It has been substantially modified both to adapt it to MACS 2.1.0, and to +re-implement the internal workings of the tool to conform with current practices +in invoking commands from Galaxy. 
+ +========== ====================================================================== +Version Changes +---------- ---------------------------------------------------------------------- +2.1.0-2 - Add option to create bigWig file from bedGraphs; fix bug with -B + option. +2.1.0-1 - Initial version +========== ====================================================================== + + +Developers +========== + +This tool is developed on the following GitHub repository: +https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/macs21 + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use +the ``package_macs21_wrapper.sh`` script. + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
--- a/macs21_wrapper.py Thu Jan 29 11:11:21 2015 -0500 +++ b/macs21_wrapper.py Thu Feb 12 08:29:07 2015 -0500 @@ -53,6 +53,65 @@ fp.write( '\t'.join( fields ) ) fp.close() +def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, + chrom_sizes,working_dir=None): + """Make bigWig file from a bedGraph + + The protocol is: + + $ fetchChromSizes.sh mm9 > mm9.chrom.sizes + $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped + $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw + + Get the binaries from + http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ + + We skip the fetchChromSizes step if the 'chrom_sizes' + argument supplied a valid file with the chromosome sizes + for the genome build in question. + + """ + print "Generating bigWig from bedGraph..." + # Check for chromosome sizes + if not os.path.exists(chrom_sizes): + # Determine genome build + chrom_sizes = os.path.basename(chrom_sizes) + genome_build = chrom_sizes.split('.')[0] + print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build + # Run fetchChromSizes + chrom_sizes = os.path.join(working_dir,chrom_sizes) + stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") + cmd = "fetchChromSizes %s" % genome_build + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, + stdout=open(chrom_sizes,'wb'), + stderr=open(stderr_file,'wb')) + proc.wait() + # Copy stderr from fetchChromSizes for information only + for line in open(stderr_file,'r'): + print line.strip() + # Check that the sizes file was downloaded + if not os.path.exists(chrom_sizes): + sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build) + sys.exit(1) + # Run bedClip + treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) + cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + # Check that clipped file exists + treat_clipped = 
os.path.join(working_dir,treat_clipped) + if not os.path.exists(treat_clipped): + sys.stderr.write("Failed to create clipped bed file") + sys.exit(1) + # Run bedGraphToBigWig + cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes, + bigwig_file) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + if __name__ == "__main__": # Echo the command line @@ -67,10 +126,14 @@ output_narrowpeaks = None output_treat_pileup = None output_lambda_bedgraph = None + output_bigwig = None output_xls_to_interval_peaks_file = None output_peaks = None output_bdgcmp = None + # Other initialisations + chrom_sizes_file = None + # Build the MACS 2.1 command line # Initial arguments are always the same: command & input ChIP-seq file name cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] @@ -85,6 +148,9 @@ # Replace whitespace in name with underscores experiment_name = '_'.join(arg.split('=')[1].split()) cmdline.append("--name=%s" % experiment_name) + elif arg.startswith('--length='): + # Extract chromosome size file + chrom_sizes_file = arg.split('=')[1] elif arg.startswith('--output-'): # Handle destinations for output files arg0,filen = arg.split('=') @@ -104,6 +170,8 @@ output_treat_pileup = filen elif arg0 == '--output-lambda-bedgraph': output_lambda_bedgraph = filen + elif arg0 == '--output-bigwig': + output_bigwig = filen elif arg0 == '--output-xls-to-interval': output_xls_to_interval_peaks_file = filen elif arg0 == '--output-peaks': @@ -142,6 +210,13 @@ if os.path.exists(peaks_xls_file): convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, header='peaks file') + + # Create bigWig from bedGraph, if requested + if output_bigwig is not None: + treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) + if os.path.exists(treat_bedgraph_file): + make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, + chrom_sizes_file,working_dir) # Move MACS2 output files from 
working dir to their final destinations move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)
--- a/macs21_wrapper.xml Thu Jan 29 11:11:21 2015 -0500 +++ b/macs21_wrapper.xml Thu Feb 12 08:29:07 2015 -0500 @@ -1,7 +1,7 @@ -<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-1"> +<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-2"> <requirements> <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.8.1">numpy</requirement> + <requirement type="package" version="1.9">numpy</requirement> <requirement type="package" version="2.1.0.20140616">macs2</requirement> </requirements> <description>Model-based Analysis of ChIP-Seq</description> @@ -49,13 +49,15 @@ --pvalue=$major_command.pq_options.pvalue #end if ## + ## Bedgraph options + #if $major_command.bdg_options.bdg == True + -B $major_command.bdg_options.spmr + #end if + ## ## Advanced options #if str($major_command.advanced_options.advanced_options_selector) == 'on' --mfold $major_command.advanced_options.mfoldlo $major_command.advanced_options.mfoldhi $major_command.advanced_options.nolambda - #if $major_command.bdg_options.bdg == True - -B $major_command.bdg_options.spmr - #end if $major_command.advanced_options.call_summits #if str($major_command.advanced_options.keep_duplicates.keep_dup) == '' --keep-dup $major_command.advanced_options.keep_duplicates.maximum_tags @@ -64,7 +66,7 @@ #end if #else ## Defaults if advanced options not set - --mfold 5 50 --keep-dup 1 + --mfold 10 30 --keep-dup 1 #end if ## ## Output files @@ -82,8 +84,12 @@ ## ## Bedgraph outputs #if str($major_command.bdg_options.bdg) == 'True' - --output-pileup $output_treat_pileup_file + --output-pileup=$output_treat_pileup_file --output-lambda-bedgraph=$output_lambda_bedgraph_file + #if str($major_command.bdg_options.make_bigwig) == 'True' + --output-bigwig=$output_bigwig_file + --length=$GALAXY_DATA_INDEX_DIR/shared/ucsc/chrom/${major_command.input_chipseq_file1.dbkey}.len + #end if #end if ## ## XLS/interval output @@ -153,13 +159,17 @@ <conditional 
name="bdg_options"> <param name="bdg" - label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph" + label="Save treatment and control lambda pileups in bedGraph" type="boolean" truevalue="-B" falsevalue="" checked="False" /> <when value="-B"> <param name="spmr" type="boolean" truevalue="--SPMR" falsevalue="" checked="False" label="Save signal per million reads for fragment pileup profiles" help="(--SPMR)" /> + <param name="make_bigwig" type="boolean" checked="True" + truevalue="True" falsevalue="" + label="Also generate bigWig file from bedGraph" + help="bigWig file can used in subsequent analyses e.g. CEAS" /> </when> <when value=""> <!-- Display nothing --> @@ -300,6 +310,12 @@ <filter>major_command['bdg_options']['bdg'] is True</filter> <filter>major_command['major_command_selector'] == 'callpeak'</filter> </data> + <data name="output_bigwig_file" format="bigwig" + label="${tool.name}: callpeak on ${on_string} (treat pileup: bigWig)"> + <filter>major_command['major_command_selector'] == 'callpeak'</filter> + <filter>major_command['bdg_options']['bdg'] is True</filter> + <filter>major_command['bdg_options']['make_bigwig'] is True</filter> + </data> <!--bdgcmp output--> <data name="output_bdgcmp_file" format="bdg" label="${tool.name}: bdgcmp on ${on_string} (bdg)">
--- a/tool_dependencies.xml Thu Jan 29 11:11:21 2015 -0500 +++ b/tool_dependencies.xml Thu Feb 12 08:29:07 2015 -0500 @@ -1,23 +1,60 @@ <?xml version="1.0"?> <tool_dependency> - <package name="python" version="2.7"> - <repository changeset_revision="2c52c900e56a" name="package_python_2_7" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <!-- Dependencies from local toolshed + <package name="numpy" version="1.9"> + <repository name="package_numpy_1_9" prior_installation_required="True" owner="pjbriggs" /> + </package> + --> + <!-- Dependencies from main/test toolsheds --> + <package name="numpy" version="1.9"> + <repository changeset_revision="266529386609" name="package_numpy_1_9" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - <package name="numpy" version="1.8.1"> - <repository changeset_revision="0f9f634dec8a" name="package_numpy_1_8" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <!-- Subset of UCSC tools --> + <package name="ucsc_tools" version="1.0"> + <install version="1.0"> + <actions> + <!-- fetchChromSizes --> + <action type="download_binary"> + <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/fetchChromSizes</url_template> + </action> + <action type="chmod"> + <file mode="755">$INSTALL_DIR/fetchChromSizes</file> + </action> + <!-- bedClip --> + <action type="download_binary"> + <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedClip</url_template> + </action> + <action type="chmod"> + <file mode="755">$INSTALL_DIR/bedClip</file> + </action> + <!-- bedGraphToBigWig --> + <action type="download_binary"> + <url_template architecture="x86_64" os="linux">http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedGraphToBigWig</url_template> + </action> + <action type="chmod"> + <file 
mode="755">$INSTALL_DIR/bedGraphToBigWig</file> + </action> + </actions> + </install> </package> + <!-- MACS 2.1.0 --> <package name="macs2" version="2.1.0.20140616"> <install version="1.0"> <actions> <action type="download_by_url">https://pypi.python.org/packages/source/M/MACS2/MACS2-2.1.0.20140616.tar.gz</action> - <action type="set_environment_for_install"> - <repository changeset_revision="2c52c900e56a" name="package_python_2_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> - <package name="python" version="2.7" /> - </repository> - <repository changeset_revision="0f9f634dec8a" name="package_numpy_1_8" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> - <package name="numpy" version="1.8.1" /> + <!-- Install environment for local toolshed + <repository name="package_numpy_1_9" owner="pjbriggs"> + <package name="python_numpy" version="1.9" /> </repository> </action> + --> + <!-- Install environment for main & test toolsheds --> + <action type="set_environment_for_install"> + <repository changeset_revision="266529386609" name="package_numpy_1_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> + <package name="numpy" version="1.9" /> + </repository> + </action> + --> <action type="make_directory">$INSTALL_DIR/lib/python</action> <action type="shell_command"> export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &&