changeset 0:8447ba178b06

Init
author Saket Choudhary <saketkc@gmail.com>
date Wed, 20 Nov 2013 01:15:34 +0530
parents
children 386fa4776fde
files tools/oncodrivefm_tool/README.rst tools/oncodrivefm_tool/oncodrivefm_tool.py tools/oncodrivefm_tool/oncodrivefm_tool.xml tools/oncodrivefm_tool/tool_dependencies.xml
diffstat 4 files changed, 245 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/oncodrivefm_tool/README.rst	Wed Nov 20 01:15:34 2013 +0530
@@ -0,0 +1,34 @@
+Galaxy wrapper for OncodriveFM
+==============================
+
+This tool is copyright 2013 by Saket Choudhary, Indian Institute of Technology Bombay.
+All rights reserved. MIT licensed.
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Citations
+===========
+
+
+If you use this Galaxy tool in work leading to a scientific publication please cite:
+
+ Gonzalez-Perez A and Lopez-Bigas N. 2012. Functional impact bias reveals cancer drivers. Nucleic Acids Res., 10.1093/nar/gks743.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/oncodrivefm_tool/oncodrivefm_tool.py	Wed Nov 20 01:15:34 2013 +0530
@@ -0,0 +1,131 @@
+#!/usr/bin/python
+import argparse
+import sys
+import os
+import tempfile
+import shutil
+import subprocess
+import ntpath
+
+"""
+ -h, --help            show this help message and exit
+  -o PATH, --output-path PATH
+                        Directory where output files will be written
+  -n NAME               Analysis name
+  --output-format FORMAT
+                        The FORMAT for the output file
+  -N NUMBER, --samplings NUMBER
+                        Number of samplings to compute the FM bias pvalue
+  -e ESTIMATOR, --estimator ESTIMATOR
+                        Test estimator for computation.
+  --gt THRESHOLD, --gene-threshold THRESHOLD
+                        Minimum number of mutations per gene to compute the FM
+                        bias
+  --pt THRESHOLD, --pathway-threshold THRESHOLD
+                        Minimum number of mutations per pathway to compute the
+                        FM bias
+  -s SLICES, --slices SLICES
+                        Slices to process separated by commas
+  -m PATH, --mapping PATH
+                        File with mappings between genes and pathways to be
+                        analysed
+  --save-data           The input data matrix will be saved
+  --save-analysis       The analysis results will be saved
+  -j CORES, --cores CORES
+                        Number of cores to use for calculations. Default is 0
+                        that means all the available cores
+  -D KEY=VALUE          Define external parameters to be saved in the results
+  -L LEVEL, --log-level LEVEL
+                        Define log level: debug, info, warn, error, critical,
+                        notset
+"""
+def stop_err( msg ):
+    """Write ``msg`` (plus a newline) to stderr and terminate the process.
+
+    The process ends with exit status 1 so that callers inspecting the
+    exit code can tell the run failed.
+    """
+    sys.stderr.write( '%s\n' % msg )
+    # A bare sys.exit() would report status 0 (success) despite the error.
+    sys.exit(1)
+def main(params):
+    """Run ``oncodrivefm`` with the given options and collect its results.
+
+    ``params`` is the wrapper's argument list (normally ``sys.argv[1:]``).
+    It is parsed to pick out the wrapper-only options (input path and the
+    two output paths); every other option is forwarded unchanged to the
+    ``oncodrivefm`` executable, which is run with a temporary output
+    directory (``-o``) and the input file as its positional argument.
+
+    After a successful run ``<prefix>-genes.<format>`` is moved to
+    ``output1`` and, if a mapping file was supplied,
+    ``<prefix>-pathways.<format>`` is moved to ``output2``. ``<prefix>`` is
+    the analysis name when given, otherwise the input file's base name up
+    to its first dot. The temporary directory is removed in all cases.
+
+    Exits via ``sys.exit`` with a message if ``oncodrivefm`` returns a
+    non-zero status; ``params`` itself is not modified.
+    """
+    parser = argparse.ArgumentParser()
+    ##TAKEN directly from the source code
+    parser.add_argument("-N", "--samplings", dest="num_samplings", type=int, default=10000, metavar="NUMBER",
+                                        help="Number of samplings to compute the FM bias pvalue")
+    parser.add_argument("-e", "--estimator", dest="estimator", metavar="ESTIMATOR",
+                                        choices=["mean", "median"], default="mean",
+                                        help="Test estimator for computation.")
+    parser.add_argument("--gt", "--gene-threshold", dest="mut_gene_threshold", type=int, default=2, metavar="THRESHOLD",
+                                        help="Minimum number of mutations per gene to compute the FM bias")
+    parser.add_argument("--pt", "--pathway-threshold", dest="mut_pathway_threshold", type=int, default=10, metavar="THRESHOLD",
+                                        help="Minimum number of mutations per pathway to compute the FM bias")
+    parser.add_argument("-s", "--slices", dest="slices", metavar="SLICES",
+                                        help="Slices to process separated by commas")
+    parser.add_argument("-m", "--mapping", dest="mapping", metavar="PATH",
+                                        help="File with mappings between genes and pathways to be analysed")
+    parser.add_argument("-f", "--filter", dest="filter", metavar="PATH",
+                                        help="File containing the features to be filtered. By default labels are includes,"
+                                                    " labels preceded with - are excludes.")
+    parser.add_argument("-o1", "--output1", type=str, dest="output1", required=True)
+
+    parser.add_argument("-o2", "--output2", type=str, dest="output2", required=False)
+    parser.add_argument("-n", "--analysis_name", type=str, required=False, help="Analysis name")
+    parser.add_argument("--output-format", dest="output_format", required=False,
+                        metavar="FORMAT",
+                        choices=["tsv", "tsv.gz", "tsv.bz2"],
+                        default="tsv",
+                        help="The FORMAT for the output file")
+    parser.add_argument("-j", "--cores", dest="num_cores", type=int,
+                        metavar="CORES",
+                        help="Number of cores to use for calculations.\
+                        Default is 0 that means all the available cores")
+    parser.add_argument("-D", dest="defines", metavar="KEY=VALUE", action="append", help="Define external parameters to be saved in the results")
+    parser.add_argument("-L", "--log-level", dest="log_level", metavar="LEVEL", default=None,
+                        choices=["debug", "info", "warn", "error", "critical", "notset"],
+                        help="Define log level: debug, info, warn, error, critical, notset")
+    parser.add_argument("-i", "--input", dest="input_path", required=True, type=str, help="Path to input file")
+    args = vars(parser.parse_args(params))
+
+    # A pathways result is only produced (and moved) when a mapping file is
+    # given, so its destination must be known before doing any work.
+    if args["mapping"] is not None and args["output2"] is None:
+        parser.error("-o2/--output2 is required when -m/--mapping is given")
+
+    # Options consumed by this wrapper; each takes one value and must not be
+    # forwarded to oncodrivefm.
+    wrapper_only = ("-i", "--input", "-o1", "--output1", "-o2", "--output2")
+
+    output_dir = tempfile.mkdtemp()
+    try:
+        # Build an argv list instead of a shell string so that paths with
+        # spaces or shell metacharacters are passed through verbatim.
+        cmd = ["oncodrivefm"]
+        i = 0
+        while i < len(params):
+            if params[i] in wrapper_only:
+                i += 2  # skip the option and its value
+            else:
+                cmd.append(params[i])
+                i += 1
+        cmd.extend(["-o", output_dir, args["input_path"]])
+
+        print(" ".join(cmd))
+        # Flush so the echoed command precedes the child's own output.
+        sys.stdout.flush()
+        # stderr is merged into stdout, matching the former "2>&1".
+        returncode = subprocess.call(cmd, stderr=subprocess.STDOUT)
+        if returncode != 0:
+            sys.exit("oncodrivefm exited with status %d" % returncode)
+
+        if args['analysis_name'] is not None:
+            prefix = args["analysis_name"]
+        else:
+            ##refer: http://stackoverflow.com/a/8384788/756986
+            prefix = ntpath.basename(args["input_path"]).split(".")[0]
+        output_format = args["output_format"]
+
+        genes_output_file_name = os.path.join(output_dir, prefix+"-genes."+output_format)
+        shutil.move(genes_output_file_name, args["output1"])
+        if args["mapping"] is not None:
+            pathway_output_file_name = os.path.join(output_dir, prefix+"-pathways."+output_format)
+            shutil.move(pathway_output_file_name, args["output2"])
+    finally:
+        # Always clean up, including when oncodrivefm or a move fails.
+        shutil.rmtree(output_dir, ignore_errors=True)
+if __name__ == "__main__":
+    # Script entry point: hand everything after the program name to main().
+    cli_arguments = sys.argv[1:]
+    main(cli_arguments)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/oncodrivefm_tool/oncodrivefm_tool.xml	Wed Nov 20 01:15:34 2013 +0530
@@ -0,0 +1,73 @@
+<tool id="oncodrivefm_tool" name="OncodriveFM">
+    <description>OncodriveFM </description>
+    <tool_dependency>
+        <package name="oncodrivefm">
+            <install version="0.6-dev">
+                <actions>
+                    <action type="setup_virtualenv">distribute</action>
+                    <action type="setup_virtualenv">python-dateutil==2.1</action>
+                    <action type="setup_virtualenv">pytz==2013b</action>
+                    <action type="setup_virtualenv">numpy==1.7.1</action>
+                    <action type="setup_virtualenv">scipy==0.12.0</action>
+                    <action type="setup_virtualenv">pandas==0.12.0</action>
+                    <action type="setup_virtualenv">statsmodels==0.4.3</action>
+                    <action type="setup_virtualenv">https://bitbucket.org/bbglab/oncodrivefm/get/master.tar.gz</action>
+                </actions>
+            </install>
+            <readme>OncodriveFM available at https://bitbucket.org/bbglab/oncodrivefm/</readme>
+        </package>
+    </tool_dependency>
+
+    <command interpreter="python">
+        #if $mappingpathcondition.hasmapfile=="yes"
+            oncodrivefm_tool.py -N $samplings -e $estimator --gt $gene_threshold --pt $pathway_threshold -s $slices -m $mapping_path -o1 $output1 -o2 $output2 -i $input
+        #else
+            oncodrivefm_tool.py -N $samplings -e $estimator --gt $gene_threshold --pt $pathway_threshold -s $slices -o1 $output1 -o2 $output2 -i $input
+        #end if
+    </command>
+    <inputs>
+        <param name="input" format="tabular" type="data" label="Input" />
+        <param name="samplings" type="integer" label="Samplings" help="Number of samplings to compute the FM bias pvalue" optional="true" value="10000"/>
+
+        <param name="estimator" type="select" label="Estimator" help="Test estimator for computation" optional="true">
+            <option value="median">Median</option>
+            <option value="mean">Mean</option>
+        </param>
+
+
+        <param name="gene_threshold" type="integer" label="Gene Threshold" help="Minimum number of mutations per gene to compute the FM bias" optional="true" value="2"/>
+        <param name="pathway_threshold" type="integer" label="Pathway Threshold" help="Minimum number of mutations per pathway to compute the FM bias" optional="true" value="10" />
+        <param name="slices" type="text" label="Slices" help="Slices to process separated by commas(slices=SIFT,PPH2,MA)" optional="true" value="SIFT,PPH2,MA"/>
+        <conditional name="mappingpathcondition">
+            <param name="hasmapfile" type="select" label="Have a mapping gene file">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="yes">
+                <param name="mapping_path" type="data" format="tabular" label=" File with mappings between genes and pathways to be analysed"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+
+        <data format="tabular" name="output1"/>
+        <data format="tabular" name="output2" >
+            <filter>mappingpathcondition["hasmapfile"]=="yes"</filter>
+        </data>
+    </outputs>
+    <help>
+        **What it does**
+        Oncodrive-fm is an approach to uncover driver genes or gene modules.
+        It computes a metric of functional impact using three well-known methods (SIFT, PolyPhen2 and MutationAssessor)
+        and assesses how the functional impact of variants found in a gene across several tumor samples deviates from
+        a null distribution. It is thus based on the assumption that any bias towards the accumulation of variants
+        with high functional impact is an indication of positive selection and can thus be used to detect candidate
+        driver genes or gene modules.
+
+         **Citation**
+         If you use this Galaxy tool in work leading to a scientific publication please cite:
+
+         Gonzalez-Perez A and Lopez-Bigas N. 2012. Functional impact bias reveals cancer drivers. Nucleic Acids Res., 10.1093/nar/gks743.
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/oncodrivefm_tool/tool_dependencies.xml	Wed Nov 20 01:15:34 2013 +0530
@@ -0,0 +1,7 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool_dependency>
+     <package name="requests" version="2.0.1">
+         <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_requests_2_0" owner="saketkc"/>
+    </package>
+</tool_dependency>
+