annotate tools/oncodrivefm_tool/oncodrivefm_tool.py @ 2:09f9829f1400

tag
author Saket Choudhary <saketkc@gmail.com>
date Wed, 20 Nov 2013 01:47:49 +0530
parents 8447ba178b06
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
1 #!/usr/bin/python
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
2 import argparse
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
3 import sys
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
4 import os
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
5 import tempfile
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
6 import shutil
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
7 import subprocess
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
8 import ntpath
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
9
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
10 """
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
11 -h, --help show this help message and exit
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
12 -o PATH, --output-path PATH
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
13 Directory where output files will be written
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
14 -n NAME Analysis name
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
15 --output-format FORMAT
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
16 The FORMAT for the output file
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
17 -N NUMBER, --samplings NUMBER
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
18 Number of samplings to compute the FM bias pvalue
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
19 -e ESTIMATOR, --estimator ESTIMATOR
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
20 Test estimator for computation.
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
21 --gt THRESHOLD, --gene-threshold THRESHOLD
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
22 Minimum number of mutations per gene to compute the FM
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
23 bias
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
24 --pt THRESHOLD, --pathway-threshold THRESHOLD
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
25 Minimum number of mutations per pathway to compute the
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
26 FM bias
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
27 -s SLICES, --slices SLICES
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
28 Slices to process separated by commas
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
29 -m PATH, --mapping PATH
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
30 File with mappings between genes and pathways to be
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
31 analysed
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
32 --save-data The input data matrix will be saved
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
33 --save-analysis The analysis results will be saved
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
34 -j CORES, --cores CORES
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
35 Number of cores to use for calculations. Default is 0
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
36 that means all the available cores
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
37 -D KEY=VALUE Define external parameters to be saved in the results
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
38 -L LEVEL, --log-level LEVEL
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
39 Define log level: debug, info, warn, error, critical,
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
40 notset
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
41 """
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The process exits with status 1 so that callers (shells, workflow
    engines) can tell the run failed; a bare ``sys.exit()`` would report
    status 0, i.e. success.

    msg: text of the error message; a trailing newline is appended.
    Raises SystemExit (always).
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
def main(params):
    """Run ``oncodrivefm`` and move its result files to the requested paths.

    The wrapper parses *params*, forwards every option except its own
    input/output options to the ``oncodrivefm`` executable, lets the tool
    write into a temporary directory, and then moves the genes result to
    ``--output1`` and, when a mapping file was supplied, the pathways
    result to ``--output2``.

    params: list of command-line argument strings (program name excluded).
        The list is not modified.
    Raises SystemExit when argument parsing fails or when ``oncodrivefm``
    exits with a non-zero status (that status is propagated).
    """
    parser = argparse.ArgumentParser()
    # Option definitions mirroring those of oncodrivefm itself.
    parser.add_argument("-N", "--samplings", dest="num_samplings", type=int, default=10000, metavar="NUMBER",
                        help="Number of samplings to compute the FM bias pvalue")
    parser.add_argument("-e", "--estimator", dest="estimator", metavar="ESTIMATOR",
                        choices=["mean", "median"], default="mean",
                        help="Test estimator for computation.")
    parser.add_argument("--gt", "--gene-threshold", dest="mut_gene_threshold", type=int, default=2, metavar="THRESHOLD",
                        help="Minimum number of mutations per gene to compute the FM bias")
    parser.add_argument("--pt", "--pathway-threshold", dest="mut_pathway_threshold", type=int, default=10, metavar="THRESHOLD",
                        help="Minimum number of mutations per pathway to compute the FM bias")
    parser.add_argument("-s", "--slices", dest="slices", metavar="SLICES",
                        help="Slices to process separated by commas")
    parser.add_argument("-m", "--mapping", dest="mapping", metavar="PATH",
                        help="File with mappings between genes and pathways to be analysed")
    parser.add_argument("-f", "--filter", dest="filter", metavar="PATH",
                        help="File containing the features to be filtered. By default labels are includes,"
                             " labels preceded with - are excludes.")
    # Wrapper-only options: where the caller wants the results to end up.
    parser.add_argument("-o1", "--output1", type=str, dest="output1", required=True)
    parser.add_argument("-o2", "--output2", type=str, dest="output2", required=False)
    parser.add_argument("-n", "--analysis_name", type=str, required=False, help="Analysis name")
    parser.add_argument("--output-format", dest="output_format", required=False,
                        metavar="FORMAT",
                        choices=["tsv", "tsv.gz", "tsv.bz2"],
                        default="tsv",
                        help="The FORMAT for the output file")
    parser.add_argument("-j", "--cores", dest="num_cores", type=int,
                        metavar="CORES",
                        help="Number of cores to use for calculations. "
                             "Default is 0 that means all the available cores")
    parser.add_argument("-D", dest="defines", metavar="KEY=VALUE", action="append",
                        help="Define external parameters to be saved in the results")
    parser.add_argument("-L", "--log-level", dest="log_level", metavar="LEVEL", default=None,
                        choices=["debug", "info", "warn", "error", "critical", "notset"],
                        help="Define log level: debug, info, warn, error, critical, notset")
    parser.add_argument("-i", "--input", dest="input_path", required=True, type=str, help="Path to input file")
    args = vars(parser.parse_args(params))

    # Build the oncodrivefm argument list from the raw parameters, dropping
    # the wrapper-only options (and their values) in both short and long form.
    wrapper_only = ("-i", "--input", "-o1", "--output1", "-o2", "--output2")
    wrapper_only_inline = ("--input=", "--output1=", "--output2=")
    command = ["oncodrivefm"]
    skip_value = False
    for token in params:
        if skip_value:
            # This token is the value of a wrapper-only option.
            skip_value = False
        elif token in wrapper_only:
            skip_value = True
        elif not token.startswith(wrapper_only_inline):
            command.append(token)

    output_dir = tempfile.mkdtemp()
    try:
        command.extend(["-o", output_dir, args["input_path"]])
        print(" ".join(command))
        # Flush so the echoed command appears before the tool's own output.
        sys.stdout.flush()
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; stderr is merged into stdout as before.
        returncode = subprocess.call(command, stderr=subprocess.STDOUT)
        if returncode != 0:
            sys.stderr.write("oncodrivefm exited with status %d\n" % returncode)
            sys.exit(returncode)

        if args["analysis_name"] is not None:
            prefix = args["analysis_name"]
        else:
            # ntpath.basename copes with both "/" and "\\" separators;
            # refer: http://stackoverflow.com/a/8384788/756986
            prefix = ntpath.basename(args["input_path"]).split(".")[0]

        output_format = args["output_format"]
        genes_result = os.path.join(output_dir, prefix + "-genes." + output_format)
        shutil.move(genes_result, args["output1"])

        # A pathways result is only expected when a mapping file was given;
        # it is only collected when the caller also named a destination.
        if args["mapping"] is not None and args["output2"] is not None:
            pathways_result = os.path.join(output_dir, prefix + "-pathways." + output_format)
            shutil.move(pathways_result, args["output2"])
    finally:
        # Always remove the scratch directory, including on failure.
        shutil.rmtree(output_dir, ignore_errors=True)
Saket Choudhary <saketkc@gmail.com>
parents:
diff changeset
# Script entry point: hand the command-line arguments (without the program
# name) to main() when the file is executed directly rather than imported.
if __name__ == "__main__":
    main(sys.argv[1:])