Mercurial > repos > simon-gladman > degust
comparison degust.py @ 1:773107d91822 draft
Uploaded
author | simon-gladman |
---|---|
date | Mon, 24 Feb 2014 00:41:33 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:504bf58a4022 | 1:773107d91822 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse, json, re, sys, csv, StringIO, math | |
4 | |
def embed(csv, args):
    """Return a standalone Degust HTML page with the data and settings inlined.

    Parameters:
        csv:  The complete text of the CSV/TSV file.  (The name shadows the
              ``csv`` module inside this function; it is kept so existing
              callers are unaffected.)
        args: Parsed command-line namespace.  Reads ``info``, ``fdr``,
              ``avg``, ``primary``, ``logFC``, ``link_col`` (lists/strings of
              column names), ``tab``, ``name``, ``notour`` and ``link_url``.

    Returns:
        The HTML document as a string, with the ``window.settings = { };``
        placeholder of the template replaced by the data and the settings.
    """

    def js_literal(value):
        # json.dumps() yields a valid JavaScript literal, but the result is
        # placed inside a <script> element: a "</script>" (or "<!--") in the
        # CSV text, the name or a column name would terminate or corrupt the
        # element.  "\/" and "\!" are plain escapes for "/" and "!" inside a
        # JS string, so the decoded value is unchanged.
        return json.dumps(value).replace("</", "<\\/").replace("<!--", "<\\!--")

    html = """
<html>
  <head profile="http://www.w3.org/2005/10/profile">
    <link rel="icon" type="image/png" href="images/favicon.png"/>

    <!-- Externals CSS -->
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/lib.css' />

    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/common.css' type="text/css" />
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/compare.css' type="text/css "/>

    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/common.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/slickgrid.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/compare.js'></script>
  </head>
  <body>
    <div id="replace-me" class="container">
      <div class="jumbotron">
        <h1>Degust</h1>
        <p><a href='http://victorian-bioinformatics-consortium.github.io/degust/'>Degust</a> is preparing your data... prepare for degustation...</p>
        <img src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/images/front-loader.gif'>
      </div>
    </div>

    <script type="text/javascript">
      window.settings = { };
    </script>
  </body>
</html>

"""
    enc = js_literal(csv)

    # One JavaScript object literal per column; the column name doubles as
    # its index ("idx") into the CSV header.
    columns = \
        ["{idx:%s, name: %s, type:'info'}" % (js_literal(c), js_literal(c)) for c in args.info] + \
        ["{idx:%s, name: 'FDR', type: 'fdr'}" % js_literal(args.fdr)] + \
        ["{idx:%s, name: 'Average', type: 'avg'}" % js_literal(args.avg)] + \
        ["{idx:%s, name: %s, type: 'primary'}" % (js_literal(args.primary), js_literal(args.primary))] + \
        ["{idx:%s, name: %s, type:'fc'}" % (js_literal(c), js_literal(c)) for c in args.logFC] + \
        ["{idx:%s, name: %s, type:'link'}" % (js_literal(c), js_literal(c)) for c in args.link_col]

    settings = ["html_version: '0.11.2'",
                "asset_base: 'http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/'",
                "csv_data: data",
                "csv_format: %s" % ("false" if args.tab else "true"),
                "name: %s" % js_literal(args.name),
                "columns:[%s]" % (",".join(columns)),
                ]
    if args.notour:
        settings += ["show_tour: false"]
    if args.link_url:
        settings += ["link_url: %s" % js_literal(args.link_url)]

    window_settings = "window.settings = {%s};" % (",".join(settings))
    # Only the first (and only) placeholder in the template is substituted.
    return html.replace('window.settings = { };',
                        "var data=%s;\n\n%s" % (enc, window_settings), 1)
61 | |
def check_args(args, csv_file):
    """Check that the columns named on the command line exist in the CSV header.

    Parameters:
        args:     Parsed command-line namespace.  Reads ``tab``, ``avg``,
                  ``fdr``, ``logFC`` and ``info`` (the last two are lists of
                  column names, or None when the option was not given).
        csv_file: The complete text of the CSV/TSV file; only its first line
                  (the header) is inspected.

    Returns:
        True if at least one problem was found, False otherwise.  Every
        problem is reported on stderr; nothing is raised for a bad column.
    """
    delim = "\t" if args.tab else ","
    reader = csv.reader(csv_file.split('\n'), delimiter=delim)
    # next() works on both Python 2.6+ and 3 (reader.next() is Python 2 only);
    # the default covers a reader that yields no header row at all.
    headers = next(reader, [])
    err = False

    if args.avg is None:
        sys.stderr.write("ERROR: Column for average expression not defined (use --avg) necessary for the ma-plot\n")
        err = True
    elif args.avg not in headers:
        sys.stderr.write("ERROR: Column for average expression not found (%s)\n" % args.avg)
        err = True

    if args.fdr not in headers:
        sys.stderr.write("ERROR: Column for FDR not found (%s)\n" % args.fdr)
        err = True

    # The two list-valued options share the same shape of check:
    # (requested columns, message when the option is absent, message per missing column)
    list_checks = (
        (args.logFC,
         "ERROR: No columns defined for log-fold-change, --logFC\n",
         "ERROR: Column for logFC not found, --logFC : (%s)\n"),
        (args.info,
         "ERROR: No columns defined for per-gene information, eg. gene IDs (use --info)\n",
         "ERROR: Column for info not found (%s)\n"),
    )
    for wanted, undefined_msg, missing_msg in list_checks:
        if wanted is None:
            sys.stderr.write(undefined_msg)
            err = True
            continue
        for col in wanted:
            if col not in headers:
                sys.stderr.write(missing_msg % col)
                err = True
    return err
97 | |
98 | |
def cuffdiff_avg(str, delim="\t"):
    """Append an ``Avg`` column to cuffdiff output (gene_exp.diff).

    ``Avg`` is the mean of log2(value_1) and log2(value_2), with each value
    clamped to a minimum of 1 before taking the logarithm.  It is not the log
    of the average expression, but it is sufficient for visualisation.

    Parameters:
        str:   The complete text of the cuffdiff file.  (The name shadows the
               builtin; it is kept so existing callers are unaffected.)
        delim: Field delimiter for both reading and writing.  Defaults to a
               tab; the script forces tab-delimited mode for cuffdiff input.

    Returns:
        The rewritten table as a string: the header plus ``Avg``, followed by
        only those rows whose ``status`` column is ``OK``.

    Raises:
        ValueError: if the header lacks ``value_1``, ``value_2`` or
            ``status``, or if an ``OK`` row holds a non-numeric value.
    """
    # Function-scope import so this works on Python 2 (byte-oriented
    # StringIO, as csv.writer needs there) and on Python 3 alike.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    # Parse the text that was passed in rather than reaching for the
    # module-level 'csv_file'/'args' globals.
    reader = csv.reader(str.split('\n'), delimiter=delim)
    si = StringIO()
    cw = csv.writer(si, delimiter=delim)

    headers = next(reader, [])
    cw.writerow(headers + ['Avg'])
    idx1 = headers.index("value_1")
    idx2 = headers.index("value_2")
    tst_idx = headers.index("status")
    # A row is usable only if it reaches every column indexed below; this
    # also skips the empty row produced by a trailing newline.
    last_needed = max(idx1, idx2, tst_idx)
    for r in reader:
        if len(r) > last_needed and r[tst_idx] == 'OK':
            v1 = max(float(r[idx1]), 1)
            v2 = max(float(r[idx2]), 1)
            v = 0.5 * (math.log(v1, 2) + math.log(v2, 2))
            cw.writerow(r + [v])
    return si.getvalue()
120 | |
# Command-line entry point: parse the options, read the CSV, validate the
# requested columns and write the standalone HTML page.  Guarded so that
# importing this module does not parse sys.argv or consume stdin.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Produce a standalone Degust html file from a CSV file containing DGE.')
    parser.add_argument('csvfile', type=argparse.FileType('r'),
                        nargs='?', default='-',
                        help="CSV file to process (default stdin)")
    parser.add_argument('-o', '--out', type=argparse.FileType('w'),
                        default='-',
                        help="Output file (default stdout)")

    parser.add_argument('--name', default='Unnamed',
                        help='Name for this DGE comparison')
    # Documented as a switch, so a bare "--notour" must be accepted.
    # nargs='?' keeps the old "--notour VALUE" spelling working as well.
    parser.add_argument('--notour', nargs='?', const=True, default=False,
                        help='Do not show the tour on first load')
    parser.add_argument('--primary', default='pri',
                        help='Name for the primary condition that the fold-changes are relative to')
    parser.add_argument('--avg',
                        help='Name for average intensity column in CSV file')
    parser.add_argument('--fdr', default='adj.P.Val',
                        help='Name for "FDR" column in CSV file (default "adj.P.Val")')
    parser.add_argument('--logFC',
                        help='Comma separated names for "logFC" columns in CSV file')
    parser.add_argument('--info',
                        help='Comma separated names for info columns in CSV file')
    parser.add_argument('--link-col',
                        help='Name for column to use with "--link-url"')
    parser.add_argument('--link-url',
                        help='Gene info URL. Used when double-clicking the gene-table. Any "%%s" will be replaced with the value from the specified "--link-col"')
    parser.add_argument('--tab', action='store_true', default=False,
                        help='Specify that the csv file is actually tab delimited')
    parser.add_argument('--cuffdiff', action='store_true', default=False,
                        help='Input file is from cuffdiff (gene_exp.diff). This will set the columns automatically. Note this is still experimental')

    args = parser.parse_args()

    # Normalise the comma-separated options into lists; link_col is always a
    # (possibly empty) list so it can be iterated unconditionally later.
    if args.info:
        args.info = args.info.split(",")
    if args.logFC:
        args.logFC = args.logFC.split(",")
    args.link_col = [args.link_col] if args.link_col else []

    if args.csvfile == sys.stdin:
        sys.stderr.write("Reading from stdin...\n")

    csv_file = args.csvfile.read()

    if args.cuffdiff:
        # cuffdiff output has a fixed, tab-delimited layout; override any
        # user-supplied column options and synthesise the 'Avg' column.
        args.info = ['gene_id', 'gene']
        args.logFC = ['log2(fold_change)']
        args.tab = True
        args.fdr = 'q_value'
        args.avg = 'Avg'
        csv_file = cuffdiff_avg(csv_file)

    err = check_args(args, csv_file)

    if err:
        # The problems were already reported on stderr by check_args();
        # signal the failure to the caller instead of exiting with status 0.
        sys.exit(1)

    args.out.write(embed(csv_file, args))
178 |