1
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse, json, re, sys, csv, StringIO, math
|
|
4
|
|
def embed(csv, args):
    """Return a standalone Degust HTML page with the CSV data embedded in it.

    The page loads the Degust assets from the project's GitHub pages and
    receives its configuration through an inline ``window.settings`` script.

    Parameters:
        csv:  the text of the CSV (or tab separated) table; it is embedded
              verbatim as the JavaScript variable ``data``.  (The name shadows
              the ``csv`` module inside this function only.)
        args: parsed command line options.  The attributes read here are
              ``info``, ``fdr``, ``avg``, ``primary``, ``logFC``, ``link_col``,
              ``tab``, ``name``, ``notour`` and ``link_url``.

    Returns:
        The complete HTML document as a string.
    """
    html = """
<html>
  <head profile="http://www.w3.org/2005/10/profile">
    <link rel="icon" type="image/png" href="images/favicon.png"/>

    <!-- Externals CSS -->
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/lib.css' />

    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/common.css' type="text/css" />
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/compare.css' type="text/css "/>

    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/common.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/slickgrid.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/compare.js'></script>
  </head>
  <body>
    <div id="replace-me" class="container">
      <div class="jumbotron">
        <h1>Degust</h1>
        <p><a href='http://victorian-bioinformatics-consortium.github.io/degust/'>Degust</a> is preparing your data... prepare for degustation...</p>
        <img src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/images/front-loader.gif'>
      </div>
    </div>

    <script type="text/javascript">
      window.settings = { };
    </script>
  </body>
</html>

"""

    def js(value):
        # json.dumps() gives a valid JavaScript literal, but the literal ends
        # up inside an inline <script> element: a "</script>" or "<!--" in the
        # data would end or confuse that element.  "\u003c" is the same
        # character to the JavaScript parser and is invisible to the HTML one.
        return json.dumps(value).replace("<", "\\u003c")

    # Column descriptions, in the order: info, FDR, average, primary,
    # fold-changes, link column.
    columns = []
    columns += ["{idx:%s, name: %s, type:'info'}" % (js(c), js(c))
                for c in (args.info or [])]
    columns.append("{idx:%s, name: 'FDR', type: 'fdr'}" % js(args.fdr))
    columns.append("{idx:%s, name: 'Average', type: 'avg'}" % js(args.avg))
    columns.append("{idx:%s, name: %s, type: 'primary'}"
                   % (js(args.primary), js(args.primary)))
    columns += ["{idx:%s, name: %s, type:'fc'}" % (js(c), js(c))
                for c in (args.logFC or [])]
    columns += ["{idx:%s, name: %s, type:'link'}" % (js(c), js(c))
                for c in (args.link_col or [])]

    settings = [
        "html_version: '0.11.2'",
        "asset_base: 'http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/'",
        "csv_data: data",
        # csv_format is true for comma separated data, false for tab separated.
        "csv_format: %s" % ("false" if args.tab else "true"),
        "name: %s" % js(args.name),
        "columns:[%s]" % (",".join(columns)),
    ]
    if args.notour:
        settings.append("show_tour: false")
    if args.link_url:
        settings.append("link_url: %s" % js(args.link_url))

    window_settings = "window.settings = {%s};" % (",".join(settings))
    payload = "var data=%s;\n\n%s" % (js(csv), window_settings)
    # Only the first (and only) placeholder in the template is substituted.
    return html.replace('window.settings = { };', payload, 1)
|
|
61
|
|
def check_args(args, csv_file):
    """Check that the columns named on the command line exist in the data.

    Only the header (first) row of the data is inspected.  Every problem found
    is written to stderr as one "ERROR: ..." line.

    Parameters:
        args:     parsed command line options; reads ``tab``, ``avg``, ``fdr``,
                  ``logFC`` and ``info`` (the last two as lists of names or
                  None).
        csv_file: the full text of the CSV / tab separated table.

    Returns:
        True when at least one problem was reported (i.e. True means *error*),
        False when all arguments match the header.
    """
    delim = "\t" if args.tab else ","
    reader = csv.reader(csv_file.split('\n'), delimiter=delim)
    # next() works on Python 2 and 3; the default keeps empty input from
    # raising StopIteration, so every column is simply reported as missing.
    headers = next(reader, [])

    problems = []

    if args.avg is None:
        problems.append("Column for average expression not defined (use --avg) necessary for the ma-plot")
    elif args.avg not in headers:
        problems.append("Column for average expression not found (%s)" % args.avg)

    if args.fdr not in headers:
        problems.append("Column for FDR not found (%s)" % args.fdr)

    if args.logFC is None:
        problems.append("No columns defined for log-fold-change, --logFC")
    else:
        problems.extend("Column for logFC not found, --logFC : (%s)" % name
                        for name in args.logFC if name not in headers)

    if args.info is None:
        problems.append("No columns defined for per-gene information, eg. gene IDs (use --info)")
    else:
        problems.extend("Column for info not found (%s)" % name
                        for name in args.info if name not in headers)

    for message in problems:
        sys.stderr.write("ERROR: %s\n" % message)
    return bool(problems)
|
|
97
|
|
98
|
|
def cuffdiff_avg(str, delim="\t"):
    """Add a log2 average-expression column ("Avg") to cuffdiff output.

    Actually, the value is just the average of log2() of the two FPKM values
    (each clamped to a minimum of 1), but that should be enough for
    visualisation.

    Parameters:
        str:   the text of the cuffdiff table.  The parameter name shadows the
               builtin; it is kept so existing callers are unaffected.
        delim: field delimiter used for both reading and writing; defaults to
               a tab.

    Returns:
        The table as text with an extra trailing "Avg" column.  Only rows whose
        "status" column is 'OK' are kept; rows too short to contain the needed
        columns are dropped.

    Raises:
        ValueError: if the header row lacks "value_1", "value_2" or "status"
                    (this includes empty input), or a value is not a number.
    """
    # Function-scope import so the block runs on Python 2 (StringIO module)
    # as well as Python 3 (io.StringIO).
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    # Parse the text that was passed in, not a module-level global.
    reader = csv.reader(str.split('\n'), delimiter=delim)
    headers = next(reader, [])

    missing = [name for name in ("value_1", "value_2", "status")
               if name not in headers]
    if missing:
        raise ValueError("cuffdiff input is missing column(s): %s"
                         % ", ".join(missing))
    idx1 = headers.index("value_1")
    idx2 = headers.index("value_2")
    tst_idx = headers.index("status")
    # A row must be strictly longer than the largest index that is read.
    last_needed = max(idx1, idx2, tst_idx)

    out = StringIO()
    cw = csv.writer(out, delimiter=delim)
    cw.writerow(headers + ['Avg'])
    for r in reader:
        if len(r) > last_needed and r[tst_idx] == 'OK':
            # Clamp to 1 so that log2 never goes below zero.
            v1 = max(float(r[idx1]), 1)
            v2 = max(float(r[idx2]), 1)
            v = 0.5 * (math.log(v1, 2) + math.log(v2, 2))
            cw.writerow(r + [v])
    return out.getvalue()
|
|
120
|
|
# Command line entry point: parse the options, read the table, validate the
# column names against its header and write the standalone HTML page.
# NOTE: `args` and `csv_file` are deliberately left as module-level names.
parser = argparse.ArgumentParser(description='Produce a standalone Degust html file from a CSV file containing DGE.')
parser.add_argument('csvfile', type=argparse.FileType('r'),
                    nargs='?', default='-',
                    help="CSV file to process (default stdin)")
parser.add_argument('-o', '--out', type=argparse.FileType('w'),
                    default='-',
                    help="Output file (default stdout)")

parser.add_argument('--name', default='Unnamed',
                    help='Name for this DGE comparison')
# nargs='?' with const=True lets --notour be used as a plain flag while still
# accepting the "--notour VALUE" form that earlier versions required.
parser.add_argument('--notour', nargs='?', const=True,
                    help='Do not show the tour on first load')
parser.add_argument('--primary', default='pri',
                    help='Name for the primary condition that the fold-changes are relative to')
parser.add_argument('--avg',
                    help='Name for average intensity column in CSV file')
parser.add_argument('--fdr', default='adj.P.Val',
                    help='Name for "FDR" column in CSV file (default "adj.P.Val")')
parser.add_argument('--logFC',
                    help='Comma separated names for "logFC" columns in CSV file')
parser.add_argument('--info',
                    help='Comma separated names for info columns in CSV file')
parser.add_argument('--link-col',
                    help='Name for column to use with "--link-url"')
parser.add_argument('--link-url',
                    help='Gene info URL. Used when double-clicking the gene-table. Any "%%s" will be replaced with the value from the specified "--link-col"')
parser.add_argument('--tab', action='store_true', default=False,
                    help='Specify that the csv file is actually tab delimited')
parser.add_argument('--cuffdiff', action='store_true', default=False,
                    help='Input file is from cuffdiff (gene_exp.diff). This will set the columns automatically. Note this is still experimental')

args = parser.parse_args()

# --info and --logFC arrive as comma separated strings; the rest of the script
# works with lists.  --link-col is a single name wrapped in a list.
if args.info:
    args.info = args.info.split(",")
if args.logFC:
    args.logFC = args.logFC.split(",")
args.link_col = [args.link_col] if args.link_col else []

if args.csvfile == sys.stdin:
    sys.stderr.write("Reading from stdin...\n")

csv_file = args.csvfile.read()

if args.cuffdiff:
    # cuffdiff output has fixed column names and is tab separated; the "Avg"
    # column is created by cuffdiff_avg().
    args.info = ['gene_id', 'gene']
    args.logFC = ['log2(fold_change)']
    args.tab = True
    args.fdr = 'q_value'
    args.avg = 'Avg'
    csv_file = cuffdiff_avg(csv_file)

err = check_args(args, csv_file)

if err:
    # The problems were already reported on stderr by check_args(); signal
    # the failure to the calling shell or pipeline as well.
    sys.exit(1)

args.out.write(embed(csv_file, args))
|
|
178
|