comparison degust.py @ 1:773107d91822 draft

Uploaded
author simon-gladman
date Mon, 24 Feb 2014 00:41:33 -0500
parents
children
comparison
equal deleted inserted replaced
0:504bf58a4022 1:773107d91822
1 #!/usr/bin/env python
2
3 import argparse, json, re, sys, csv, StringIO, math
4
def embed(csv, args):
    """Return a standalone Degust HTML page with the CSV data embedded in it.

    The page loads the Degust CSS/JS assets from the public
    victorian-bioinformatics-consortium.github.io site and receives the data
    and column configuration through a ``window.settings`` object written
    into an inline <script> element.

    Parameters:
        csv:  the full CSV (or tab-delimited) text to embed.  Note that this
              name shadows the ``csv`` module inside this function.
        args: parsed command-line namespace.  The attributes read here are
              ``info``, ``fdr``, ``avg``, ``primary``, ``logFC``,
              ``link_col``, ``tab``, ``name``, ``notour`` and ``link_url``.
              ``info``, ``logFC`` and ``link_col`` are iterated as lists of
              column names; ``None`` is treated as an empty list.

    Returns:
        The complete HTML document as a string.
    """
    def js_literal(value):
        # json.dumps produces a valid JavaScript literal, but the result is
        # placed inside an inline <script> element.  Escaping "<" keeps data
        # such as "</script>" or "<!--" from terminating or corrupting the
        # element; "\u003c" decodes back to "<" inside the JS string.
        return json.dumps(value).replace("<", "\\u003c")

    html = """
<html>
<head profile="http://www.w3.org/2005/10/profile">
<link rel="icon" type="image/png" href="images/favicon.png"/>

<!-- Externals CSS -->
<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/lib.css' />

<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/common.css' type="text/css" />
<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/compare.css' type="text/css "/>

<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/common.js'></script>
<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/slickgrid.js'></script>
<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/compare.js'></script>
</head>
<body>
<div id="replace-me" class="container">
<div class="jumbotron">
<h1>Degust</h1>
<p><a href='http://victorian-bioinformatics-consortium.github.io/degust/'>Degust</a> is preparing your data... prepare for degustation...</p>
<img src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/images/front-loader.gif'>
</div>
</div>

<script type="text/javascript">
window.settings = { };
</script>
</body>
</html>

"""
    enc = js_literal(csv)

    # One JavaScript object literal per column, in the order:
    # info columns, FDR, average, primary, fold-change columns, link column.
    columns = []
    for col in args.info or []:
        columns.append("{idx:%s, name: %s, type:'info'}"
                       % (js_literal(col), js_literal(col)))
    columns.append("{idx:%s, name: 'FDR', type: 'fdr'}" % js_literal(args.fdr))
    columns.append("{idx:%s, name: 'Average', type: 'avg'}" % js_literal(args.avg))
    columns.append("{idx:%s, name: %s, type: 'primary'}"
                   % (js_literal(args.primary), js_literal(args.primary)))
    for col in args.logFC or []:
        columns.append("{idx:%s, name: %s, type:'fc'}"
                       % (js_literal(col), js_literal(col)))
    for col in args.link_col or []:
        columns.append("{idx:%s, name: %s, type:'link'}"
                       % (js_literal(col), js_literal(col)))

    settings = [
        "html_version: '0.11.2'",
        "asset_base: 'http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/'",
        # "data" is the JavaScript variable declared just before the settings.
        "csv_data: data",
        "csv_format: %s" % ("false" if args.tab else "true"),
        "name: %s" % js_literal(args.name),
        "columns:[%s]" % (",".join(columns)),
    ]
    if args.notour:
        settings.append("show_tour: false")
    if args.link_url:
        settings.append("link_url: %s" % js_literal(args.link_url))

    window_settings = "window.settings = {%s};" % (",".join(settings))
    # Swap the empty placeholder in the template for the data declaration
    # followed by the populated settings object (first occurrence only).
    return html.replace('window.settings = { };',
                        "var data=%s;\n\n%s" % (enc, window_settings), 1)
61
def check_args(args, csv_file):
    """Check that the columns named in *args* exist in the CSV header row.

    Every problem found is reported on stderr; checking continues after the
    first problem so that all of them are listed in one run.

    Parameters:
        args:     parsed command-line namespace.  Reads ``tab``, ``avg``,
                  ``fdr``, ``logFC`` and ``info`` (the last two as lists of
                  column names, or None when not supplied).
        csv_file: the full text of the input file; only its first row is
                  examined.

    Returns:
        True if at least one error was reported, False if everything matched.
        Note the polarity: True means failure.
    """
    delim = "\t" if args.tab else ","
    reader = csv.reader(csv_file.split('\n'), delimiter=delim)
    # next() with a default works on Python 2.6+ and 3, and treats input
    # without a header row as "no columns" instead of raising StopIteration.
    headers = next(reader, [])

    problems = []

    if args.avg is None:
        problems.append("ERROR: Column for average expression not defined (use --avg) necessary for the ma-plot\n")
    elif args.avg not in headers:
        problems.append("ERROR: Column for average expression not found (%s)\n" % args.avg)

    if args.fdr not in headers:
        problems.append("ERROR: Column for FDR not found (%s)\n" % args.fdr)

    if args.logFC is None:
        problems.append("ERROR: No columns defined for log-fold-change, --logFC\n")
    else:
        for f in args.logFC:
            if f not in headers:
                problems.append("ERROR: Column for logFC not found, --logFC : (%s)\n" % f)

    if args.info is None:
        problems.append("ERROR: No columns defined for per-gene information, eg. gene IDs (use --info)\n")
    else:
        for f in args.info:
            if f not in headers:
                problems.append("ERROR: Column for info not found (%s)\n" % f)

    for message in problems:
        sys.stderr.write(message)
    return bool(problems)
97
98
def cuffdiff_avg(str, tab=None):
    """Add an 'Avg' log2(average expression) column to cuffdiff output.

    Given a string that is the output from cuffdiff, create a new delimited
    text with an extra 'Avg' column.  Actually, the value is just the average
    of log2(value_1) and log2(value_2), but that should be enough for
    visualisation.  Values below 1 are clamped to 1 before taking the log, so
    the result is never negative.

    Only rows whose 'status' column equals 'OK' are kept; rows too short to
    contain the 'value_1', 'value_2' and 'status' columns are dropped.

    Parameters:
        str: the cuffdiff text to process.  (The parameter name shadows the
             builtin ``str``; it is kept for backward compatibility.)
        tab: True for tab-delimited input, False for comma-delimited.  When
             None (the default) the module-level ``args.tab`` is used.

    Returns:
        The rewritten delimited text, including the header row.

    Raises:
        StopIteration: if the input contains no rows at all.
        ValueError: if the header lacks 'value_1', 'value_2' or 'status', or
                    if an expression value is not a number.
    """
    # Local import so the function works on both Python 2 and Python 3
    # without touching the file-level imports.
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    if tab is None:
        tab = args.tab
    delim = "\t" if tab else ","

    reader = csv.reader(str.split('\n'), delimiter=delim)
    out = StringIO()
    writer = csv.writer(out, delimiter=delim)

    headers = next(reader)
    writer.writerow(headers + ['Avg'])
    idx1 = headers.index("value_1")
    idx2 = headers.index("value_2")
    tst_idx = headers.index("status")
    # A row must be strictly longer than the largest index that is read.
    last_needed = max(idx1, idx2, tst_idx)

    for row in reader:
        if len(row) > last_needed and row[tst_idx] == 'OK':
            v1 = max(float(row[idx1]), 1)
            v2 = max(float(row[idx2]), 1)
            avg = 0.5 * (math.log(v1, 2) + math.log(v2, 2))
            writer.writerow(row + [avg])
    return out.getvalue()
120
# Command-line entry point.  ``args`` and ``csv_file`` are deliberately left
# as module-level names because functions in this file read them as globals.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Produce a standalone Degust html file from a CSV file containing DGE.')
    parser.add_argument('csvfile', type=argparse.FileType('r'),
                        nargs='?', default='-',
                        help="CSV file to process (default stdin)")
    parser.add_argument('-o', '--out', type=argparse.FileType('w'),
                        default='-',
                        help="Output file (default stdout)")

    parser.add_argument('--name', default='Unnamed',
                        help='Name for this DGE comparison')
    # nargs='?' with const=True lets "--notour" be used as a bare flag while
    # still accepting the older "--notour VALUE" form.
    parser.add_argument('--notour', nargs='?', const=True, default=None,
                        help='Do not show the tour on first load')
    parser.add_argument('--primary', default='pri',
                        help='Name for the primary condition that the fold-changes are relative to')
    parser.add_argument('--avg',
                        help='Name for average intensity column in CSV file')
    parser.add_argument('--fdr', default='adj.P.Val',
                        help='Name for "FDR" column in CSV file (default "adj.P.Val")')
    parser.add_argument('--logFC',
                        help='Comma separated names for "logFC" columns in CSV file')
    parser.add_argument('--info',
                        help='Comma separated names for info columns in CSV file')
    parser.add_argument('--link-col',
                        help='Name for column to use with "--link-url"')
    parser.add_argument('--link-url',
                        help='Gene info URL.  Used when double-clicking the gene-table.  Any "%%s" will be replaced with the value from the specified "--link-col"')
    parser.add_argument('--tab', action='store_true', default=False,
                        help='Specify that the csv file is actually tab delimited')
    parser.add_argument('--cuffdiff', action='store_true', default=False,
                        help='Input file is from cuffdiff (gene_exp.diff).  This will set the columns automatically.  Note this is still experimental')

    args = parser.parse_args()

    # Turn the comma-separated options into lists; link_col is always a list
    # (possibly empty) from here on.
    if args.info:
        args.info = args.info.split(",")
    if args.logFC:
        args.logFC = args.logFC.split(",")
    args.link_col = [args.link_col] if args.link_col else []

    if args.csvfile == sys.stdin:
        sys.stderr.write("Reading from stdin...\n")

    csv_file = args.csvfile.read()

    if args.cuffdiff:
        # cuffdiff mode overrides any column options given on the command
        # line and adds the computed 'Avg' column to the data.
        args.info = ['gene_id', 'gene']
        args.logFC = ['log2(fold_change)']
        args.tab = True
        args.fdr = 'q_value'
        args.avg = 'Avg'
        csv_file = cuffdiff_avg(csv_file)

    err = check_args(args, csv_file)

    if err:
        # The problems were already written to stderr by check_args; exit
        # non-zero so that calling tools can detect the failure.
        sys.exit(1)

    args.out.write(embed(csv_file, args))
178