annotate vennt.py @ 3:3918317b6e63 draft

Uploaded
author simon-gladman
date Mon, 24 Feb 2014 21:53:27 -0500
parents b001248f393a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
1 #!/usr/bin/env python
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
2
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
3 import argparse
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
4 import json
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
5 import re
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
6 import sys,os
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
7 import csv, StringIO
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
8
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
9 bigFC = 100
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
10
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
def error(message):
    """Write ``Error: <message>`` to stderr and terminate with exit status 1."""
    text = "Error: %s\n" % message
    sys.stderr.write(text)
    # Equivalent to sys.exit(1): both raise SystemExit with code 1.
    raise SystemExit(1)
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
14
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
def embed(csv, args):
    """Return a standalone Vennt HTML page with the CSV text embedded in it.

    The page template contains the placeholder statement
    ``window.venn_settings = { };``.  Its first occurrence is replaced by a
    ``var data=...`` assignment holding the CSV text, followed by a
    ``window.venn_settings`` object naming the columns to use.

    Parameters:
        csv:  the complete CSV document as a string (note: this parameter
              shadows the ``csv`` module inside this function).
        args: object providing the attributes ``key``, ``id``, ``fdr``,
              ``logFC`` and ``info`` (the column names to hand to Vennt).

    Returns:
        The HTML document as a string.
    """
    html = """
<html>
<head>

<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css" />
<link rel="stylesheet" href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.10.3/themes/ui-lightness/jquery-ui.min.css" />

<link rel="stylesheet" type="text/css" href='http://drpowell.github.io/vennt/dist/main.min.css'>
<script type="text/javascript" src='http://drpowell.github.io/vennt/dist/main.js'></script>
</head>

<body>
<script type="text/javascript">
window.venn_settings = { };
</script>

<div id='loading'><img src='http://drpowell.github.io/vennt/dist/images/ajax-loader.gif'></div>
</body>
</html>

"""

    def script_safe(value):
        # The JSON text is placed inside an inline <script> element, where a
        # literal "</script>" (or "<!--") in the data would end or corrupt
        # the script.  "<" can only occur inside JSON string literals, so
        # replacing it with the \u003c escape keeps the value identical for
        # both JSON and JavaScript parsers.
        return json.dumps(value).replace("<", "\\u003c")

    enc = script_safe(csv)
    settings = ("window.venn_settings = {key_column: %s, id_column: %s, fdr_column: %s,"
                "logFC_column: %s, info_columns: %s, csv_data: data};") % (
                    script_safe(args.key), script_safe(args.id), script_safe(args.fdr),
                    script_safe(args.logFC), script_safe(args.info))
    # Only the first occurrence is replaced, i.e. the placeholder in the template.
    return html.replace('window.venn_settings = { };',
                        "var data=%s;\n\n%s" % (enc, settings), 1)
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
44
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
def combine_csv(files, key):
    """Concatenate several CSV files into one CSV text with an added key column.

    The header line of the first non-empty file is kept and prefixed with a
    column named ``key``.  The header line of every file is dropped from its
    body, and each remaining line of at least two characters is prefixed with
    the file's base name (without extension) as a quoted value.  Shorter
    lines, such as blank lines, are left untouched.

    Parameters:
        files: iterable of paths to CSV files; each is expected to start with
               a header line.
        key:   name to give the added first column.

    Returns:
        The combined CSV text as a single string.
    """
    data = []
    sys.stderr.write("Using a separate CSV files\n")
    for f in files:
        sys.stderr.write(" Reading : %s\n" % f)
        with open(f) as handle:
            text = handle.read()
        if not text:
            # Nothing to contribute, not even a header line.
            sys.stderr.write(" Warning : %s is empty, skipping\n" % f)
            continue

        # Separate header (and keep if it is the first).  partition() also
        # copes with a header-only file that lacks a trailing newline.
        hdr, _, body = text.partition("\n")
        if not data:
            data.append('"%s",' % key.replace('"', '""') + hdr + "\n")

        label = os.path.splitext(os.path.basename(f))[0]
        prefix = '"%s",' % label.replace('"', '""')
        # Add a key column to all rows.  A callable replacement is used so
        # that backslashes in the file name are inserted literally instead of
        # being interpreted as regex template escapes.
        body = re.sub(r'^(.{2})', lambda m: prefix + m.group(1), body,
                      flags=re.MULTILINE)
        # Without a final newline the next file's first row would be glued
        # onto this file's last row.
        if body and not body.endswith("\n"):
            body += "\n"
        data.append(body)

    return ''.join(data)
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
59
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
def cuffdiff_process(f, big_fc=None):
    """Convert a tab-separated cuffdiff table into CSV text with a key column.

    The first row is taken as the header and must contain the columns
    ``sample_1``, ``sample_2`` and ``log2(fold_change)``.  A ``key`` column
    is appended whose value for each row is ``"<sample_1> vs <sample_2>"``.
    Fold-change values of ``inf`` / ``-inf`` are replaced by ``big_fc`` /
    ``-big_fc``.  Completely blank lines are skipped.

    Parameters:
        f:      path of the tab-separated input file.
        big_fc: number substituted for an infinite fold-change; when None
                (the default) the module-level ``bigFC`` is used.

    Returns:
        The comma-separated output as a string.

    Raises:
        ValueError: if the file is empty or a required column is missing.
    """
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    if big_fc is None:
        big_fc = bigFC

    buf = StringIO()
    writer = csv.writer(buf, delimiter=",")
    with open(f, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter="\t")

        headers = next(reader, None)
        if headers is None:
            raise ValueError("%s is empty; expected a header row" % f)
        required = ("sample_1", "sample_2", "log2(fold_change)")
        missing = [name for name in required if name not in headers]
        if missing:
            raise ValueError("%s is missing required column(s): %s"
                             % (f, ", ".join(missing)))

        writer.writerow(headers + ['key'])
        idx1 = headers.index("sample_1")
        idx2 = headers.index("sample_2")
        fc_idx = headers.index("log2(fold_change)")
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to index.
                continue
            # Replace an infinite fold-change with something vennt can handle
            if row[fc_idx] == 'inf':
                row[fc_idx] = big_fc
            elif row[fc_idx] == '-inf':
                row[fc_idx] = -big_fc
            writer.writerow(row + [row[idx1] + ' vs ' + row[idx2]])

    return buf.getvalue()
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
81
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
# Command-line interface: build the parser, read the gene lists from stdin,
# a single CSV file, a cuffdiff table or several per-list CSV files, then
# write the standalone HTML page to the chosen output.
parser = argparse.ArgumentParser(description='Produce a standalone Vennt html file from a CSV file containing gene-lists. You may use a single CSV file containing all the gene lists - in which case you should have a "key" column specifying the gene lists. Alternatively, you can use separate CSV files for each gene list then a "key" column will be created based on the filenames. With separate CSV files they are expected to be in the same format with the same column names in the same column order.')
parser.add_argument('csvfile',
                    nargs='*', default='-',
                    help="CSV file to process (default stdin). Multiple files may be specified - in which case it is assumed each file contains one gene list and the filenames will be used to create a 'key' column")
parser.add_argument('-o', '--out', type=argparse.FileType('w'),
                    default='-',
                    help="Output file (default stdout)")
parser.add_argument('--key', default='key',
                    help='Name for "key" column in CSV file (default "key"). Ignored if using multiple CSV files.')
parser.add_argument('--id', default='Feature',
                    help='Name for "id" column in CSV file (default "Feature")')
parser.add_argument('--fdr', default='adj.P.Val',
                    help='Name for "FDR" column in CSV file (default "adj.P.Val")')
parser.add_argument('--logFC', default='logFC',
                    help='Name for "logFC" column in CSV file (default "logFC")')
parser.add_argument('--info', default=['Feature'], nargs='*',
                    help='Names for info columns in CSV file - accepts multiple strings (default "Feature")')
parser.add_argument('--cuffdiff', action='store_true', default=False,
                    help='Input file is from cuffdiff (gene_exp.diff). Other options will be ignored')

args = parser.parse_args()

csv_data = None
# With no positional argument the string default '-' is left in place; an
# explicit '-' on the command line arrives as the list ['-'].  Both mean stdin.
if args.csvfile == '-' or args.csvfile == ['-']:
    if args.cuffdiff:
        # The cuffdiff conversion is only applied to a named file.
        sys.stderr.write("Warning: --cuffdiff is ignored when reading from stdin\n")
    sys.stderr.write("Reading from stdin...\n")
    csv_data = sys.stdin.read()
elif len(args.csvfile) == 1:
    if args.cuffdiff:
        csv_data = cuffdiff_process(args.csvfile[0])
        # Column names used by the converted cuffdiff table override the
        # command-line choices.
        args.id = 'test_id'
        args.fdr = 'q_value'
        args.logFC = 'log2(fold_change)'
        args.info = ['gene_id', 'gene']
    else:
        sys.stderr.write("Using a single CSV file with the key column '%s'\n" % (args.key))
        with open(args.csvfile[0], 'r') as single_csv:
            csv_data = single_csv.read()
else:
    if args.cuffdiff:
        # error() exits, so the combine step below is not reached.
        error("Only 1 file (gene_exp.diff) expected when using --cuffdiff")
    csv_data = combine_csv(args.csvfile, args.key)

args.out.write(embed(csv_data, args))
b001248f393a Uploaded
simon-gladman
parents:
diff changeset
126