0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse
|
|
4 import json
|
|
5 import re
|
|
6 import sys,os
|
|
7 import csv, StringIO
|
|
8
|
|
9 bigFC = 100
|
|
10
|
|
def error(message):
    """Report a fatal problem on stderr and terminate with exit status 1.

    message -- text placed after the "Error: " prefix.
    """
    text = "Error: %s\n" % message
    sys.stderr.write(text)
    # Raising SystemExit directly is what sys.exit() does under the hood.
    raise SystemExit(1)
|
|
14
|
|
def _script_safe_json(value):
    """Serialise *value* as JSON that can sit inside an inline <script> block.

    json.dumps() only ever emits '<' inside string literals, so rewriting it
    as a unicode escape leaves the decoded value unchanged while making it
    impossible for the data to contain "</script>" or "<!--", either of which
    would otherwise end or corrupt the surrounding script element.
    """
    return json.dumps(value).replace('<', '\\u003c')


def embed(csv, args):
    """Return a standalone Vennt HTML page with the CSV text embedded in it.

    csv  -- the complete CSV content as one string.  (The parameter name
            shadows the ``csv`` module inside this function; the module is
            not needed here.)
    args -- object providing ``key``, ``id``, ``fdr``, ``logFC`` and ``info``
            attributes naming the relevant CSV columns.

    The placeholder ``window.venn_settings = { };`` in the template is
    replaced by a ``data`` variable holding the CSV text plus the real
    settings object that refers to it.
    """
    html = """
<html>
  <head>

    <link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css" />
    <link rel="stylesheet" href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.10.3/themes/ui-lightness/jquery-ui.min.css" />

    <link rel="stylesheet" type="text/css" href='http://drpowell.github.io/vennt/dist/main.min.css'>
    <script type="text/javascript" src='http://drpowell.github.io/vennt/dist/main.js'></script>
  </head>

  <body>
    <script type="text/javascript">
      window.venn_settings = { };
    </script>

    <div id='loading'><img src='http://drpowell.github.io/vennt/dist/images/ajax-loader.gif'></div>
  </body>
</html>

"""
    enc = _script_safe_json(csv)
    settings = ("window.venn_settings = {key_column: %s, id_column: %s, fdr_column: %s,"
                "logFC_column: %s, info_columns: %s, csv_data: data};") % (
                    _script_safe_json(args.key), _script_safe_json(args.id),
                    _script_safe_json(args.fdr), _script_safe_json(args.logFC),
                    _script_safe_json(args.info))
    # Only the first (and only) placeholder is substituted.
    return html.replace('window.venn_settings = { };',
                        "var data=%s;\n\n%s" % (enc, settings), 1)
|
|
44
|
|
def combine_csv(files, key):
    """Concatenate several same-format CSV files, adding a leading key column.

    files -- paths of the CSV files; each is expected to start with a header
             line.  The header of the first file is kept, the others are
             dropped.
    key   -- header name for the added column.

    Every data row is prefixed with the file's base name (without extension)
    as a quoted CSV field.  Lines shorter than two characters (blank lines)
    are left untouched.  Returns the combined CSV text.
    """
    # A row is any line holding at least two characters; compiled once
    # because it does not depend on the file being read.
    row_start = re.compile(r'^(.{2})', re.MULTILINE)

    def quoted(text):
        # CSV quoting: wrap in double quotes and double any embedded quote.
        return '"%s",' % text.replace('"', '""')

    data = []
    sys.stderr.write("Using a separate CSV files\n")
    for f in files:
        sys.stderr.write(" Reading : %s\n" % f)
        with open(f) as handle:
            d = handle.read()
        # Separate header (and keep if it is the first).  partition() copes
        # with a header-only or empty file where split() + unpacking raised.
        hdr, _, d = d.partition("\n")
        if not data:
            data.append(quoted(key) + hdr + "\n")
        prefix = quoted(os.path.splitext(os.path.basename(f))[0])
        # A callable replacement keeps backslashes in the file name literal
        # instead of having them parsed as regex escapes / group references.
        d = row_start.sub(lambda m, p=prefix: p + m.group(1), d)
        # Without this the last row of a file lacking a final newline would
        # be fused with the first row of the next file.
        if d and not d.endswith("\n"):
            d += "\n"
        data.append(d)

    return ''.join(data)
|
|
59
|
|
def cuffdiff_process(f):
    """Convert a tab-separated cuffdiff result file into comma-separated text.

    f -- path of the file to read.  Its header row must contain the columns
         "sample_1", "sample_2" and "log2(fold_change)".

    A trailing "key" column of the form "<sample_1> vs <sample_2>" is added
    to every row, and fold-changes of "inf" / "-inf" are replaced by
    +/- the module-level ``bigFC`` value.  Blank lines are skipped.

    Returns the CSV text.  Raises ValueError if the file is empty or a
    required column is missing from the header.
    """
    # The csv writer needs a byte buffer on Python 2 and a text buffer on
    # Python 3; pick whichever in-memory file matches the interpreter.
    try:
        from StringIO import StringIO as _Buffer
    except ImportError:
        from io import StringIO as _Buffer

    with open(f, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter="\t")
        si = _Buffer()
        cw = csv.writer(si, delimiter=",")

        # next() works on both Python 2.6+ and 3, unlike reader.next().
        headers = next(reader, None)
        if headers is None:
            raise ValueError("%s is empty: expected a header row" % f)
        cw.writerow(headers + ['key'])
        idx1 = headers.index("sample_1")
        idx2 = headers.index("sample_2")
        fcIdx = headers.index("log2(fold_change)")
        for r in reader:
            if not r:
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError.
                continue
            # Replace an infinite fold-change with something vennt can handle
            if r[fcIdx] == 'inf':
                r[fcIdx] = bigFC
            elif r[fcIdx] == '-inf':
                r[fcIdx] = -bigFC
            k = r[idx1] + ' vs ' + r[idx2]
            cw.writerow(r + [k])

    return si.getvalue()
|
|
81
|
|
# Command-line entry point: parse the options, load the CSV data from stdin,
# a single file, a cuffdiff file or several per-gene-list files, then write
# the standalone HTML page.
parser = argparse.ArgumentParser(description='Produce a standalone Vennt html file from a CSV file containing gene-lists. You may use a single CSV file containing all the gene lists - in which case you should have a "key" column specifying the gene lists. Alternatively, you can use separate CSV files for each gene list then a "key" column will be created based on the filenames. With separate CSV files they are expected to be in the same format with the same column names in the same column order.')
parser.add_argument('csvfile',
                    nargs='*', default='-',
                    help="CSV file to process (default stdin). Multiple files may be specified - in which case it is assumed each file contains one gene list and the filenames will be used to create a 'key' column")
parser.add_argument('-o','--out', type=argparse.FileType('w'),
                    default='-',
                    help="Output file (default stdout)")
parser.add_argument('--key', default='key',
                    help='Name for "key" column in CSV file (default "key"). Ignored if using multiple CSV files.')
parser.add_argument('--id', default='Feature',
                    help='Name for "id" column in CSV file (default "Feature")')
parser.add_argument('--fdr', default='adj.P.Val',
                    help='Name for "FDR" column in CSV file (default "adj.P.Val")')
parser.add_argument('--logFC', default='logFC',
                    help='Name for "logFC" column in CSV file (default "logFC")')
parser.add_argument('--info', default=['Feature'], nargs='*',
                    help='Names for info columns in CSV file - accepts multiple strings (default "Feature")')
parser.add_argument('--cuffdiff', action='store_true', default=False,
                    help='Input file is from cuffdiff (gene_exp.diff). Other options will be ignored')

args = parser.parse_args()

csv_data = None
# With nargs='*' argparse hands back the bare default '-' when no file is
# given, but the list ['-'] when the user types '-' explicitly; both mean
# "read standard input".
if args.csvfile in ('-', ['-']):
    sys.stderr.write("Reading from stdin...\n")
    csv_data = sys.stdin.read()
elif len(args.csvfile)==1:
    if args.cuffdiff:
        csv_data = cuffdiff_process(args.csvfile[0])
        # Column names are fixed for cuffdiff input, overriding the options.
        args.id = 'test_id'
        args.fdr = 'q_value'
        args.logFC = 'log2(fold_change)'
        args.info = ['gene_id','gene']
    else:
        sys.stderr.write("Using a single CSV file with the key column '%s'\n"%(args.key))
        with open(args.csvfile[0],'r') as single_file:
            csv_data = single_file.read()
else:
    if args.cuffdiff:
        error("Only 1 file (gene_exp.diff) expected when using --cuffdiff")
    csv_data = combine_csv(args.csvfile, args.key)

args.out.write(embed(csv_data, args))
|
|
126
|