comparison vennt.py @ 0:b001248f393a draft

Uploaded
author simon-gladman
date Mon, 24 Feb 2014 21:44:41 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b001248f393a
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5 import re
6 import sys,os
7 import csv, StringIO
8
# Finite stand-in for an infinite log2 fold-change: cuffdiff_process() writes
# bigFC for 'inf' and -bigFC for '-inf' so the value stays numeric for Vennt.
bigFC = 100
10
def error(message):
    """Write ``Error: <message>`` to stderr and terminate with exit status 1."""
    text = "Error: %s\n" % message
    sys.stderr.write(text)
    raise SystemExit(1)
14
def embed(csv, args):
    """Return a standalone Vennt HTML page with the CSV text embedded in it.

    Parameters:
        csv:  the complete CSV document as one string (note: this parameter
              name shadows the ``csv`` module inside this function).
        args: object providing ``key``, ``id``, ``fdr``, ``logFC`` and ``info``
              attributes (the parsed command-line options); they become the
              ``*_column(s)`` entries of ``window.venn_settings``.

    Returns:
        The HTML text, with the placeholder ``window.venn_settings = { };``
        replaced by a ``var data=...`` assignment and the real settings.
    """
    html = """
<html>
<head>

<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css" />
<link rel="stylesheet" href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.10.3/themes/ui-lightness/jquery-ui.min.css" />

<link rel="stylesheet" type="text/css" href='http://drpowell.github.io/vennt/dist/main.min.css'>
<script type="text/javascript" src='http://drpowell.github.io/vennt/dist/main.js'></script>
</head>

<body>
<script type="text/javascript">
window.venn_settings = { };
</script>

<div id='loading'><img src='http://drpowell.github.io/vennt/dist/images/ajax-loader.gif'></div>
</body>
</html>

"""

    def js_literal(value):
        # json.dumps() leaves "</" untouched, so data containing "</script>"
        # would terminate the inline <script> element early.  "<\/" is read
        # as the same two characters by the JavaScript parser but is inert
        # to the HTML parser.
        return json.dumps(value).replace('</', '<\\/')

    enc = js_literal(csv)
    settings = ("window.venn_settings = {key_column: %s, id_column: %s, fdr_column: %s,"
                "logFC_column: %s, info_columns: %s, csv_data: data};") % (
        js_literal(args.key), js_literal(args.id), js_literal(args.fdr),
        js_literal(args.logFC), js_literal(args.info))

    # Only the first (and only) placeholder occurrence is substituted.
    return html.replace('window.venn_settings = { };',
                        "var data=%s;\n\n%s" % (enc, settings), 1)
44
def combine_csv(files, key):
    """Concatenate several CSV files into one CSV text with an added key column.

    The header line of the first file is kept (prefixed with the quoted
    ``key`` column name); the header line of every later file is dropped.
    Every data line of at least two characters is prefixed with the file's
    base name (extension removed) as a quoted first field.

    Parameters:
        files: iterable of paths to CSV files.
        key:   name to give the new first column.

    Returns:
        The combined CSV document as a single string.
    """
    sys.stderr.write("Using a separate CSV files\n")
    pieces = []
    for path in files:
        sys.stderr.write(" Reading : %s\n" % path)
        with open(path) as handle:  # close the file promptly
            text = handle.read()

        # partition() copes with a header-only file that has no newline,
        # where a two-way split("\n", 1) unpack would raise ValueError.
        header, _, body = text.partition("\n")
        if not pieces:
            pieces.append('"%s",' % key.replace('"', '""') + header + "\n")

        # Double embedded quotes so the label stays a single quoted CSV field.
        label = os.path.splitext(os.path.basename(path))[0].replace('"', '""')
        prefix = '"%s",' % label
        # A callable replacement keeps backslashes in the file name literal
        # (a template string would interpret sequences such as "\1").
        body = re.sub(r'^(.{2})', lambda m: prefix + m.group(1), body,
                      flags=re.MULTILINE)
        # Without a final newline the next file's first row would be glued
        # onto this file's last row.
        if body and not body.endswith("\n"):
            body += "\n"
        pieces.append(body)

    return ''.join(pieces)
59
def cuffdiff_process(f):
    """Convert a tab-separated cuffdiff table into comma-separated text.

    A ``key`` column is appended to every row, built as
    ``"<sample_1> vs <sample_2>"``.  Infinite ``log2(fold_change)`` values
    (``inf`` / ``-inf``) are replaced by ``bigFC`` / ``-bigFC``.

    Parameters:
        f: path of the tab-delimited input file; its header row must contain
           the columns ``sample_1``, ``sample_2`` and ``log2(fold_change)``.

    Returns:
        The converted table as one CSV string.

    Raises:
        ValueError: if one of the required columns is missing from the header.
    """
    # Local import so the function runs on Python 2 and Python 3 alike.
    try:
        from StringIO import StringIO as TextBuffer  # Python 2
    except ImportError:
        from io import StringIO as TextBuffer  # Python 3

    si = TextBuffer()
    with open(f, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter="\t")
        cw = csv.writer(si, delimiter=",")

        # next(reader) works on Python 2.6+ and 3; reader.next() is 2-only.
        headers = next(reader)
        cw.writerow(headers + ['key'])
        idx1 = headers.index("sample_1")
        idx2 = headers.index("sample_2")
        fcIdx = headers.index("log2(fold_change)")

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; indexing would fail.
                continue
            # Replace an infinite fold-change with something vennt can handle
            if row[fcIdx] == 'inf':
                row[fcIdx] = bigFC
            elif row[fcIdx] == '-inf':
                row[fcIdx] = -bigFC
            cw.writerow(row + [row[idx1] + ' vs ' + row[idx2]])

    return si.getvalue()
81
# ---------------------------------------------------------------------------
# Command-line entry point: parse options, load the CSV text from stdin, a
# single CSV file, a cuffdiff table or several per-gene-list CSV files, then
# write the standalone HTML page to the chosen output.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Produce a standalone Vennt html file from a CSV file containing gene-lists. You may use a single CSV file containing all the gene lists - in which case you should have a "key" column specifying the gene lists. Alternatively, you can use separate CSV files for each gene list then a "key" column will be created based on the filenames. With separate CSV files they are expected to be in the same format with the same column names in the same column order.')
parser.add_argument('csvfile',
                    nargs='*', default='-',
                    help="CSV file to process (default stdin). Multiple files may be specified - in which case it is assumed each file contains one gene list and the filenames will be used to create a 'key' column")
parser.add_argument('-o', '--out', type=argparse.FileType('w'),
                    default='-',
                    help="Output file (default stdout)")
parser.add_argument('--key', default='key',
                    help='Name for "key" column in CSV file (default "key"). Ignored if using multiple CSV files.')
parser.add_argument('--id', default='Feature',
                    help='Name for "id" column in CSV file (default "Feature")')
parser.add_argument('--fdr', default='adj.P.Val',
                    help='Name for "FDR" column in CSV file (default "adj.P.Val")')
parser.add_argument('--logFC', default='logFC',
                    help='Name for "logFC" column in CSV file (default "logFC")')
parser.add_argument('--info', default=['Feature'], nargs='*',
                    help='Names for info columns in CSV file - accepts multiple strings (default "Feature")')
parser.add_argument('--cuffdiff', action='store_true', default=False,
                    help='Input file is from cuffdiff (gene_exp.diff). Other options will be ignored')

args = parser.parse_args()

csv_data = None
# With nargs='*' and no positional argument argparse hands back the default
# string '-' unchanged; an explicitly typed "-" arrives as the list ['-'].
# Both mean "read standard input".
if args.csvfile == '-' or args.csvfile == ['-']:
    # NOTE(review): --cuffdiff is not applied to stdin input (unchanged
    # from the original behaviour).
    sys.stderr.write("Reading from stdin...\n")
    csv_data = sys.stdin.read()
elif len(args.csvfile) == 1:
    if args.cuffdiff:
        csv_data = cuffdiff_process(args.csvfile[0])
        # cuffdiff tables use fixed column names, so override the options.
        args.id = 'test_id'
        args.fdr = 'q_value'
        args.logFC = 'log2(fold_change)'
        args.info = ['gene_id', 'gene']
    else:
        sys.stderr.write("Using a single CSV file with the key column '%s'\n" % (args.key))
        # Context manager so the input handle is closed after reading.
        with open(args.csvfile[0], 'r') as single_csv:
            csv_data = single_csv.read()
else:
    if args.cuffdiff:
        error("Only 1 file (gene_exp.diff) expected when using --cuffdiff")
    csv_data = combine_csv(args.csvfile, args.key)

args.out.write(embed(csv_data, args))
126