Mercurial > repos > simon-gladman > degust
view degust.py @ 2:1e4307fd98f3 draft
Uploaded
author | simon-gladman |
---|---|
date | Mon, 24 Feb 2014 19:02:18 -0500 |
parents | 773107d91822 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Produce a standalone Degust HTML page from a CSV/TSV file of DGE results.

The input table is embedded verbatim into an HTML page that loads the Degust
front-end assets, together with a ``window.settings`` object describing which
columns hold the FDR, average expression, log fold-changes and gene info.
"""

import argparse
import csv
import json
import math
import re
import sys

try:
    from StringIO import StringIO  # Python 2: byte-string buffer for csv.writer
except ImportError:
    from io import StringIO  # Python 3


def _js_literal(value):
    """Serialise *value* as a JavaScript literal safe inside a <script> element.

    ``json.dumps`` alone would let a ``</script>`` or ``<!--`` sequence in the
    data terminate or corrupt the inline script. ``<`` can only occur inside
    JSON strings, where ``\\u003c`` is an equivalent spelling.
    """
    return json.dumps(value).replace("<", "\\u003c")


def embed(csv, args):
    """Return the Degust HTML page with the data and settings embedded.

    csv  -- full text of the delimited file (note: shadows the ``csv`` module
            inside this function; the name is kept for existing callers).
    args -- parsed options; reads ``info``, ``logFC`` and ``link_col`` (lists of
            column names), ``fdr``, ``avg``, ``primary``, ``name``, ``tab``,
            ``notour`` and ``link_url``.
    """
    html = """
<html>
  <head profile="http://www.w3.org/2005/10/profile">
    <link rel="icon" type="image/png" href="images/favicon.png"/>

    <!-- Externals CSS -->
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/lib.css' />

    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/common.css' type="text/css" />
    <link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/compare.css' type="text/css "/>

    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/common.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/slickgrid.js'></script>
    <script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/compare.js'></script>
  </head>
  <body>
    <div id="replace-me" class="container">
      <div class="jumbotron">
        <h1>Degust</h1>
        <p><a href='http://victorian-bioinformatics-consortium.github.io/degust/'>Degust</a> is preparing your data... prepare for degustation...</p>
        <img src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/images/front-loader.gif'>
      </div>
    </div>
    <script type="text/javascript">
      window.settings = { };
    </script>
  </body>
</html>
"""
    enc = _js_literal(csv)

    # One descriptor per column the front-end should know about.
    columns = (
        ["{idx:%s, name: %s, type:'info'}" % (_js_literal(c), _js_literal(c))
         for c in args.info]
        + ["{idx:%s, name: 'FDR', type: 'fdr'}" % _js_literal(args.fdr)]
        + ["{idx:%s, name: 'Average', type: 'avg'}" % _js_literal(args.avg)]
        + ["{idx:%s, name: %s, type: 'primary'}"
           % (_js_literal(args.primary), _js_literal(args.primary))]
        + ["{idx:%s, name: %s, type:'fc'}" % (_js_literal(c), _js_literal(c))
           for c in args.logFC]
        + ["{idx:%s, name: %s, type:'link'}" % (_js_literal(c), _js_literal(c))
           for c in args.link_col]
    )

    settings = [
        "html_version: '0.11.2'",
        "asset_base: 'http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/'",
        "csv_data: data",
        "csv_format: %s" % ("false" if args.tab else "true"),
        "name: %s" % _js_literal(args.name),
        "columns:[%s]" % ",".join(columns),
    ]
    if args.notour:
        settings.append("show_tour: false")
    if args.link_url:
        settings.append("link_url: %s" % _js_literal(args.link_url))

    window_settings = "window.settings = {%s};" % ",".join(settings)
    # Swap the placeholder in the template for the data plus real settings.
    return html.replace(
        "window.settings = { };",
        "var data=%s;\n\n%s" % (enc, window_settings),
        1,
    )


def check_args(args, csv_file):
    """Check that the columns named in *args* exist in the file's header row.

    Every problem is reported on stderr. Returns True when at least one
    problem was found, False when the arguments are consistent with the file.
    """
    delim = "\t" if args.tab else ","
    reader = csv.reader(csv_file.split('\n'), delimiter=delim)
    headers = next(reader, [])

    err = False
    if args.avg is None:
        sys.stderr.write("ERROR: Column for average expression not defined (use --avg) necessary for the ma-plot\n")
        err = True
    elif args.avg not in headers:
        sys.stderr.write("ERROR: Column for average expression not found (%s)\n" % args.avg)
        err = True

    if args.fdr not in headers:
        sys.stderr.write("ERROR: Column for FDR not found (%s)\n" % args.fdr)
        err = True

    if args.logFC is None:
        sys.stderr.write("ERROR: No columns defined for log-fold-change, --logFC\n")
        err = True
    else:
        for f in args.logFC:
            if f not in headers:
                sys.stderr.write("ERROR: Column for logFC not found, --logFC : (%s)\n" % f)
                err = True

    if args.info is None:
        sys.stderr.write("ERROR: No columns defined for per-gene information, eg. gene IDs (use --info)\n")
        err = True
    else:
        for f in args.info:
            if f not in headers:
                sys.stderr.write("ERROR: Column for info not found (%s)\n" % f)
                err = True

    return err


def cuffdiff_avg(str, delim="\t"):
    """Given cuffdiff output as a string, append a log2(average expression) column.

    Actually, it is just the average of log2() of the two FPKM values (each
    clamped to a minimum of 1), but that should be enough for visualisation.
    Only rows whose ``status`` column is ``OK`` are kept.

    str   -- text of the cuffdiff file (the parameter name shadows the builtin;
             it is kept so existing callers are unaffected).
    delim -- field delimiter of the input and the output (default: tab).

    Returns the rewritten table as text with an extra ``Avg`` column.
    Raises ValueError if a required column is missing or a value is not numeric.
    """
    text = str
    reader = csv.reader(text.split('\n'), delimiter=delim)
    si = StringIO()
    cw = csv.writer(si, delimiter=delim)

    headers = next(reader, [])
    missing = [c for c in ("value_1", "value_2", "status") if c not in headers]
    if missing:
        raise ValueError("cuffdiff column(s) not found: %s" % ", ".join(missing))
    cw.writerow(headers + ['Avg'])

    idx1 = headers.index("value_1")
    idx2 = headers.index("value_2")
    tst_idx = headers.index("status")
    # A row is usable only if it is long enough to hold every column we index.
    last_needed = max(idx1, idx2, tst_idx)

    for r in reader:
        if len(r) > last_needed and r[tst_idx] == 'OK':
            # Clamp to 1 so the log is never negative or undefined.
            v1 = max(float(r[idx1]), 1)
            v2 = max(float(r[idx2]), 1)
            v = 0.5 * (math.log(v1, 2) + math.log(v2, 2))
            cw.writerow(r + [v])
    return si.getvalue()


def build_parser():
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(description='Produce a standalone Degust html file from a CSV file containing DGE.')
    parser.add_argument('csvfile', type=argparse.FileType('r'), nargs='?', default='-',
                        help="CSV file to process (default stdin)")
    parser.add_argument('-o', '--out', type=argparse.FileType('w'), default='-',
                        help="Output file (default stdout)")
    parser.add_argument('--name', default='Unnamed',
                        help='Name for this DGE comparison')
    # nargs='?' lets a bare "--notour" work as the flag it is documented to be,
    # while still accepting the older "--notour VALUE" form.
    parser.add_argument('--notour', nargs='?', const=True, default=None,
                        help='Do not show the tour on first load')
    parser.add_argument('--primary', default='pri',
                        help='Name for the primary condition that the fold-changes are relative to')
    parser.add_argument('--avg',
                        help='Name for average intensity column in CSV file')
    parser.add_argument('--fdr', default='adj.P.Val',
                        help='Name for "FDR" column in CSV file (default "adj.P.Val")')
    parser.add_argument('--logFC',
                        help='Comma separated names for "logFC" columns in CSV file')
    parser.add_argument('--info',
                        help='Comma separated names for info columns in CSV file')
    parser.add_argument('--link-col',
                        help='Name for column to use with "--link-url"')
    parser.add_argument('--link-url',
                        help='Gene info URL. Used when double-clicking the gene-table. Any "%%s" will be replaced with the value from the specified "--link-col"')
    parser.add_argument('--tab', action='store_true', default=False,
                        help='Specify that the csv file is actually tab delimited')
    parser.add_argument('--cuffdiff', action='store_true', default=False,
                        help='Input file is from cuffdiff (gene_exp.diff). This will set the columns automatically. Note this is still experimental')
    return parser


def main(argv=None):
    """Run the command-line tool; return the process exit status.

    argv -- argument list (defaults to ``sys.argv[1:]``).
    Returns 0 on success and 1 when the input fails validation.
    """
    args = build_parser().parse_args(argv)

    if args.info:
        args.info = args.info.split(",")
    if args.logFC:
        args.logFC = args.logFC.split(",")
    args.link_col = [args.link_col] if args.link_col else []

    if args.csvfile is sys.stdin:
        sys.stderr.write("Reading from stdin...\n")
    csv_file = args.csvfile.read()

    if args.cuffdiff:
        # cuffdiff files have a fixed layout, so the column options are implied.
        args.info = ['gene_id', 'gene']
        args.logFC = ['log2(fold_change)']
        args.tab = True
        args.fdr = 'q_value'
        args.avg = 'Avg'
        try:
            csv_file = cuffdiff_avg(csv_file)
        except ValueError as exc:
            sys.stderr.write("ERROR: %s\n" % exc)
            return 1

    if check_args(args, csv_file):
        return 1

    args.out.write(embed(csv_file, args))
    return 0


if __name__ == "__main__":
    sys.exit(main())