degust: degust.py comparison

comparison degust.py @ 1:773107d91822 draft

Uploaded

author	simon-gladman
date	Mon, 24 Feb 2014 00:41:33 -0500
parents
children

comparison

equal deleted inserted replaced

-:504bf58a4022
+:773107d91822
+#!/usr/bin/env python
+import argparse, json, re, sys, csv, StringIO, math
def _js_literal(value):
    """Return *value* as a JSON/JavaScript literal safe inside a <script> tag.

    ``json.dumps`` does not escape ``</``, so a value containing
    ``</script>`` would terminate the surrounding inline script element.
    ``<\\/`` is an equivalent JavaScript string escape that the HTML parser
    does not treat as a closing tag.
    """
    return json.dumps(value).replace("</", "<\\/")


def embed(csv, args):
    """Build the standalone Degust HTML page with the CSV data embedded.

    Parameters:
        csv:  the full CSV (or tab-delimited) text to embed in the page.
        args: parsed options; the attributes read are ``info``, ``fdr``,
              ``avg``, ``primary``, ``logFC``, ``link_col``, ``tab``,
              ``name``, ``notour`` and ``link_url``.  ``info``, ``logFC``
              and ``link_col`` are lists of column names (``None`` is
              treated as an empty list).

    Returns:
        The HTML document as a string, with ``window.settings`` filled in
        and the CSV text available to the page as the ``data`` variable.
    """
    html="""
<html>
<head profile="http://www.w3.org/2005/10/profile">
<link rel="icon" type="image/png" href="images/favicon.png"/>
<!-- Externals CSS -->
<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/lib.css' />
<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/common.css' type="text/css" />
<link rel="stylesheet" href='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/css/compare.css' type="text/css "/>
<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/common.js'></script>
<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/slickgrid.js'></script>
<script type="text/javascript" src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/compare.js'></script>
</head>
<body>
<div id="replace-me" class="container">
<div class="jumbotron">
<h1>Degust</h1>
<p><a href='http://victorian-bioinformatics-consortium.github.io/degust/'>Degust</a> is preparing your data...  prepare for degustation...</p>
<img src='http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/images/front-loader.gif'>
</div>
</div>
<script type="text/javascript">
window.settings = { };
</script>
</body>
</html>
"""
    enc = _js_literal(csv)

    # One JavaScript object literal per column, in the order the original
    # page expects: info, FDR, average, primary, fold-changes, link.
    columns = (
        ["{idx:%s, name: %s, type:'info'}" % (_js_literal(c), _js_literal(c))
         for c in (args.info or [])]
        + ["{idx:%s, name: 'FDR', type: 'fdr'}" % _js_literal(args.fdr)]
        + ["{idx:%s, name: 'Average', type: 'avg'}" % _js_literal(args.avg)]
        + ["{idx:%s, name: %s, type: 'primary'}"
           % (_js_literal(args.primary), _js_literal(args.primary))]
        + ["{idx:%s, name: %s, type:'fc'}" % (_js_literal(c), _js_literal(c))
           for c in (args.logFC or [])]
        + ["{idx:%s, name: %s, type:'link'}" % (_js_literal(c), _js_literal(c))
           for c in (args.link_col or [])]
    )

    settings = [
        "html_version: '0.11.2'",
        "asset_base: 'http://victorian-bioinformatics-consortium.github.io/degust/dist/latest/'",
        "csv_data: data",
        "csv_format: %s" % ("false" if args.tab else "true"),
        "name: %s" % _js_literal(args.name),
        "columns:[%s]" % (",".join(columns)),
    ]
    if args.notour:
        settings.append("show_tour: false")
    if args.link_url:
        settings.append("link_url: %s" % _js_literal(args.link_url))

    window_settings = "window.settings = {%s};" % (",".join(settings))
    # Swap the placeholder assignment in the template for the real data and
    # settings; only the first occurrence is the placeholder.
    return html.replace(
        'window.settings = { };',
        "var data=%s;\n\n%s" % (enc, window_settings),
        1,
    )
def check_args(args, csv_file):
    """Check that the column names given in *args* exist in the CSV header.

    Parameters:
        args:     parsed options; reads ``tab``, ``avg``, ``fdr``, ``logFC``
                  and ``info`` (the last two are lists of column names or
                  ``None``).
        csv_file: the complete CSV text; only its first row is inspected.

    Returns:
        ``True`` if at least one problem was found (each problem is reported
        on stderr), ``False`` if every required column is present.
    """
    delim = "\t" if args.tab else ","
    reader = csv.reader(csv_file.split('\n'), delimiter=delim)
    # next() works on both Python 2.6+ and Python 3; the iterator's .next()
    # method exists only on Python 2.
    headers = next(reader, [])
    problems = []

    def report(message):
        sys.stderr.write(message)
        problems.append(message)

    def require_all(names, missing_list_msg, missing_col_msg):
        # names is None when the option was not supplied at all.
        if names is None:
            report(missing_list_msg)
            return
        for name in names:
            if name not in headers:
                report(missing_col_msg % name)

    if args.avg is None:
        report("ERROR: Column for average expression not defined (use --avg) necessary for the ma-plot\n")
    elif args.avg not in headers:
        report("ERROR: Column for average expression not found (%s)\n" % args.avg)

    if args.fdr not in headers:
        report("ERROR: Column for FDR not found (%s)\n" % args.fdr)

    require_all(
        args.logFC,
        "ERROR: No columns defined for log-fold-change, --logFC\n",
        "ERROR: Column for logFC not found, --logFC : (%s)\n",
    )
    require_all(
        args.info,
        "ERROR: No columns defined for per-gene information, eg. gene IDs (use --info)\n",
        "ERROR: Column for info not found (%s)\n",
    )
    return bool(problems)
def cuffdiff_avg(str, delim="\t"):
    """Append an ``Avg`` column to cuffdiff output text.

    The value is the mean of log2(value_1) and log2(value_2), with each
    FPKM value clamped to a minimum of 1 before taking the logarithm.  It is
    not a true log2 of the average expression, but is sufficient for
    visualisation.

    Only rows whose ``status`` column equals ``OK`` are kept; rows that are
    too short to contain the needed columns (including blank trailing lines)
    are dropped.

    Parameters:
        str:   the cuffdiff table as text (name kept for compatibility even
               though it shadows the builtin).
        delim: field delimiter; the only caller forces tab-delimited mode
               before calling, hence the tab default.

    Returns:
        The rewritten table as text, using the csv module's default line
        terminator.

    Raises:
        ValueError: if the header lacks ``value_1``, ``value_2`` or
            ``status``, or if a kept row has a non-numeric FPKM value.
    """
    # Local import so the function works on both Python 2 and Python 3.
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    # Parse the text that was passed in rather than module-level globals.
    reader = csv.reader(str.split('\n'), delimiter=delim)
    si = StringIO()
    cw = csv.writer(si, delimiter=delim)

    headers = next(reader, [])
    cw.writerow(headers + ['Avg'])
    idx1 = headers.index("value_1")
    idx2 = headers.index("value_2")
    tst_idx = headers.index("status")
    # A row must be strictly longer than the largest index that is read.
    last_needed = max(idx1, idx2, tst_idx)

    for r in reader:
        if len(r) > last_needed and r[tst_idx] == 'OK':
            v1 = max(float(r[idx1]), 1)
            v2 = max(float(r[idx2]), 1)
            v = 0.5 * (math.log(v1, 2) + math.log(v2, 2))
            cw.writerow(r + [v])
    return si.getvalue()
# Command-line entry point.  The statements stay at module scope (inside the
# guard) so that ``args`` and ``csv_file`` remain module-level names.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Produce a standalone Degust html file from a CSV file containing DGE.')
    parser.add_argument('csvfile', type=argparse.FileType('r'),
                        nargs='?', default='-',
                        help="CSV file to process (default stdin)")
    parser.add_argument('-o','--out', type=argparse.FileType('w'),
                        default='-',
                        help="Output file (default stdout)")
    parser.add_argument('--name', default='Unnamed',
                        help='Name for this DGE comparison')
    # Accept both the bare flag and the historical "--notour VALUE" form.
    parser.add_argument('--notour', nargs='?', const=True, default=None,
                        help='Do not show the tour on first load')
    parser.add_argument('--primary', default='pri',
                        help='Name for the primary condition that the fold-changes are relative to')
    parser.add_argument('--avg',
                        help='Name for average intensity column in CSV file')
    parser.add_argument('--fdr', default='adj.P.Val',
                        help='Name for "FDR" column in CSV file (default "adj.P.Val")')
    parser.add_argument('--logFC',
                        help='Comma separated names for "logFC" columns in CSV file')
    parser.add_argument('--info',
                        help='Comma separated names for info columns in CSV file')
    parser.add_argument('--link-col',
                        help='Name for column to use with "--link-url"')
    parser.add_argument('--link-url',
                        help='Gene info URL.  Used when double-clicking the gene-table.  Any "%%s" will be replaced with the value from the specified "--link-col"')
    parser.add_argument('--tab', action='store_true', default=False,
                        help='Specify that the csv file is actually tab delimited')
    parser.add_argument('--cuffdiff', action='store_true', default=False,
                        help='Input file is from cuffdiff (gene_exp.diff).  This will set the columns automatically.  Note this is still experimental')

    args = parser.parse_args()

    # Comma-separated options become lists; link_col is always a list.
    if args.info:
        args.info = args.info.split(",")
    if args.logFC:
        args.logFC = args.logFC.split(",")
    args.link_col = [args.link_col] if args.link_col else []

    if args.csvfile == sys.stdin:
        sys.stderr.write("Reading from stdin...\n")
    csv_file = args.csvfile.read()

    if args.cuffdiff:
        # cuffdiff output has fixed, tab-delimited columns; the 'Avg' column
        # is synthesised by cuffdiff_avg().
        args.info = ['gene_id','gene']
        args.logFC = ['log2(fold_change)']
        args.tab = True
        args.fdr = 'q_value'
        args.avg = 'Avg'
        csv_file = cuffdiff_avg(csv_file)

    err = check_args(args, csv_file)
    if err:
        # check_args has already written the details to stderr; signal the
        # failure to the caller instead of exiting successfully.
        sys.exit(1)
    args.out.write(embed(csv_file, args))

Mercurial > repos > simon-gladman > degust

comparison degust.py @ 1:773107d91822 draft