Mercurial > repos > bcclaywell > argo_navis
view bin/ess.py @ 2:7eaf6f9abd28 draft default tip
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b-dirty
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:57:38 -0400 |
parents | d67268158946 |
children |
line wrap: on
line source
#!/usr/bin/env python import csv import numpy from biopy import bayesianStats as bs import argparse import re commented_regex = re.compile("^\s*\#.*") doc_template = """ <html> <body> <h1>Effective Sample Size Results</h1> <p>Recommendation: {status}</p> <h2>Explanation</h2> <p> Ideally, you want all these values to be greater than or equal to 200. If any of them aren't, it's probably a good idea to perform a Resume run with BEAST. On average, running 3x as long will increase your ESS by 3. This should help give you some sense of how much longer you should run BEAST. </p> <p> Note that while ESS statistics are valuable for getting a rough sense of whether to run or not, it is <em>always</em> recommended that you manually look at your logfile traces using <a href="http://tree.bio.ed.ac.uk/software/tracer/">Tracer</a>, or another trace analysis software before accepting your analysis as "final". </p> <h2>Detailed Results</h2> <table> <tr><th>Parameter</th><th>ESS</th></tr> {table_content} </table> </body> </html> """ tr_template = "<tr><td>{parameter}</td><td {style}>{ess}</td></tr>" def format_tr(result, threshold=200): "Takes a result [param, ess] pair and makes the appropriate tr html" param, ess = result passes = ess >= threshold style = "" if passes else 'style="color: red;"' return tr_template.format(parameter=param, ess=ess, style=style) def dict_reader_to_cols(dict_reader): d = dict() for row in dict_reader: for k, v in row.iteritems(): try: v = float(v) try: d[k].append(v) except KeyError: d[k] = [v] except ValueError: if k == "" and v == "": # Don't worry about this, there is a tailing \t on each line pass else: # Otherwise... something weird is happening print "Could not cast to float for kv pair:", k, v return d def table_contents(results, threshold=200): trs = [format_tr(result, threshold) for result in sorted(results)] return "\n".join(trs) def html_contents(results, threshold): table_content = table_contents(results, threshold) within_tol = [x[1] > threshold for x in results] ess_good = all(within_tol) status = "Good to go" if all(within_tol) else "Should run longer" return doc_template.format(status=status, table_content=table_content) def get_args(): parser = argparse.ArgumentParser() parser.add_argument('logfile', type=argparse.FileType('r')) parser.add_argument('ess_out', type=argparse.FileType('w')) parser.add_argument('-t', '--threshold', type=float, default=200, help="""For formatting html, what's the minimal ESS score to be considered high enough? (default: 200""") parser.add_argument('--html-out', action="store_true", help="Default is CSV output") return parser.parse_args() def main(args): non_comment_lines = (row for row in args.logfile if not commented_regex.match(row)) reader = csv.DictReader(non_comment_lines, delimiter="\t") data_columns = dict_reader_to_cols(reader) results = [] for colname, data in data_columns.iteritems(): if colname != "Sample": results.append([colname, bs.effectiveSampleSize(data)]) if args.html_out: html_content = html_contents(results, args.threshold) args.ess_out.write(html_content) else: writer = csv.writer(args.ess_out) writer.writerow(["statistic", "ess"]) for result in results: writer.writerow(result) args.logfile.close() args.ess_out.close() if __name__ == '__main__': main(get_args())