changeset 35:015d1fb47ec4 draft

Updated script with png plot and --major-only option
author boris
date Fri, 14 Jun 2013 13:29:16 -0400
parents 6935429eef05
children dbf0b3c13dcf
files phylorelatives.py
diffstat 1 files changed, 74 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/phylorelatives.py	Fri Jun 14 13:27:49 2013 -0400
+++ b/phylorelatives.py	Fri Jun 14 13:29:16 2013 -0400
@@ -47,15 +47,22 @@
     nj_tree = ape.nj(dist)
     return nj_tree
 
-def ape_consensus(tree,tree_function,data,iters=1000):
-    """Return majority rule consensus tree"""
+def ape_bootstrap(tree,tree_function,data,iters=1000):
+    """Return bootstrap tree with bootstrap values on nodes"""
     ape = importr('ape')
     tree_func = robjects.r(tree_function)
     bootstrap = ape.boot_phylo(tree, data, tree_func,
                               B=iters, quiet=1, trees=1)
-    bs_trees  = bootstrap.rx('trees')[0]
-    cons_tree = ape.consensus(bs_trees,p=0.5)
-    return cons_tree
+    robjects.r('''
+                add_bs_value = function(tree, boot, iters) {
+                tree$node.label = ((boot$BP)/iters)*100
+                return(tree)
+                }''')
+    add_bs_value = robjects.r['add_bs_value']
+    bs_tree =  add_bs_value(tree, bootstrap, iters)
+    #bs_trees  = bootstrap.rx('trees')[0]
+    #bs_tree = ape.consensus(bs_trees,p=0.5)
+    return bs_tree
 
 
 def dendro_relatives(tree,minor):
@@ -86,6 +93,48 @@
     return ascii_tree
 
 
+def ape_plot_tree(outfile, tree1, tree2=None, root=False):
+    """Plot tree to png file"""
+    ape = importr('ape')
+    graphics  = importr('graphics')
+    grdevices = importr('grDevices')
+    if tree2 is None:
+        grdevices.png(file=outfile, width=1024, height=768)
+        if root:
+            ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        else:
+            ape.plot_phylo(tree1,use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        #graphics.title(main='Neighbor Joining')
+        grdevices.dev_off()
+    elif tree2 is not None:
+        grdevices.png(file=outfile, width=1024, height=768)
+        graphics.par(mfcol=array.array('i',[1,2]))
+        if root:
+            ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        else:
+            ape.plot_phylo(tree1,use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        graphics.title(main='Neighbor Joining', cex=1.5, font=2)
+        if root:
+            ape.plot_phylo(ape.root(tree2,root),use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        else:
+            ape.plot_phylo(tree2,use_edge_length=0,
+                          show_node_label=1,edge_width=2, font=2,
+                          cex=1,underscore=0, no_margin=1)
+        graphics.title(main='Maximum Parsimony',cex=1.5, font=2)
+        grdevices.dev_off()
+    return
+
+
 def write_nwk(tree):
     "Write proper Newick string"
     ape = importr('ape')
@@ -99,12 +148,13 @@
     parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
     parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
     parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
+    parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and only a NJ major alleles tree is generated (Default: Require minor allele sequences)')
     parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
     parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
     parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
     parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
     args = parser.parse_args()
-    
+    #parser.print_help()
     
     if args.input:
         for fasta in args.input:
@@ -143,7 +193,7 @@
     fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
     minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
 
-    if len(minor_ids) == 0:
+    if len(minor_ids) == 0 and not args.major_only:
         sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
     else:
         pass
@@ -156,11 +206,11 @@
         nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
     
     if args.bootstrap == 0:
-        cons_tree = nj_tree
+        bs_tree = nj_tree
     elif args.bootstrap !=1000:
-        cons_tree = ape_consensus(nj_tree,nj_func,data,iters=args.bootstrap)
+        bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
     else:
-        cons_tree = ape_consensus(nj_tree,nj_func,data)
+        bs_tree = ape_bootstrap(nj_tree,nj_func,data)
     
     # Generate report, trees, and Newick strings
     if args.relatives_out is not None:
@@ -170,19 +220,24 @@
     if args.newick_out is not None:
         newick = open(args.newick_out,'w+')
     else:
-        newick = open(args.multi_fasta+'-newick.txt','w+')
+        newick = open(args.multi_fasta+'-newick.nwk','w+')
     if args.trees_out is not None:
         tree_plot_file = open(args.trees_out, 'w+')
     else:
-        tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
+        #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
+        tree_plot_file = args.multi_fasta+'-tree.png'
 
-    newick.write('%s\n' % (write_nwk(cons_tree)))
-    relatives.write('#source\tsample\trelatives\n')
-    for node in minor_ids:
-        nj_relatives = [relative for relative in dendro_relatives(cons_tree,node) if relative != node]
-        relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
-
-    tree_plot_file.write(dendro_plot(cons_tree,root=root_id))
+    newick.write('%s\n' % (write_nwk(bs_tree)))
+    #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
+    ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
+    
+    if args.major_only:
+        relatives.write('Major allele only mode cannot generate a report')
+    else:
+        relatives.write('#source\tsample\trelatives\n')
+        for node in minor_ids:
+            nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
+            relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
 
     newick.close()
     relatives.close()