comparison phylorelatives.py @ 35:015d1fb47ec4 draft

Updated script with png plot and --major-only option
author boris
date Fri, 14 Jun 2013 13:29:16 -0400
parents 8d663949bdc8
children 0f66636a3f88
comparison
equal deleted inserted replaced
34:6935429eef05 35:015d1fb47ec4
45 ape = importr('ape') 45 ape = importr('ape')
46 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option) 46 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option)
47 nj_tree = ape.nj(dist) 47 nj_tree = ape.nj(dist)
48 return nj_tree 48 return nj_tree
49 49
50 def ape_consensus(tree,tree_function,data,iters=1000): 50 def ape_bootstrap(tree,tree_function,data,iters=1000):
51 """Return majority rule consensus tree""" 51 """Return bootstrap tree with bootstrap values on nodes"""
52 ape = importr('ape') 52 ape = importr('ape')
53 tree_func = robjects.r(tree_function) 53 tree_func = robjects.r(tree_function)
54 bootstrap = ape.boot_phylo(tree, data, tree_func, 54 bootstrap = ape.boot_phylo(tree, data, tree_func,
55 B=iters, quiet=1, trees=1) 55 B=iters, quiet=1, trees=1)
56 bs_trees = bootstrap.rx('trees')[0] 56 robjects.r('''
57 cons_tree = ape.consensus(bs_trees,p=0.5) 57 add_bs_value = function(tree, boot, iters) {
58 return cons_tree 58 tree$node.label = ((boot$BP)/iters)*100
59 return(tree)
60 }''')
61 add_bs_value = robjects.r['add_bs_value']
62 bs_tree = add_bs_value(tree, bootstrap, iters)
63 #bs_trees = bootstrap.rx('trees')[0]
64 #bs_tree = ape.consensus(bs_trees,p=0.5)
65 return bs_tree
59 66
60 67
61 def dendro_relatives(tree,minor): 68 def dendro_relatives(tree,minor):
62 """Return minor allele sequence relatives in tree""" 69 """Return minor allele sequence relatives in tree"""
63 ape = importr('ape') 70 ape = importr('ape')
84 t = dendropy.Tree.get_from_string(newick,"newick") 91 t = dendropy.Tree.get_from_string(newick,"newick")
85 ascii_tree = t.as_ascii_plot() 92 ascii_tree = t.as_ascii_plot()
86 return ascii_tree 93 return ascii_tree
87 94
88 95
96 def ape_plot_tree(outfile, tree1, tree2=None, root=False):
97 """Plot tree to png file"""
98 ape = importr('ape')
99 graphics = importr('graphics')
100 grdevices = importr('grDevices')
101 if tree2 is None:
102 grdevices.png(file=outfile, width=1024, height=768)
103 if root:
104 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
105 show_node_label=1,edge_width=2, font=2,
106 cex=1,underscore=0, no_margin=1)
107 else:
108 ape.plot_phylo(tree1,use_edge_length=0,
109 show_node_label=1,edge_width=2, font=2,
110 cex=1,underscore=0, no_margin=1)
111 #graphics.title(main='Neighbor Joining')
112 grdevices.dev_off()
113 elif tree2 is not None:
114 grdevices.png(file=outfile, width=1024, height=768)
115 graphics.par(mfcol=array.array('i',[1,2]))
116 if root:
117 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
118 show_node_label=1,edge_width=2, font=2,
119 cex=1,underscore=0, no_margin=1)
120 else:
121 ape.plot_phylo(tree1,use_edge_length=0,
122 show_node_label=1,edge_width=2, font=2,
123 cex=1,underscore=0, no_margin=1)
124 graphics.title(main='Neighbor Joining', cex=1.5, font=2)
125 if root:
126 ape.plot_phylo(ape.root(tree2,root),use_edge_length=0,
127 show_node_label=1,edge_width=2, font=2,
128 cex=1,underscore=0, no_margin=1)
129 else:
130 ape.plot_phylo(tree2,use_edge_length=0,
131 show_node_label=1,edge_width=2, font=2,
132 cex=1,underscore=0, no_margin=1)
133 graphics.title(main='Maximum Parsimony',cex=1.5, font=2)
134 grdevices.dev_off()
135 return
136
137
89 def write_nwk(tree): 138 def write_nwk(tree):
90 "Write proper Newick string" 139 "Write proper Newick string"
91 ape = importr('ape') 140 ape = importr('ape')
92 nwk = list(ape.write_tree(tree))[0] 141 nwk = list(ape.write_tree(tree))[0]
93 return nwk 142 return nwk
97 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, tree plots and trees in Newick format. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)') 146 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, tree plots and trees in Newick format. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)')
98 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)') 147 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)')
99 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)') 148 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
100 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)') 149 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
101 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)') 150 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
151 parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and only a NJ major alleles tree is generated (Default: Require minor allele sequences)')
102 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS) 152 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
103 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS) 153 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
104 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS) 154 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
105 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS) 155 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
106 args = parser.parse_args() 156 args = parser.parse_args()
107 157 #parser.print_help()
108 158
109 if args.input: 159 if args.input:
110 for fasta in args.input: 160 for fasta in args.input:
111 try: 161 try:
112 open(fasta) 162 open(fasta)
141 191
142 # Get sequence ids in alignment and identify test sequences 192 # Get sequence ids in alignment and identify test sequences
143 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')] 193 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
144 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')] 194 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
145 195
146 if len(minor_ids) == 0: 196 if len(minor_ids) == 0 and not args.major_only:
147 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n") 197 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
148 else: 198 else:
149 pass 199 pass
150 200
151 if args.pairwise: 201 if args.pairwise:
154 else: 204 else:
155 nj_tree = ape_nj(data) 205 nj_tree = ape_nj(data)
156 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))' 206 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
157 207
158 if args.bootstrap == 0: 208 if args.bootstrap == 0:
159 cons_tree = nj_tree 209 bs_tree = nj_tree
160 elif args.bootstrap !=1000: 210 elif args.bootstrap !=1000:
161 cons_tree = ape_consensus(nj_tree,nj_func,data,iters=args.bootstrap) 211 bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
162 else: 212 else:
163 cons_tree = ape_consensus(nj_tree,nj_func,data) 213 bs_tree = ape_bootstrap(nj_tree,nj_func,data)
164 214
165 # Generate report, trees, and Newick strings 215 # Generate report, trees, and Newick strings
166 if args.relatives_out is not None: 216 if args.relatives_out is not None:
167 relatives = open(args.relatives_out,'w+') 217 relatives = open(args.relatives_out,'w+')
168 else: 218 else:
169 relatives = open(args.multi_fasta+'-relatives.tab','w+') 219 relatives = open(args.multi_fasta+'-relatives.tab','w+')
170 if args.newick_out is not None: 220 if args.newick_out is not None:
171 newick = open(args.newick_out,'w+') 221 newick = open(args.newick_out,'w+')
172 else: 222 else:
173 newick = open(args.multi_fasta+'-newick.txt','w+') 223 newick = open(args.multi_fasta+'-newick.nwk','w+')
174 if args.trees_out is not None: 224 if args.trees_out is not None:
175 tree_plot_file = open(args.trees_out, 'w+') 225 tree_plot_file = open(args.trees_out, 'w+')
176 else: 226 else:
177 tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+') 227 #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
178 228 tree_plot_file = args.multi_fasta+'-tree.png'
179 newick.write('%s\n' % (write_nwk(cons_tree))) 229
180 relatives.write('#source\tsample\trelatives\n') 230 newick.write('%s\n' % (write_nwk(bs_tree)))
181 for node in minor_ids: 231 #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
182 nj_relatives = [relative for relative in dendro_relatives(cons_tree,node) if relative != node] 232 ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
183 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) ) 233
184 234 if args.major_only:
185 tree_plot_file.write(dendro_plot(cons_tree,root=root_id)) 235 relatives.write('Major allele only mode cannot generate a report')
236 else:
237 relatives.write('#source\tsample\trelatives\n')
238 for node in minor_ids:
239 nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
240 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
186 241
187 newick.close() 242 newick.close()
188 relatives.close() 243 relatives.close()
189 244
190 if __name__ == "__main__": main() 245 if __name__ == "__main__": main()