annotate phylorelatives.py @ 35:015d1fb47ec4 draft

Updated script with png plot and --major-only option
author boris
date Fri, 14 Jun 2013 13:29:16 -0400
parents 8d663949bdc8
children 0f66636a3f88
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
1 #!/usr/bin/env python
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
2 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
3 # Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
4 #usage: phylorelatives.py [-h] [-i FASTA] [-b INT] [-p] [-r FASTA]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
5 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
6 #Constructs relatedness of a set of sequences based on the pairwise proportion
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
7 #of different sites. It reports the test sequences relatives, tree plot and
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
8 #tree in Newick format. One or more test sequences are accepted as long as
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
9 #their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor).
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
10 #IMPORTANT: Sequences must have the same length!
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
11 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
12 #optional arguments:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
13 # -h, --help show this help message and exit
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
14 # -i FASTA, --input FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
15 # This option can be specified multiple times. Sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
16 # will be added to "multi-fasta.fa". (e.g. -i major1.fa
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
17 # -i major2.fa -i minor1.fa)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
18 # -b INT, --bootstrap INT
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
19 # Change number of replicas. 0 to deactivate. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
20 # 1000)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
21 # -p, --pairwise Use pairwise deletion of gaps/missing data. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
22 # Complete deletion)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
23 # -r FASTA, --root FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
24 # Root trees using FASTA sequence as outgroup. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
25 # Display unrooted trees)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
26
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
27 import sys
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
28 import argparse
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
29 import array
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
30 import dendropy
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
31 import rpy2.rinterface
25
29ca7d28f6c7 added --vanilla to rpy2 init options
boris
parents: 23
diff changeset
32 rpy2.rinterface.set_initoptions(('rpy2','--vanilla','--quiet'))
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
33 import rpy2.robjects as robjects
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
34 from rpy2.robjects.packages import importr
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
35
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
36
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
37 def ape_read_dna(infasta):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
38 """Read multi-fasta into phylo object"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
39 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
40 data = ape.read_dna(file=infasta,format="fasta")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
41 return data
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
42
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
43 def ape_nj(data,missing_info_option=0):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
44 """Return ape nj tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
45 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
46 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
47 nj_tree = ape.nj(dist)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
48 return nj_tree
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
49
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
50 def ape_bootstrap(tree,tree_function,data,iters=1000):
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
51 """Return bootstrap tree with bootstrap values on nodes"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
52 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
53 tree_func = robjects.r(tree_function)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
54 bootstrap = ape.boot_phylo(tree, data, tree_func,
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
55 B=iters, quiet=1, trees=1)
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
56 robjects.r('''
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
57 add_bs_value = function(tree, boot, iters) {
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
58 tree$node.label = ((boot$BP)/iters)*100
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
59 return(tree)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
60 }''')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
61 add_bs_value = robjects.r['add_bs_value']
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
62 bs_tree = add_bs_value(tree, bootstrap, iters)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
63 #bs_trees = bootstrap.rx('trees')[0]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
64 #bs_tree = ape.consensus(bs_trees,p=0.5)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
65 return bs_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
66
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
67
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
68 def dendro_relatives(tree,minor):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
69 """Return minor allele sequence relatives in tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
70 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
71 newick = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
72 t = dendropy.Tree.get_from_string(newick,"newick")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
73 minor_leaf = [node for node in t.leaf_nodes()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
74 if node.get_node_str() == minor][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
75 parent = minor_leaf.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
76 relatives = []
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
77 while len(relatives) == 0:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
78 output = [relative.get_node_str() for relative in parent.leaf_nodes()]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
79 relatives = [relative for relative in output if not (relative.endswith('minor') or relative.endswith('test'))]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
80 parent = parent.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
81 return output
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
82
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
83
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
84 def dendro_plot(tree, root=False ):
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
85 """Plot tree to file in ascii format"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
86 ape = importr('ape')
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
87 if root:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
88 newick = list(ape.write_tree(ape.root(tree,root)))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
89 else:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
90 newick = list(ape.write_tree(tree))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
91 t = dendropy.Tree.get_from_string(newick,"newick")
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
92 ascii_tree = t.as_ascii_plot()
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
93 return ascii_tree
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
94
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
95
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
96 def ape_plot_tree(outfile, tree1, tree2=None, root=False):
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
97 """Plot tree to png file"""
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
98 ape = importr('ape')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
99 graphics = importr('graphics')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
100 grdevices = importr('grDevices')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
101 if tree2 is None:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
102 grdevices.png(file=outfile, width=1024, height=768)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
103 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
104 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
105 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
106 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
107 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
108 ape.plot_phylo(tree1,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
109 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
110 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
111 #graphics.title(main='Neighbor Joining')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
112 grdevices.dev_off()
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
113 elif tree2 is not None:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
114 grdevices.png(file=outfile, width=1024, height=768)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
115 graphics.par(mfcol=array.array('i',[1,2]))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
116 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
117 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
118 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
119 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
120 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
121 ape.plot_phylo(tree1,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
122 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
123 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
124 graphics.title(main='Neighbor Joining', cex=1.5, font=2)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
125 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
126 ape.plot_phylo(ape.root(tree2,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
127 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
128 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
129 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
130 ape.plot_phylo(tree2,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
131 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
132 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
133 graphics.title(main='Maximum Parsimony',cex=1.5, font=2)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
134 grdevices.dev_off()
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
135 return
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
136
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
137
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
138 def write_nwk(tree):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
139 "Write proper Newick string"
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
140 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
141 nwk = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
142 return nwk
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
143
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
144 def main():
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
145 # Parse command line options
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
146 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, tree plots and trees in Newick format. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
147 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
148 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
149 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
150 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
151 parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and only a NJ major alleles tree is generated (Default: Require minor allele sequences)')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
152 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
153 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
154 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
155 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
33
8d663949bdc8 Uploaded
boris
parents: 31
diff changeset
156 args = parser.parse_args()
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
157 #parser.print_help()
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
158
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
159 if args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
160 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
161 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
162 open(fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
163 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
164 sys.exit("\nERROR: Could not open %s\n" % fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
165 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
166 multi = open(args.multi_fasta, 'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
167 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
168 sys.exit("\nERROR: Could not create %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
169
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
170 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
171 for line in list(open(fasta)):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
172 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
173
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
174
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
175 if args.root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
176 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
177 root = list(open(args.root))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
178 root_id = [line.strip()[1:].replace('-', '_') for line in root if line.strip().startswith(">")][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
179 for line in root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
180 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
181 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
182 sys.exit("\nERROR: Could not open %s\n" % args.root)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
183 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
184 root_id = args.root
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
185 multi.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
186
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
187 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
188 data = ape_read_dna(args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
189 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
190 sys.exit("\nERROR: Check existence or proper format of %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
191
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
192 # Get sequence ids in alignment and identify test sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
193 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
194 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
195
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
196 if len(minor_ids) == 0 and not args.major_only:
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
197 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
198 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
199 pass
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
200
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
201 if args.pairwise:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
202 nj_tree = ape_nj(data,1)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
203 nj_func = 'function (xx) nj(dist.dna(xx, model="raw", pair=1))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
204 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
205 nj_tree = ape_nj(data)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
206 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
207
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
208 if args.bootstrap == 0:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
209 bs_tree = nj_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
210 elif args.bootstrap !=1000:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
211 bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
212 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
213 bs_tree = ape_bootstrap(nj_tree,nj_func,data)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
214
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
215 # Generate report, trees, and Newick strings
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
216 if args.relatives_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
217 relatives = open(args.relatives_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
218 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
219 relatives = open(args.multi_fasta+'-relatives.tab','w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
220 if args.newick_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
221 newick = open(args.newick_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
222 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
223 newick = open(args.multi_fasta+'-newick.nwk','w+')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
224 if args.trees_out is not None:
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
225 tree_plot_file = open(args.trees_out, 'w+')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
226 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
227 #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
228 tree_plot_file = args.multi_fasta+'-tree.png'
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
229
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
230 newick.write('%s\n' % (write_nwk(bs_tree)))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
231 #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
232 ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
233
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
234 if args.major_only:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
235 relatives.write('Major allele only mode cannot generate a report')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
236 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
237 relatives.write('#source\tsample\trelatives\n')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
238 for node in minor_ids:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
239 nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
240 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
241
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
242 newick.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
243 relatives.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
244
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
245 if __name__ == "__main__": main()