annotate phylorelatives.py @ 39:369f0b641498 draft

Updated script comments
author boris
date Tue, 25 Jun 2013 00:41:54 -0400
parents 0f66636a3f88
children dd39849c2ff1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
1 #!/usr/bin/env python
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
2 #
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
3 #Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
4 #usage: phylorelatives.py [-h] [-i FASTA] [-b INT] [-p] [-r FASTA] [-j]
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
5 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
6 #Constructs relatedness of a set of sequences based on the pairwise proportion
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
7 #of different sites. It reports the test sequences relatives, NJ tree plot and
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
8 #Newick string. One or more test sequences are accepted as long as their name
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
9 #includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT:
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
10 #Sequences must have the same length!
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
11 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
12 #optional arguments:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
13 # -h, --help show this help message and exit
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
14 # -i FASTA, --input FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
15 # This option can be specified multiple times. Sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
16 # will be added to "multi-fasta.fa". (e.g. -i major1.fa
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
17 # -i major2.fa -i minor1.fa)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
18 # -b INT, --bootstrap INT
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
19 # Change number of replicas. 0 to deactivate. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
20 # 1000)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
21 # -p, --pairwise Use pairwise deletion of gaps/missing data. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
22 # Complete deletion)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
23 # -r FASTA, --root FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
24 # Root trees using FASTA sequence as outgroup. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
25 # Display unrooted trees)
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
26 # -j, --major-only In major-only mode no minor allele sequence is
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
27 # required and each sequence is treated as a major
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
28 # allele sequence (Default: Require minor allele
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
29 # sequences)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
30
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
31 import sys
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
32 import argparse
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
33 import array
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
34 import dendropy
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
35 import rpy2.rinterface
25
29ca7d28f6c7 added --vanilla to rpy2 init options
boris
parents: 23
diff changeset
36 rpy2.rinterface.set_initoptions(('rpy2','--vanilla','--quiet'))
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
37 import rpy2.robjects as robjects
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
38 from rpy2.robjects.packages import importr
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
39
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
40
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
41 def ape_read_dna(infasta):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
42 """Read multi-fasta into phylo object"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
43 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
44 data = ape.read_dna(file=infasta,format="fasta")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
45 return data
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
46
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
47 def ape_nj(data,missing_info_option=0):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
48 """Return ape nj tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
49 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
50 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
51 nj_tree = ape.nj(dist)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
52 return nj_tree
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
53
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
54 def ape_bootstrap(tree,tree_function,data,iters=1000):
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
55 """Return bootstrap tree with bootstrap values on nodes"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
56 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
57 tree_func = robjects.r(tree_function)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
58 bootstrap = ape.boot_phylo(tree, data, tree_func,
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
59 B=iters, quiet=1, trees=1)
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
60 robjects.r('''
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
61 add_bs_value = function(tree, boot, iters) {
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
62 tree$node.label = ((boot$BP)/iters)*100
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
63 return(tree)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
64 }''')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
65 add_bs_value = robjects.r['add_bs_value']
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
66 bs_tree = add_bs_value(tree, bootstrap, iters)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
67 #bs_trees = bootstrap.rx('trees')[0]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
68 #bs_tree = ape.consensus(bs_trees,p=0.5)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
69 return bs_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
70
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
71
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
72 def dendro_relatives(tree,minor):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
73 """Return minor allele sequence relatives in tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
74 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
75 newick = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
76 t = dendropy.Tree.get_from_string(newick,"newick")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
77 minor_leaf = [node for node in t.leaf_nodes()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
78 if node.get_node_str() == minor][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
79 parent = minor_leaf.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
80 relatives = []
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
81 while len(relatives) == 0:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
82 output = [relative.get_node_str() for relative in parent.leaf_nodes()]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
83 relatives = [relative for relative in output if not (relative.endswith('minor') or relative.endswith('test'))]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
84 parent = parent.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
85 return output
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
86
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
87
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
88 def dendro_plot(tree, root=False ):
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
89 """Plot tree to file in ascii format"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
90 ape = importr('ape')
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
91 if root:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
92 newick = list(ape.write_tree(ape.root(tree,root)))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
93 else:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
94 newick = list(ape.write_tree(tree))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
95 t = dendropy.Tree.get_from_string(newick,"newick")
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
96 ascii_tree = t.as_ascii_plot()
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
97 return ascii_tree
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
98
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
99
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
100 def ape_plot_tree(outfile, tree1, tree2=None, root=False):
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
101 """Plot tree to png file"""
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
102 ape = importr('ape')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
103 graphics = importr('graphics')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
104 grdevices = importr('grDevices')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
105 if tree2 is None:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
106 grdevices.png(file=outfile, width=1024, height=768)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
107 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
108 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
109 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
110 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
111 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
112 ape.plot_phylo(tree1,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
113 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
114 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
115 #graphics.title(main='Neighbor Joining')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
116 grdevices.dev_off()
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
117 elif tree2 is not None:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
118 grdevices.png(file=outfile, width=1024, height=768)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
119 graphics.par(mfcol=array.array('i',[1,2]))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
120 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
121 ape.plot_phylo(ape.root(tree1,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
122 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
123 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
124 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
125 ape.plot_phylo(tree1,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
126 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
127 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
128 graphics.title(main='Neighbor Joining', cex=1.5, font=2)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
129 if root:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
130 ape.plot_phylo(ape.root(tree2,root),use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
131 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
132 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
133 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
134 ape.plot_phylo(tree2,use_edge_length=0,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
135 show_node_label=1,edge_width=2, font=2,
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
136 cex=1,underscore=0, no_margin=1)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
137 graphics.title(main='Maximum Parsimony',cex=1.5, font=2)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
138 grdevices.dev_off()
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
139 return
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
140
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
141
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
142 def write_nwk(tree):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
143 "Write proper Newick string"
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
144 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
145 nwk = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
146 return nwk
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
147
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
148 def main():
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
149 # Parse command line options
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
150 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, NJ tree plot and Newick string. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
151 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
152 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
153 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
154 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
155 parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and each sequence is treated as a major allele sequence (Default: Require minor allele sequences)')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
156 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
157 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
158 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
159 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
33
8d663949bdc8 Uploaded
boris
parents: 31
diff changeset
160 args = parser.parse_args()
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
161 #parser.print_help()
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
162
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
163 if args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
164 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
165 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
166 open(fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
167 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
168 sys.exit("\nERROR: Could not open %s\n" % fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
169 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
170 multi = open(args.multi_fasta, 'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
171 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
172 sys.exit("\nERROR: Could not create %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
173
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
174 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
175 for line in list(open(fasta)):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
176 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
177
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
178
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
179 if args.root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
180 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
181 root = list(open(args.root))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
182 root_id = [line.strip()[1:].replace('-', '_') for line in root if line.strip().startswith(">")][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
183 for line in root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
184 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
185 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
186 sys.exit("\nERROR: Could not open %s\n" % args.root)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
187 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
188 root_id = args.root
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
189 multi.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
190
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
191 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
192 data = ape_read_dna(args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
193 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
194 sys.exit("\nERROR: Check existence or proper format of %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
195
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
196 # Get sequence ids in alignment and identify test sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
197 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
198 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
199
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
200 if len(minor_ids) == 0 and not args.major_only:
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
201 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
202 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
203 pass
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
204
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
205 if args.pairwise:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
206 nj_tree = ape_nj(data,1)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
207 nj_func = 'function (xx) nj(dist.dna(xx, model="raw", pair=1))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
208 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
209 nj_tree = ape_nj(data)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
210 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
211
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
212 if args.bootstrap == 0:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
213 bs_tree = nj_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
214 elif args.bootstrap !=1000:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
215 bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
216 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
217 bs_tree = ape_bootstrap(nj_tree,nj_func,data)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
218
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
219 # Generate report, trees, and Newick strings
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
220 if args.relatives_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
221 relatives = open(args.relatives_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
222 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
223 relatives = open(args.multi_fasta+'-relatives.tab','w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
224 if args.newick_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
225 newick = open(args.newick_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
226 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
227 newick = open(args.multi_fasta+'-newick.nwk','w+')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
228 if args.trees_out is not None:
37
0f66636a3f88 fixed tree-plot file interpretation
boris
parents: 35
diff changeset
229 tree_plot_file = args.trees_out
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
230 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
231 #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
232 tree_plot_file = args.multi_fasta+'-tree.png'
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
233
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
234 newick.write('%s\n' % (write_nwk(bs_tree)))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
235 #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
236 ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
237
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
238 if args.major_only:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
239 relatives.write('Major allele only mode cannot generate a report')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
240 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
241 relatives.write('#source\tsample\trelatives\n')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
242 for node in minor_ids:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
243 nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
244 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
245
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
246 newick.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
247 relatives.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
248
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
249 if __name__ == "__main__": main()