annotate phylorelatives.py @ 42:35741a74d073 draft default tip

type="cairo" added to plotting function
author boris
date Wed, 29 Jan 2014 11:05:27 -0500
parents dd39849c2ff1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
1 #!/usr/bin/env python
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
2 #
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
3 #Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
4 #usage: phylorelatives.py [-h] [-i FASTA] [-b INT] [-p] [-r FASTA] [-j]
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
5 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
6 #Constructs relatedness of a set of sequences based on the pairwise proportion
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
7 #of different sites. It reports the test sequences relatives, NJ tree plot and
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
8 #Newick string. One or more test sequences are accepted as long as their name
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
9 #includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT:
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
10 #Sequences must have the same length!
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
11 #
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
12 #optional arguments:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
13 # -h, --help show this help message and exit
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
14 # -i FASTA, --input FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
15 # This option can be specified multiple times. Sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
16 # will be added to "multi-fasta.fa". (e.g. -i major1.fa
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
17 # -i major2.fa -i minor1.fa)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
18 # -b INT, --bootstrap INT
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
19 # Change number of replicas. 0 to deactivate. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
20 # 1000)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
21 # -p, --pairwise Use pairwise deletion of gaps/missing data. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
22 # Complete deletion)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
23 # -r FASTA, --root FASTA
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
24 # Root trees using FASTA sequence as outgroup. (Default:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
25 # Display unrooted trees)
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
26 # -j, --major-only In major-only mode no minor allele sequence is
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
27 # required and each sequence is treated as a major
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
28 # allele sequence (Default: Require minor allele
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
29 # sequences)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
30
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
31 import sys
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
32 import argparse
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
33 import array
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
34 import dendropy
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
35 import rpy2.rinterface
25
29ca7d28f6c7 added --vanilla to rpy2 init options
boris
parents: 23
diff changeset
36 rpy2.rinterface.set_initoptions(('rpy2','--vanilla','--quiet'))
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
37 import rpy2.robjects as robjects
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
38 from rpy2.robjects.packages import importr
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
39
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
40
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
41 def ape_read_dna(infasta):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
42 """Read multi-fasta into phylo object"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
43 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
44 data = ape.read_dna(file=infasta,format="fasta")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
45 return data
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
46
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
47 def ape_nj(data,missing_info_option=0):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
48 """Return ape nj tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
49 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
50 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
51 nj_tree = ape.nj(dist)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
52 return nj_tree
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
53
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
54 def ape_bootstrap(tree,tree_function,data,iters=1000):
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
55 """Return bootstrap tree with bootstrap values on nodes"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
56 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
57 tree_func = robjects.r(tree_function)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
58 bootstrap = ape.boot_phylo(tree, data, tree_func,
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
59 B=iters, quiet=1, trees=1)
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
60 robjects.r('''
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
61 add_bs_value = function(tree, boot, iters) {
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
62 tree$node.label = ((boot$BP)/iters)*100
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
63 return(tree)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
64 }''')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
65 add_bs_value = robjects.r['add_bs_value']
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
66 bs_tree = add_bs_value(tree, bootstrap, iters)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
67 #bs_trees = bootstrap.rx('trees')[0]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
68 #bs_tree = ape.consensus(bs_trees,p=0.5)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
69 return bs_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
70
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
71
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
72 def dendro_relatives(tree,minor):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
73 """Return minor allele sequence relatives in tree"""
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
74 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
75 newick = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
76 t = dendropy.Tree.get_from_string(newick,"newick")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
77 minor_leaf = [node for node in t.leaf_nodes()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
78 if node.get_node_str() == minor][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
79 parent = minor_leaf.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
80 relatives = []
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
81 while len(relatives) == 0:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
82 output = [relative.get_node_str() for relative in parent.leaf_nodes()]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
83 relatives = [relative for relative in output if not (relative.endswith('minor') or relative.endswith('test'))]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
84 parent = parent.parent_node
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
85 return output
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
86
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
87
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
88 def dendro_plot(tree, root=False ):
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
89 """Plot tree to file in ascii format"""
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
90 ape = importr('ape')
31
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
91 if root:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
92 newick = list(ape.write_tree(ape.root(tree,root)))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
93 else:
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
94 newick = list(ape.write_tree(tree))[0]
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
95 t = dendropy.Tree.get_from_string(newick,"newick")
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
96 ascii_tree = t.as_ascii_plot()
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
97 return ascii_tree
75dcc9910f6d Uploaded
boris
parents: 25
diff changeset
98
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
99
40
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
100 def ape_plot_tree(outfile, tree1, root=False):
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
101 """Plot tree to png file"""
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
102 ape = importr('ape')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
103 graphics = importr('graphics')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
104 grdevices = importr('grDevices')
42
35741a74d073 type="cairo" added to plotting function
boris
parents: 40
diff changeset
105 grdevices.png(file=outfile, width=1024, height=768,type="cairo")
40
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
106 if root:
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
107 tree = ape.root(tree1,root)
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
108 labels = list(tree.rx("tip.label")[0])
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
109 colors = robjects.StrVector(["red" if tip.endswith("_minor") else "black" for tip in labels])
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
110 ape.plot_phylo(tree,tip_color=colors,use_edge_length=0,
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
111 show_node_label=1,edge_width=2, font=2,
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
112 cex=1,underscore=0, no_margin=1)
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
113 else:
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
114 tree = tree1
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
115 labels = list(tree.rx("tip.label")[0])
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
116 colors = robjects.StrVector(["red" if tip.endswith("_minor") else "black" for tip in labels])
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
117 ape.plot_phylo(tree,tip_color=colors,use_edge_length=0,
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
118 show_node_label=1,edge_width=2, font=2,
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
119 cex=1,underscore=0, no_margin=1)
dd39849c2ff1 plot now highlights minor allele sequences in tree
boris
parents: 39
diff changeset
120 #graphics.title(main='Neighbor Joining')
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
121 return
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
122
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
123
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
124 def write_nwk(tree):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
125 "Write proper Newick string"
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
126 ape = importr('ape')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
127 nwk = list(ape.write_tree(tree))[0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
128 return nwk
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
129
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
130 def main():
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
131 # Parse command line options
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
132 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, NJ tree plot and Newick string. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
133 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
134 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
135 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
136 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
39
369f0b641498 Updated script comments
boris
parents: 37
diff changeset
137 parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and each sequence is treated as a major allele sequence (Default: Require minor allele sequences)')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
138 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
139 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
140 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
141 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
33
8d663949bdc8 Uploaded
boris
parents: 31
diff changeset
142 args = parser.parse_args()
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
143 #parser.print_help()
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
144
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
145 if args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
146 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
147 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
148 open(fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
149 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
150 sys.exit("\nERROR: Could not open %s\n" % fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
151 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
152 multi = open(args.multi_fasta, 'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
153 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
154 sys.exit("\nERROR: Could not create %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
155
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
156 for fasta in args.input:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
157 for line in list(open(fasta)):
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
158 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
159
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
160
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
161 if args.root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
162 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
163 root = list(open(args.root))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
164 root_id = [line.strip()[1:].replace('-', '_') for line in root if line.strip().startswith(">")][0]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
165 for line in root:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
166 multi.write(line.replace('-', '_'))
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
167 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
168 sys.exit("\nERROR: Could not open %s\n" % args.root)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
169 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
170 root_id = args.root
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
171 multi.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
172
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
173 try:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
174 data = ape_read_dna(args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
175 except:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
176 sys.exit("\nERROR: Check existence or proper format of %s\n" % args.multi_fasta)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
177
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
178 # Get sequence ids in alignment and identify test sequences
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
179 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
180 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
181
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
182 if len(minor_ids) == 0 and not args.major_only:
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
183 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
184 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
185 pass
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
186
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
187 if args.pairwise:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
188 nj_tree = ape_nj(data,1)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
189 nj_func = 'function (xx) nj(dist.dna(xx, model="raw", pair=1))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
190 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
191 nj_tree = ape_nj(data)
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
192 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
193
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
194 if args.bootstrap == 0:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
195 bs_tree = nj_tree
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
196 elif args.bootstrap !=1000:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
197 bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
198 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
199 bs_tree = ape_bootstrap(nj_tree,nj_func,data)
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
200
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
201 # Generate report, trees, and Newick strings
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
202 if args.relatives_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
203 relatives = open(args.relatives_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
204 else:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
205 relatives = open(args.multi_fasta+'-relatives.tab','w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
206 if args.newick_out is not None:
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
207 newick = open(args.newick_out,'w+')
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
208 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
209 newick = open(args.multi_fasta+'-newick.nwk','w+')
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
210 if args.trees_out is not None:
37
0f66636a3f88 fixed tree-plot file interpretation
boris
parents: 35
diff changeset
211 tree_plot_file = args.trees_out
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
212 else:
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
213 #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
214 tree_plot_file = args.multi_fasta+'-tree.png'
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
215
35
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
216 newick.write('%s\n' % (write_nwk(bs_tree)))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
217 #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
218 ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
219
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
220 if args.major_only:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
221 relatives.write('Major allele only mode cannot generate a report')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
222 else:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
223 relatives.write('#source\tsample\trelatives\n')
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
224 for node in minor_ids:
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
225 nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
015d1fb47ec4 Updated script with png plot and --major-only option
boris
parents: 33
diff changeset
226 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
0
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
227
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
228 newick.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
229 relatives.close()
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
230
5a077cdba2f6 Uploaded phylorelatives.py
boris
parents:
diff changeset
231 if __name__ == "__main__": main()