annotate phylogenetic_tree.py @ 0:99a67ac88802 default tip

Uploaded
author rico
date Thu, 05 Apr 2012 14:22:50 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
99a67ac88802 Uploaded
rico
parents:
diff changeset
1 #!/usr/bin/env python
99a67ac88802 Uploaded
rico
parents:
diff changeset
2
99a67ac88802 Uploaded
rico
parents:
diff changeset
3 import os
99a67ac88802 Uploaded
rico
parents:
diff changeset
4 import errno
99a67ac88802 Uploaded
rico
parents:
diff changeset
5 import sys
99a67ac88802 Uploaded
rico
parents:
diff changeset
6 import subprocess
99a67ac88802 Uploaded
rico
parents:
diff changeset
7 import shutil
99a67ac88802 Uploaded
rico
parents:
diff changeset
8 from Population import Population
99a67ac88802 Uploaded
rico
parents:
diff changeset
9 import gd_composite
99a67ac88802 Uploaded
rico
parents:
diff changeset
10
99a67ac88802 Uploaded
rico
parents:
diff changeset
11 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
12
99a67ac88802 Uploaded
rico
parents:
diff changeset
13 def mkdir_p(path):
99a67ac88802 Uploaded
rico
parents:
diff changeset
14 try:
99a67ac88802 Uploaded
rico
parents:
diff changeset
15 os.makedirs(path)
99a67ac88802 Uploaded
rico
parents:
diff changeset
16 except OSError, e:
99a67ac88802 Uploaded
rico
parents:
diff changeset
17 if e.errno <> errno.EEXIST:
99a67ac88802 Uploaded
rico
parents:
diff changeset
18 raise
99a67ac88802 Uploaded
rico
parents:
diff changeset
19
99a67ac88802 Uploaded
rico
parents:
diff changeset
20 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
21
99a67ac88802 Uploaded
rico
parents:
diff changeset
22 if len(sys.argv) < 11:
99a67ac88802 Uploaded
rico
parents:
diff changeset
23 print >> sys.stderr, "Usage"
99a67ac88802 Uploaded
rico
parents:
diff changeset
24 sys.exit(1)
99a67ac88802 Uploaded
rico
parents:
diff changeset
25
99a67ac88802 Uploaded
rico
parents:
diff changeset
26 input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10]
99a67ac88802 Uploaded
rico
parents:
diff changeset
27
99a67ac88802 Uploaded
rico
parents:
diff changeset
28 individual_metadata = sys.argv[10:]
99a67ac88802 Uploaded
rico
parents:
diff changeset
29
99a67ac88802 Uploaded
rico
parents:
diff changeset
30 # note: TEST THIS
99a67ac88802 Uploaded
rico
parents:
diff changeset
31 if dbkey in ['', '?', 'None']:
99a67ac88802 Uploaded
rico
parents:
diff changeset
32 dbkey = 'none'
99a67ac88802 Uploaded
rico
parents:
diff changeset
33
99a67ac88802 Uploaded
rico
parents:
diff changeset
34 p_total = Population()
99a67ac88802 Uploaded
rico
parents:
diff changeset
35 p_total.from_tag_list(individual_metadata)
99a67ac88802 Uploaded
rico
parents:
diff changeset
36
99a67ac88802 Uploaded
rico
parents:
diff changeset
37
99a67ac88802 Uploaded
rico
parents:
diff changeset
38 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
39
99a67ac88802 Uploaded
rico
parents:
diff changeset
40 mkdir_p(extra_files_path)
99a67ac88802 Uploaded
rico
parents:
diff changeset
41
99a67ac88802 Uploaded
rico
parents:
diff changeset
42 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
43
99a67ac88802 Uploaded
rico
parents:
diff changeset
44 def run_program(prog, args, ofh):
99a67ac88802 Uploaded
rico
parents:
diff changeset
45 #print "args: ", ' '.join(args)
99a67ac88802 Uploaded
rico
parents:
diff changeset
46 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
99a67ac88802 Uploaded
rico
parents:
diff changeset
47 (stdoutdata, stderrdata) = p.communicate()
99a67ac88802 Uploaded
rico
parents:
diff changeset
48 rc = p.returncode
99a67ac88802 Uploaded
rico
parents:
diff changeset
49 ofh.close()
99a67ac88802 Uploaded
rico
parents:
diff changeset
50
99a67ac88802 Uploaded
rico
parents:
diff changeset
51 if rc != 0:
99a67ac88802 Uploaded
rico
parents:
diff changeset
52 #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
99a67ac88802 Uploaded
rico
parents:
diff changeset
53 print >> sys.stderr, stderrdata
99a67ac88802 Uploaded
rico
parents:
diff changeset
54 sys.exit(1)
99a67ac88802 Uploaded
rico
parents:
diff changeset
55
99a67ac88802 Uploaded
rico
parents:
diff changeset
56 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
57
99a67ac88802 Uploaded
rico
parents:
diff changeset
58 phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
99a67ac88802 Uploaded
rico
parents:
diff changeset
59 newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
99a67ac88802 Uploaded
rico
parents:
diff changeset
60 ps_outfile = 'tree.ps'
99a67ac88802 Uploaded
rico
parents:
diff changeset
61 pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')
99a67ac88802 Uploaded
rico
parents:
diff changeset
62
99a67ac88802 Uploaded
rico
parents:
diff changeset
63 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
64
99a67ac88802 Uploaded
rico
parents:
diff changeset
65 informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
99a67ac88802 Uploaded
rico
parents:
diff changeset
66 mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')
99a67ac88802 Uploaded
rico
parents:
diff changeset
67
99a67ac88802 Uploaded
rico
parents:
diff changeset
68 prog = 'dist_mat'
99a67ac88802 Uploaded
rico
parents:
diff changeset
69
99a67ac88802 Uploaded
rico
parents:
diff changeset
70 args = []
99a67ac88802 Uploaded
rico
parents:
diff changeset
71 args.append(prog)
99a67ac88802 Uploaded
rico
parents:
diff changeset
72 args.append(input)
99a67ac88802 Uploaded
rico
parents:
diff changeset
73 args.append(minimum_coverage)
99a67ac88802 Uploaded
rico
parents:
diff changeset
74 args.append(minimum_quality)
99a67ac88802 Uploaded
rico
parents:
diff changeset
75 args.append(dbkey)
99a67ac88802 Uploaded
rico
parents:
diff changeset
76 args.append(data_source)
99a67ac88802 Uploaded
rico
parents:
diff changeset
77 args.append(informative_snp_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
78 args.append(mega_distance_matrix_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
79
99a67ac88802 Uploaded
rico
parents:
diff changeset
80 if p1_input == "all_individuals":
99a67ac88802 Uploaded
rico
parents:
diff changeset
81 tags = p_total.tag_list()
99a67ac88802 Uploaded
rico
parents:
diff changeset
82 else:
99a67ac88802 Uploaded
rico
parents:
diff changeset
83 p1 = Population()
99a67ac88802 Uploaded
rico
parents:
diff changeset
84 p1.from_population_file(p1_input)
99a67ac88802 Uploaded
rico
parents:
diff changeset
85 if not p_total.is_superset(p1):
99a67ac88802 Uploaded
rico
parents:
diff changeset
86 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
99a67ac88802 Uploaded
rico
parents:
diff changeset
87 sys.exit(1)
99a67ac88802 Uploaded
rico
parents:
diff changeset
88 tags = p1.tag_list()
99a67ac88802 Uploaded
rico
parents:
diff changeset
89
99a67ac88802 Uploaded
rico
parents:
diff changeset
90 for tag in tags:
99a67ac88802 Uploaded
rico
parents:
diff changeset
91 args.append(tag)
99a67ac88802 Uploaded
rico
parents:
diff changeset
92
99a67ac88802 Uploaded
rico
parents:
diff changeset
93 fh = open(phylip_outfile, 'w')
99a67ac88802 Uploaded
rico
parents:
diff changeset
94 run_program(None, args, fh)
99a67ac88802 Uploaded
rico
parents:
diff changeset
95
99a67ac88802 Uploaded
rico
parents:
diff changeset
96 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
97
99a67ac88802 Uploaded
rico
parents:
diff changeset
98 prog = 'quicktree'
99a67ac88802 Uploaded
rico
parents:
diff changeset
99
99a67ac88802 Uploaded
rico
parents:
diff changeset
100 args = []
99a67ac88802 Uploaded
rico
parents:
diff changeset
101 args.append(prog)
99a67ac88802 Uploaded
rico
parents:
diff changeset
102 args.append('-in')
99a67ac88802 Uploaded
rico
parents:
diff changeset
103 args.append('m')
99a67ac88802 Uploaded
rico
parents:
diff changeset
104 args.append('-out')
99a67ac88802 Uploaded
rico
parents:
diff changeset
105 args.append('t')
99a67ac88802 Uploaded
rico
parents:
diff changeset
106 args.append(phylip_outfile)
99a67ac88802 Uploaded
rico
parents:
diff changeset
107
99a67ac88802 Uploaded
rico
parents:
diff changeset
108 fh = open(newick_outfile, 'w')
99a67ac88802 Uploaded
rico
parents:
diff changeset
109 run_program(None, args, fh)
99a67ac88802 Uploaded
rico
parents:
diff changeset
110
99a67ac88802 Uploaded
rico
parents:
diff changeset
111 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
112
99a67ac88802 Uploaded
rico
parents:
diff changeset
113 prog = 'draw_tree'
99a67ac88802 Uploaded
rico
parents:
diff changeset
114
99a67ac88802 Uploaded
rico
parents:
diff changeset
115 args = []
99a67ac88802 Uploaded
rico
parents:
diff changeset
116 args.append(prog)
99a67ac88802 Uploaded
rico
parents:
diff changeset
117 if draw_tree_options:
99a67ac88802 Uploaded
rico
parents:
diff changeset
118 args.append(draw_tree_options)
99a67ac88802 Uploaded
rico
parents:
diff changeset
119 args.append(newick_outfile)
99a67ac88802 Uploaded
rico
parents:
diff changeset
120
99a67ac88802 Uploaded
rico
parents:
diff changeset
121 fh = open(ps_outfile, 'w')
99a67ac88802 Uploaded
rico
parents:
diff changeset
122 run_program(None, args, fh)
99a67ac88802 Uploaded
rico
parents:
diff changeset
123
99a67ac88802 Uploaded
rico
parents:
diff changeset
124 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
125
99a67ac88802 Uploaded
rico
parents:
diff changeset
126 prog = 'ps2pdf'
99a67ac88802 Uploaded
rico
parents:
diff changeset
127
99a67ac88802 Uploaded
rico
parents:
diff changeset
128 args = []
99a67ac88802 Uploaded
rico
parents:
diff changeset
129 args.append(prog)
99a67ac88802 Uploaded
rico
parents:
diff changeset
130 args.append('-dPDFSETTINGS=/prepress')
99a67ac88802 Uploaded
rico
parents:
diff changeset
131 args.append(ps_outfile)
99a67ac88802 Uploaded
rico
parents:
diff changeset
132 args.append('-')
99a67ac88802 Uploaded
rico
parents:
diff changeset
133
99a67ac88802 Uploaded
rico
parents:
diff changeset
134 fh = open(pdf_outfile, 'w')
99a67ac88802 Uploaded
rico
parents:
diff changeset
135 run_program(None, args, fh)
99a67ac88802 Uploaded
rico
parents:
diff changeset
136
99a67ac88802 Uploaded
rico
parents:
diff changeset
137 shutil.copyfile(pdf_outfile, output)
99a67ac88802 Uploaded
rico
parents:
diff changeset
138
99a67ac88802 Uploaded
rico
parents:
diff changeset
139 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
140
99a67ac88802 Uploaded
rico
parents:
diff changeset
141 info_page = gd_composite.InfoPage()
99a67ac88802 Uploaded
rico
parents:
diff changeset
142 info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')
99a67ac88802 Uploaded
rico
parents:
diff changeset
143
99a67ac88802 Uploaded
rico
parents:
diff changeset
144 display_file = gd_composite.DisplayFile()
99a67ac88802 Uploaded
rico
parents:
diff changeset
145 display_value = gd_composite.DisplayValue()
99a67ac88802 Uploaded
rico
parents:
diff changeset
146
99a67ac88802 Uploaded
rico
parents:
diff changeset
147 out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
148 out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
149 out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
150 out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
151 out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file)
99a67ac88802 Uploaded
rico
parents:
diff changeset
152
99a67ac88802 Uploaded
rico
parents:
diff changeset
153 info_page.add_output_parameter(out_pdf)
99a67ac88802 Uploaded
rico
parents:
diff changeset
154 info_page.add_output_parameter(out_newick)
99a67ac88802 Uploaded
rico
parents:
diff changeset
155 info_page.add_output_parameter(out_phylip)
99a67ac88802 Uploaded
rico
parents:
diff changeset
156 info_page.add_output_parameter(out_mega)
99a67ac88802 Uploaded
rico
parents:
diff changeset
157 info_page.add_output_parameter(out_snps)
99a67ac88802 Uploaded
rico
parents:
diff changeset
158
99a67ac88802 Uploaded
rico
parents:
diff changeset
159 in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
160 in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
161
99a67ac88802 Uploaded
rico
parents:
diff changeset
162 include_ref_value = 'no'
99a67ac88802 Uploaded
rico
parents:
diff changeset
163 if dbkey != 'none':
99a67ac88802 Uploaded
rico
parents:
diff changeset
164 include_ref_value = 'yes'
99a67ac88802 Uploaded
rico
parents:
diff changeset
165
99a67ac88802 Uploaded
rico
parents:
diff changeset
166 in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
167
99a67ac88802 Uploaded
rico
parents:
diff changeset
168 if data_source == '0':
99a67ac88802 Uploaded
rico
parents:
diff changeset
169 data_source_value = 'sequence coverage'
99a67ac88802 Uploaded
rico
parents:
diff changeset
170 elif data_source == '1':
99a67ac88802 Uploaded
rico
parents:
diff changeset
171 data_source_value = 'estimated genotype'
99a67ac88802 Uploaded
rico
parents:
diff changeset
172
99a67ac88802 Uploaded
rico
parents:
diff changeset
173 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
174
99a67ac88802 Uploaded
rico
parents:
diff changeset
175 branch_type_value = 'square'
99a67ac88802 Uploaded
rico
parents:
diff changeset
176 if 'd' in draw_tree_options:
99a67ac88802 Uploaded
rico
parents:
diff changeset
177 branch_type_value = 'diagonal'
99a67ac88802 Uploaded
rico
parents:
diff changeset
178
99a67ac88802 Uploaded
rico
parents:
diff changeset
179 in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
180
99a67ac88802 Uploaded
rico
parents:
diff changeset
181 branch_scale_value = 'yes'
99a67ac88802 Uploaded
rico
parents:
diff changeset
182 if 's' in draw_tree_options:
99a67ac88802 Uploaded
rico
parents:
diff changeset
183 branch_scale_value = 'no'
99a67ac88802 Uploaded
rico
parents:
diff changeset
184
99a67ac88802 Uploaded
rico
parents:
diff changeset
185 in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
186
99a67ac88802 Uploaded
rico
parents:
diff changeset
187 branch_length_value = 'yes'
99a67ac88802 Uploaded
rico
parents:
diff changeset
188 if 'b' in draw_tree_options:
99a67ac88802 Uploaded
rico
parents:
diff changeset
189 branch_length_value = 'no'
99a67ac88802 Uploaded
rico
parents:
diff changeset
190
99a67ac88802 Uploaded
rico
parents:
diff changeset
191 in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
192
99a67ac88802 Uploaded
rico
parents:
diff changeset
193 tree_layout_value = 'horizontal'
99a67ac88802 Uploaded
rico
parents:
diff changeset
194 if 'v' in draw_tree_options:
99a67ac88802 Uploaded
rico
parents:
diff changeset
195 tree_layout_value = 'vertical'
99a67ac88802 Uploaded
rico
parents:
diff changeset
196
99a67ac88802 Uploaded
rico
parents:
diff changeset
197 in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value)
99a67ac88802 Uploaded
rico
parents:
diff changeset
198
99a67ac88802 Uploaded
rico
parents:
diff changeset
199 info_page.add_input_parameter(in_min_cov)
99a67ac88802 Uploaded
rico
parents:
diff changeset
200 info_page.add_input_parameter(in_min_qual)
99a67ac88802 Uploaded
rico
parents:
diff changeset
201 info_page.add_input_parameter(in_include_ref)
99a67ac88802 Uploaded
rico
parents:
diff changeset
202 info_page.add_input_parameter(in_data_source)
99a67ac88802 Uploaded
rico
parents:
diff changeset
203 info_page.add_input_parameter(in_branch_type)
99a67ac88802 Uploaded
rico
parents:
diff changeset
204 info_page.add_input_parameter(in_branch_scale)
99a67ac88802 Uploaded
rico
parents:
diff changeset
205 info_page.add_input_parameter(in_branch_length)
99a67ac88802 Uploaded
rico
parents:
diff changeset
206 info_page.add_input_parameter(in_tree_layout)
99a67ac88802 Uploaded
rico
parents:
diff changeset
207
99a67ac88802 Uploaded
rico
parents:
diff changeset
208 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
99a67ac88802 Uploaded
rico
parents:
diff changeset
209
99a67ac88802 Uploaded
rico
parents:
diff changeset
210 info_page.add_misc(misc_individuals)
99a67ac88802 Uploaded
rico
parents:
diff changeset
211
99a67ac88802 Uploaded
rico
parents:
diff changeset
212
99a67ac88802 Uploaded
rico
parents:
diff changeset
213 with open(output, 'w') as ofh:
99a67ac88802 Uploaded
rico
parents:
diff changeset
214 print >> ofh, info_page.render()
99a67ac88802 Uploaded
rico
parents:
diff changeset
215
99a67ac88802 Uploaded
rico
parents:
diff changeset
216 ################################################################################
99a67ac88802 Uploaded
rico
parents:
diff changeset
217
99a67ac88802 Uploaded
rico
parents:
diff changeset
218 sys.exit(0)
99a67ac88802 Uploaded
rico
parents:
diff changeset
219