annotate coverage_distributions.py @ 0:580da578c5e6 default tip

Uploaded
author rico
date Thu, 05 Apr 2012 15:56:36 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
580da578c5e6 Uploaded
rico
parents:
diff changeset
1 #!/usr/bin/env python
580da578c5e6 Uploaded
rico
parents:
diff changeset
2
580da578c5e6 Uploaded
rico
parents:
diff changeset
3 import os
580da578c5e6 Uploaded
rico
parents:
diff changeset
4 import errno
580da578c5e6 Uploaded
rico
parents:
diff changeset
5 import sys
580da578c5e6 Uploaded
rico
parents:
diff changeset
6 import shutil
580da578c5e6 Uploaded
rico
parents:
diff changeset
7 import subprocess
580da578c5e6 Uploaded
rico
parents:
diff changeset
8 from Population import Population
580da578c5e6 Uploaded
rico
parents:
diff changeset
9 import gd_composite
580da578c5e6 Uploaded
rico
parents:
diff changeset
10
580da578c5e6 Uploaded
rico
parents:
diff changeset
11 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
12
580da578c5e6 Uploaded
rico
parents:
diff changeset
13 def mkdir_p(path):
580da578c5e6 Uploaded
rico
parents:
diff changeset
14 try:
580da578c5e6 Uploaded
rico
parents:
diff changeset
15 os.makedirs(path)
580da578c5e6 Uploaded
rico
parents:
diff changeset
16 except OSError, e:
580da578c5e6 Uploaded
rico
parents:
diff changeset
17 if e.errno <> errno.EEXIST:
580da578c5e6 Uploaded
rico
parents:
diff changeset
18 raise
580da578c5e6 Uploaded
rico
parents:
diff changeset
19
580da578c5e6 Uploaded
rico
parents:
diff changeset
20 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
21
580da578c5e6 Uploaded
rico
parents:
diff changeset
22 if len(sys.argv) < 7:
580da578c5e6 Uploaded
rico
parents:
diff changeset
23 print >> sys.stderr, "Usage"
580da578c5e6 Uploaded
rico
parents:
diff changeset
24 sys.exit(1)
580da578c5e6 Uploaded
rico
parents:
diff changeset
25
580da578c5e6 Uploaded
rico
parents:
diff changeset
26 input, data_source, output, extra_files_path = sys.argv[1:5]
580da578c5e6 Uploaded
rico
parents:
diff changeset
27
580da578c5e6 Uploaded
rico
parents:
diff changeset
28 individual_metadata = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
29 population_info = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
30 p1_input = None
580da578c5e6 Uploaded
rico
parents:
diff changeset
31 all_individuals = False
580da578c5e6 Uploaded
rico
parents:
diff changeset
32
580da578c5e6 Uploaded
rico
parents:
diff changeset
33 for arg in sys.argv[5:]:
580da578c5e6 Uploaded
rico
parents:
diff changeset
34 if arg == 'all_individuals':
580da578c5e6 Uploaded
rico
parents:
diff changeset
35 all_individuals = True
580da578c5e6 Uploaded
rico
parents:
diff changeset
36 elif len(arg) > 12 and arg[:12] == 'individuals:':
580da578c5e6 Uploaded
rico
parents:
diff changeset
37 p1_input = arg[12:]
580da578c5e6 Uploaded
rico
parents:
diff changeset
38 elif len(arg) > 11:
580da578c5e6 Uploaded
rico
parents:
diff changeset
39 if arg[:11] == 'population:':
580da578c5e6 Uploaded
rico
parents:
diff changeset
40 file, name = arg[11:].split(':', 1)
580da578c5e6 Uploaded
rico
parents:
diff changeset
41 population_info.append((file, name))
580da578c5e6 Uploaded
rico
parents:
diff changeset
42 elif arg[:11] == 'individual:':
580da578c5e6 Uploaded
rico
parents:
diff changeset
43 individual_metadata.append(arg[11:])
580da578c5e6 Uploaded
rico
parents:
diff changeset
44
580da578c5e6 Uploaded
rico
parents:
diff changeset
45 p_total = Population()
580da578c5e6 Uploaded
rico
parents:
diff changeset
46 p_total.from_tag_list(individual_metadata)
580da578c5e6 Uploaded
rico
parents:
diff changeset
47
580da578c5e6 Uploaded
rico
parents:
diff changeset
48 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
49
580da578c5e6 Uploaded
rico
parents:
diff changeset
50 mkdir_p(extra_files_path)
580da578c5e6 Uploaded
rico
parents:
diff changeset
51
580da578c5e6 Uploaded
rico
parents:
diff changeset
52 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
53
580da578c5e6 Uploaded
rico
parents:
diff changeset
54 prog = 'coverage'
580da578c5e6 Uploaded
rico
parents:
diff changeset
55
580da578c5e6 Uploaded
rico
parents:
diff changeset
56 args = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
57 args.append(prog)
580da578c5e6 Uploaded
rico
parents:
diff changeset
58 args.append(input)
580da578c5e6 Uploaded
rico
parents:
diff changeset
59 args.append(data_source)
580da578c5e6 Uploaded
rico
parents:
diff changeset
60
580da578c5e6 Uploaded
rico
parents:
diff changeset
61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
580da578c5e6 Uploaded
rico
parents:
diff changeset
62 args.append(user_coverage_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
63
580da578c5e6 Uploaded
rico
parents:
diff changeset
64 population_list = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
65
580da578c5e6 Uploaded
rico
parents:
diff changeset
66 if all_individuals:
580da578c5e6 Uploaded
rico
parents:
diff changeset
67 tags = p_total.tag_list()
580da578c5e6 Uploaded
rico
parents:
diff changeset
68 elif p1_input is not None:
580da578c5e6 Uploaded
rico
parents:
diff changeset
69 p1 = Population()
580da578c5e6 Uploaded
rico
parents:
diff changeset
70 this_pop = Population()
580da578c5e6 Uploaded
rico
parents:
diff changeset
71 this_pop.from_population_file(p1_input)
580da578c5e6 Uploaded
rico
parents:
diff changeset
72 population_list.append(this_pop)
580da578c5e6 Uploaded
rico
parents:
diff changeset
73 p1.from_population_file(p1_input)
580da578c5e6 Uploaded
rico
parents:
diff changeset
74 if not p_total.is_superset(p1):
580da578c5e6 Uploaded
rico
parents:
diff changeset
75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
580da578c5e6 Uploaded
rico
parents:
diff changeset
76 sys.exit(1)
580da578c5e6 Uploaded
rico
parents:
diff changeset
77 tags = p1.tag_list()
580da578c5e6 Uploaded
rico
parents:
diff changeset
78 else:
580da578c5e6 Uploaded
rico
parents:
diff changeset
79 tags = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
80 for population_file, population_name in population_info:
580da578c5e6 Uploaded
rico
parents:
diff changeset
81 population = Population()
580da578c5e6 Uploaded
rico
parents:
diff changeset
82 this_pop = Population()
580da578c5e6 Uploaded
rico
parents:
diff changeset
83 this_pop.from_population_file(population_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
84 population_list.append(this_pop)
580da578c5e6 Uploaded
rico
parents:
diff changeset
85 population.from_population_file(population_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
86 if not p_total.is_superset(population):
580da578c5e6 Uploaded
rico
parents:
diff changeset
87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
580da578c5e6 Uploaded
rico
parents:
diff changeset
88 sys.exit(1)
580da578c5e6 Uploaded
rico
parents:
diff changeset
89 columns = population.column_list()
580da578c5e6 Uploaded
rico
parents:
diff changeset
90 for column in columns:
580da578c5e6 Uploaded
rico
parents:
diff changeset
91 tags.append('{0}:{1}'.format(column, population_name))
580da578c5e6 Uploaded
rico
parents:
diff changeset
92
580da578c5e6 Uploaded
rico
parents:
diff changeset
93 for tag in tags:
580da578c5e6 Uploaded
rico
parents:
diff changeset
94 args.append(tag)
580da578c5e6 Uploaded
rico
parents:
diff changeset
95
580da578c5e6 Uploaded
rico
parents:
diff changeset
96 ## text output
580da578c5e6 Uploaded
rico
parents:
diff changeset
97 coverage_file = 'coverage.txt'
580da578c5e6 Uploaded
rico
parents:
diff changeset
98 fh = open(coverage_file, 'w')
580da578c5e6 Uploaded
rico
parents:
diff changeset
99 #print "args:", ' '.join(args)
580da578c5e6 Uploaded
rico
parents:
diff changeset
100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
580da578c5e6 Uploaded
rico
parents:
diff changeset
101 rc = p.wait()
580da578c5e6 Uploaded
rico
parents:
diff changeset
102 fh.close()
580da578c5e6 Uploaded
rico
parents:
diff changeset
103
580da578c5e6 Uploaded
rico
parents:
diff changeset
104 ## graphical output
580da578c5e6 Uploaded
rico
parents:
diff changeset
105 fh = open(coverage_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
106 coverage2_file = 'coverage2.txt'
580da578c5e6 Uploaded
rico
parents:
diff changeset
107 ofh = open(coverage2_file, 'w')
580da578c5e6 Uploaded
rico
parents:
diff changeset
108
580da578c5e6 Uploaded
rico
parents:
diff changeset
109 for line in fh:
580da578c5e6 Uploaded
rico
parents:
diff changeset
110 line = line.rstrip('\r\n')
580da578c5e6 Uploaded
rico
parents:
diff changeset
111 elems = line.split('\t')
580da578c5e6 Uploaded
rico
parents:
diff changeset
112 name = elems.pop(0)
580da578c5e6 Uploaded
rico
parents:
diff changeset
113 values = [ elems[0] ]
580da578c5e6 Uploaded
rico
parents:
diff changeset
114 for idx in range(1, len(elems)):
580da578c5e6 Uploaded
rico
parents:
diff changeset
115 val = str(float(elems[idx]) - float(elems[idx-1]))
580da578c5e6 Uploaded
rico
parents:
diff changeset
116 values.append(val)
580da578c5e6 Uploaded
rico
parents:
diff changeset
117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
580da578c5e6 Uploaded
rico
parents:
diff changeset
118
580da578c5e6 Uploaded
rico
parents:
diff changeset
119 fh.close()
580da578c5e6 Uploaded
rico
parents:
diff changeset
120 ofh.close()
580da578c5e6 Uploaded
rico
parents:
diff changeset
121
580da578c5e6 Uploaded
rico
parents:
diff changeset
122 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
123
580da578c5e6 Uploaded
rico
parents:
diff changeset
124 prog = 'R'
580da578c5e6 Uploaded
rico
parents:
diff changeset
125
580da578c5e6 Uploaded
rico
parents:
diff changeset
126 args = []
580da578c5e6 Uploaded
rico
parents:
diff changeset
127 args.append(prog)
580da578c5e6 Uploaded
rico
parents:
diff changeset
128 args.append('--vanilla')
580da578c5e6 Uploaded
rico
parents:
diff changeset
129 args.append('--quiet')
580da578c5e6 Uploaded
rico
parents:
diff changeset
130
580da578c5e6 Uploaded
rico
parents:
diff changeset
131 _realpath = os.path.realpath(__file__)
580da578c5e6 Uploaded
rico
parents:
diff changeset
132 _script_dir = os.path.dirname(_realpath)
580da578c5e6 Uploaded
rico
parents:
diff changeset
133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
580da578c5e6 Uploaded
rico
parents:
diff changeset
134
580da578c5e6 Uploaded
rico
parents:
diff changeset
135 ifh = open(r_script_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
136 ofh = open('/dev/null', 'w')
580da578c5e6 Uploaded
rico
parents:
diff changeset
137 #print "args:", ' '.join(args)
580da578c5e6 Uploaded
rico
parents:
diff changeset
138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
580da578c5e6 Uploaded
rico
parents:
diff changeset
139 rc = p.wait()
580da578c5e6 Uploaded
rico
parents:
diff changeset
140 ifh.close()
580da578c5e6 Uploaded
rico
parents:
diff changeset
141 ofh.close()
580da578c5e6 Uploaded
rico
parents:
diff changeset
142
580da578c5e6 Uploaded
rico
parents:
diff changeset
143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
580da578c5e6 Uploaded
rico
parents:
diff changeset
144 shutil.copy2('coverage.pdf', pdf_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
145 os.remove('coverage.pdf')
580da578c5e6 Uploaded
rico
parents:
diff changeset
146 os.remove(coverage2_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
147
580da578c5e6 Uploaded
rico
parents:
diff changeset
148 ################################################################################
580da578c5e6 Uploaded
rico
parents:
diff changeset
149
580da578c5e6 Uploaded
rico
parents:
diff changeset
150 info_page = gd_composite.InfoPage()
580da578c5e6 Uploaded
rico
parents:
diff changeset
151 info_page.set_title('Coverage distributions Galaxy Composite Dataset')
580da578c5e6 Uploaded
rico
parents:
diff changeset
152
580da578c5e6 Uploaded
rico
parents:
diff changeset
153 display_file = gd_composite.DisplayFile()
580da578c5e6 Uploaded
rico
parents:
diff changeset
154 display_value = gd_composite.DisplayValue()
580da578c5e6 Uploaded
rico
parents:
diff changeset
155
580da578c5e6 Uploaded
rico
parents:
diff changeset
156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
580da578c5e6 Uploaded
rico
parents:
diff changeset
158
580da578c5e6 Uploaded
rico
parents:
diff changeset
159 info_page.add_output_parameter(out_pdf)
580da578c5e6 Uploaded
rico
parents:
diff changeset
160 info_page.add_output_parameter(out_txt)
580da578c5e6 Uploaded
rico
parents:
diff changeset
161
580da578c5e6 Uploaded
rico
parents:
diff changeset
162
580da578c5e6 Uploaded
rico
parents:
diff changeset
163 if data_source == '0':
580da578c5e6 Uploaded
rico
parents:
diff changeset
164 data_source_value = 'sequence coverage'
580da578c5e6 Uploaded
rico
parents:
diff changeset
165 elif data_source == '1':
580da578c5e6 Uploaded
rico
parents:
diff changeset
166 data_source_value = 'estimated genotype'
580da578c5e6 Uploaded
rico
parents:
diff changeset
167
580da578c5e6 Uploaded
rico
parents:
diff changeset
168 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
580da578c5e6 Uploaded
rico
parents:
diff changeset
169
580da578c5e6 Uploaded
rico
parents:
diff changeset
170 info_page.add_input_parameter(in_data_source)
580da578c5e6 Uploaded
rico
parents:
diff changeset
171
580da578c5e6 Uploaded
rico
parents:
diff changeset
172 if population_list:
580da578c5e6 Uploaded
rico
parents:
diff changeset
173 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
580da578c5e6 Uploaded
rico
parents:
diff changeset
174 info_page.add_misc(misc_populations)
580da578c5e6 Uploaded
rico
parents:
diff changeset
175 else:
580da578c5e6 Uploaded
rico
parents:
diff changeset
176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
580da578c5e6 Uploaded
rico
parents:
diff changeset
177 info_page.add_misc(misc_individuals)
580da578c5e6 Uploaded
rico
parents:
diff changeset
178
580da578c5e6 Uploaded
rico
parents:
diff changeset
179
580da578c5e6 Uploaded
rico
parents:
diff changeset
180
580da578c5e6 Uploaded
rico
parents:
diff changeset
181
580da578c5e6 Uploaded
rico
parents:
diff changeset
182 with open (output, 'w') as ofh:
580da578c5e6 Uploaded
rico
parents:
diff changeset
183 print >> ofh, info_page.render()
580da578c5e6 Uploaded
rico
parents:
diff changeset
184
580da578c5e6 Uploaded
rico
parents:
diff changeset
185
580da578c5e6 Uploaded
rico
parents:
diff changeset
186 sys.exit(0)
580da578c5e6 Uploaded
rico
parents:
diff changeset
187