annotate dpmix.py @ 0:32f8fbdd754c default tip

Uploaded
author rico
date Fri, 06 Apr 2012 13:59:29 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
32f8fbdd754c Uploaded
rico
parents:
diff changeset
1 #!/usr/bin/env python
32f8fbdd754c Uploaded
rico
parents:
diff changeset
2
32f8fbdd754c Uploaded
rico
parents:
diff changeset
3 import errno
32f8fbdd754c Uploaded
rico
parents:
diff changeset
4 import sys
32f8fbdd754c Uploaded
rico
parents:
diff changeset
5 import os
32f8fbdd754c Uploaded
rico
parents:
diff changeset
6 import subprocess
32f8fbdd754c Uploaded
rico
parents:
diff changeset
7 from Population import Population
32f8fbdd754c Uploaded
rico
parents:
diff changeset
8 import gd_composite
32f8fbdd754c Uploaded
rico
parents:
diff changeset
9 from dpmix_plot import make_dpmix_plot
32f8fbdd754c Uploaded
rico
parents:
diff changeset
10
32f8fbdd754c Uploaded
rico
parents:
diff changeset
11 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
12
32f8fbdd754c Uploaded
rico
parents:
diff changeset
13 def mkdir_p(path):
32f8fbdd754c Uploaded
rico
parents:
diff changeset
14 try:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
15 os.makedirs(path)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
16 except OSError, e:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
17 if e.errno <> errno.EEXIST:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
18 raise
32f8fbdd754c Uploaded
rico
parents:
diff changeset
19
32f8fbdd754c Uploaded
rico
parents:
diff changeset
20 def run_program(prog, args, stdout_file=None, space_to_tab=False):
32f8fbdd754c Uploaded
rico
parents:
diff changeset
21 #print "args: ", ' '.join(args)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
22 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
23 (stdoutdata, stderrdata) = p.communicate()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
24 rc = p.returncode
32f8fbdd754c Uploaded
rico
parents:
diff changeset
25
32f8fbdd754c Uploaded
rico
parents:
diff changeset
26 if stdout_file is not None:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
27 with open(stdout_file, 'w') as ofh:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
28 lines = stdoutdata.split('\n')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
29 for line in lines:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
30 line = line.strip()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
31 if line:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
32 if space_to_tab:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
33 line = line.replace(' ', '\t')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
34 print >> ofh, line
32f8fbdd754c Uploaded
rico
parents:
diff changeset
35
32f8fbdd754c Uploaded
rico
parents:
diff changeset
36 if rc != 0:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
37 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
32f8fbdd754c Uploaded
rico
parents:
diff changeset
38 print >> sys.stderr, stderrdata
32f8fbdd754c Uploaded
rico
parents:
diff changeset
39 sys.exit(1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
40
32f8fbdd754c Uploaded
rico
parents:
diff changeset
41 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
42
32f8fbdd754c Uploaded
rico
parents:
diff changeset
43 if len(sys.argv) < 14:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
44 print "usage"
32f8fbdd754c Uploaded
rico
parents:
diff changeset
45 sys.exit(1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
46
32f8fbdd754c Uploaded
rico
parents:
diff changeset
47 input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir = sys.argv[1:13]
32f8fbdd754c Uploaded
rico
parents:
diff changeset
48 individual_metadata = sys.argv[13:]
32f8fbdd754c Uploaded
rico
parents:
diff changeset
49
32f8fbdd754c Uploaded
rico
parents:
diff changeset
50 chrom = 'all'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
51 analyze_additional = '0'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
52 add_logs = '0'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
53
32f8fbdd754c Uploaded
rico
parents:
diff changeset
54 population_list = []
32f8fbdd754c Uploaded
rico
parents:
diff changeset
55
32f8fbdd754c Uploaded
rico
parents:
diff changeset
56 p_total = Population()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
57 p_total.from_tag_list(individual_metadata)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
58
32f8fbdd754c Uploaded
rico
parents:
diff changeset
59 ap1 = Population(name='Ancestral population 1')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
60 ap1.from_population_file(ap1_input)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
61 population_list.append(ap1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
62 if not p_total.is_superset(ap1):
32f8fbdd754c Uploaded
rico
parents:
diff changeset
63 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
64 sys.exit(1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
65
32f8fbdd754c Uploaded
rico
parents:
diff changeset
66 ap2 = Population(name='Ancestral population 2')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
67 ap2.from_population_file(ap2_input)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
68 population_list.append(ap2)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
69 if not p_total.is_superset(ap2):
32f8fbdd754c Uploaded
rico
parents:
diff changeset
70 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
71 sys.exit(1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
72
32f8fbdd754c Uploaded
rico
parents:
diff changeset
73 p = Population(name='Potentially admixed')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
74 p.from_population_file(p_input)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
75 population_list.append(p)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
76 if not p_total.is_superset(p):
32f8fbdd754c Uploaded
rico
parents:
diff changeset
77 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
78 sys.exit(1)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
79
32f8fbdd754c Uploaded
rico
parents:
diff changeset
80 mkdir_p(output2_dir)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
81
32f8fbdd754c Uploaded
rico
parents:
diff changeset
82 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
83 # Create tabular file
32f8fbdd754c Uploaded
rico
parents:
diff changeset
84 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
85
32f8fbdd754c Uploaded
rico
parents:
diff changeset
86 misc_file = os.path.join(output2_dir, 'misc.txt')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
87
32f8fbdd754c Uploaded
rico
parents:
diff changeset
88 prog = 'dpmix'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
89 args = [ prog ]
32f8fbdd754c Uploaded
rico
parents:
diff changeset
90 args.append(input)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
91 args.append(ref_column)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
92 args.append(chrom)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
93 args.append(data_source)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
94 args.append(add_logs)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
95 args.append(switch_penalty)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
96 args.append(analyze_additional)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
97 args.append(misc_file)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
98
32f8fbdd754c Uploaded
rico
parents:
diff changeset
99 columns = ap1.column_list()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
100 for column in columns:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
101 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
32f8fbdd754c Uploaded
rico
parents:
diff changeset
102
32f8fbdd754c Uploaded
rico
parents:
diff changeset
103 columns = ap2.column_list()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
104 for column in columns:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
105 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))
32f8fbdd754c Uploaded
rico
parents:
diff changeset
106
32f8fbdd754c Uploaded
rico
parents:
diff changeset
107 columns = p.column_list()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
108 for column in columns:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
109 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
32f8fbdd754c Uploaded
rico
parents:
diff changeset
110
32f8fbdd754c Uploaded
rico
parents:
diff changeset
111 run_program(None, args, stdout_file=output, space_to_tab=True)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
112
32f8fbdd754c Uploaded
rico
parents:
diff changeset
113 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
114 # Create pdf file
32f8fbdd754c Uploaded
rico
parents:
diff changeset
115 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
116
32f8fbdd754c Uploaded
rico
parents:
diff changeset
117 pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
118 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
119
32f8fbdd754c Uploaded
rico
parents:
diff changeset
120 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
121 # Create html
32f8fbdd754c Uploaded
rico
parents:
diff changeset
122 ################################################################################
32f8fbdd754c Uploaded
rico
parents:
diff changeset
123
32f8fbdd754c Uploaded
rico
parents:
diff changeset
124 info_page = gd_composite.InfoPage()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
125 info_page.set_title('dpmix Galaxy Composite Dataset')
32f8fbdd754c Uploaded
rico
parents:
diff changeset
126
32f8fbdd754c Uploaded
rico
parents:
diff changeset
127 display_file = gd_composite.DisplayFile()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
128 display_value = gd_composite.DisplayValue()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
129
32f8fbdd754c Uploaded
rico
parents:
diff changeset
130 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
131 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
132
32f8fbdd754c Uploaded
rico
parents:
diff changeset
133 info_page.add_output_parameter(out_pdf)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
134 info_page.add_output_parameter(out_misc)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
135
32f8fbdd754c Uploaded
rico
parents:
diff changeset
136 if data_source == '0':
32f8fbdd754c Uploaded
rico
parents:
diff changeset
137 data_source_value = 'sequence coverage'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
138 elif data_source == '1':
32f8fbdd754c Uploaded
rico
parents:
diff changeset
139 data_source_value = 'estimated genotype'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
140
32f8fbdd754c Uploaded
rico
parents:
diff changeset
141 if analyze_additional == '0':
32f8fbdd754c Uploaded
rico
parents:
diff changeset
142 analyze_additional_value = 'no'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
143 elif analyze_additional == '1':
32f8fbdd754c Uploaded
rico
parents:
diff changeset
144 analyze_additional_value = 'yes'
32f8fbdd754c Uploaded
rico
parents:
diff changeset
145
32f8fbdd754c Uploaded
rico
parents:
diff changeset
146 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
147 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
148 in_analyze_additional = gd_composite.Parameter(description='Also analyze random chromosome', value=analyze_additional_value, display_type=display_value)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
149
32f8fbdd754c Uploaded
rico
parents:
diff changeset
150 info_page.add_input_parameter(in_data_source)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
151 info_page.add_input_parameter(in_switch_penalty)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
152 info_page.add_input_parameter(in_analyze_additional)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
153
32f8fbdd754c Uploaded
rico
parents:
diff changeset
154 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
32f8fbdd754c Uploaded
rico
parents:
diff changeset
155
32f8fbdd754c Uploaded
rico
parents:
diff changeset
156 info_page.add_misc(misc_populations)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
157
32f8fbdd754c Uploaded
rico
parents:
diff changeset
158 with open(output2, 'w') as ofh:
32f8fbdd754c Uploaded
rico
parents:
diff changeset
159 print >> ofh, info_page.render()
32f8fbdd754c Uploaded
rico
parents:
diff changeset
160
32f8fbdd754c Uploaded
rico
parents:
diff changeset
161 sys.exit(0)
32f8fbdd754c Uploaded
rico
parents:
diff changeset
162
32f8fbdd754c Uploaded
rico
parents:
diff changeset
163