Mercurial > repos > rico > testing_again
comparison dpmix.py @ 0:4d28d3295ac3 default tip
Uploaded
| author | rico |
|---|---|
| date | Fri, 06 Apr 2012 13:46:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4d28d3295ac3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import errno | |
| 4 import sys | |
| 5 import os | |
| 6 import subprocess | |
| 7 from Population import Population | |
| 8 import gd_composite | |
| 9 from dpmix_plot import make_dpmix_plot | |
| 10 | |
| 11 ################################################################################ | |
| 12 | |
| 13 def mkdir_p(path): | |
| 14 try: | |
| 15 os.makedirs(path) | |
| 16 except OSError, e: | |
| 17 if e.errno <> errno.EEXIST: | |
| 18 raise | |
| 19 | |
| 20 def run_program(prog, args, stdout_file=None, space_to_tab=False): | |
| 21 #print "args: ", ' '.join(args) | |
| 22 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 23 (stdoutdata, stderrdata) = p.communicate() | |
| 24 rc = p.returncode | |
| 25 | |
| 26 if stdout_file is not None: | |
| 27 with open(stdout_file, 'w') as ofh: | |
| 28 lines = stdoutdata.split('\n') | |
| 29 for line in lines: | |
| 30 line = line.strip() | |
| 31 if line: | |
| 32 if space_to_tab: | |
| 33 line = line.replace(' ', '\t') | |
| 34 print >> ofh, line | |
| 35 | |
| 36 if rc != 0: | |
| 37 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) | |
| 38 print >> sys.stderr, stderrdata | |
| 39 sys.exit(1) | |
| 40 | |
| 41 ################################################################################ | |
| 42 | |
| 43 if len(sys.argv) < 14: | |
| 44 print "usage" | |
| 45 sys.exit(1) | |
| 46 | |
| 47 input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir = sys.argv[1:13] | |
| 48 individual_metadata = sys.argv[13:] | |
| 49 | |
| 50 chrom = 'all' | |
| 51 analyze_additional = '0' | |
| 52 add_logs = '0' | |
| 53 | |
| 54 population_list = [] | |
| 55 | |
| 56 p_total = Population() | |
| 57 p_total.from_tag_list(individual_metadata) | |
| 58 | |
| 59 ap1 = Population(name='Ancestral population 1') | |
| 60 ap1.from_population_file(ap1_input) | |
| 61 population_list.append(ap1) | |
| 62 if not p_total.is_superset(ap1): | |
| 63 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' | |
| 64 sys.exit(1) | |
| 65 | |
| 66 ap2 = Population(name='Ancestral population 2') | |
| 67 ap2.from_population_file(ap2_input) | |
| 68 population_list.append(ap2) | |
| 69 if not p_total.is_superset(ap2): | |
| 70 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' | |
| 71 sys.exit(1) | |
| 72 | |
| 73 p = Population(name='Potentially admixed') | |
| 74 p.from_population_file(p_input) | |
| 75 population_list.append(p) | |
| 76 if not p_total.is_superset(p): | |
| 77 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
| 78 sys.exit(1) | |
| 79 | |
| 80 mkdir_p(output2_dir) | |
| 81 | |
| 82 ################################################################################ | |
| 83 # Create tabular file | |
| 84 ################################################################################ | |
| 85 | |
| 86 misc_file = os.path.join(output2_dir, 'misc.txt') | |
| 87 | |
| 88 prog = 'dpmix' | |
| 89 args = [ prog ] | |
| 90 args.append(input) | |
| 91 args.append(ref_column) | |
| 92 args.append(chrom) | |
| 93 args.append(data_source) | |
| 94 args.append(add_logs) | |
| 95 args.append(switch_penalty) | |
| 96 args.append(analyze_additional) | |
| 97 args.append(misc_file) | |
| 98 | |
| 99 columns = ap1.column_list() | |
| 100 for column in columns: | |
| 101 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name)) | |
| 102 | |
| 103 columns = ap2.column_list() | |
| 104 for column in columns: | |
| 105 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) | |
| 106 | |
| 107 columns = p.column_list() | |
| 108 for column in columns: | |
| 109 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name)) | |
| 110 | |
| 111 run_program(None, args, stdout_file=output, space_to_tab=True) | |
| 112 | |
| 113 ################################################################################ | |
| 114 # Create pdf file | |
| 115 ################################################################################ | |
| 116 | |
| 117 pdf_file = os.path.join(output2_dir, 'dpmix.pdf') | |
| 118 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) | |
| 119 | |
| 120 ################################################################################ | |
| 121 # Create html | |
| 122 ################################################################################ | |
| 123 | |
| 124 info_page = gd_composite.InfoPage() | |
| 125 info_page.set_title('dpmix Galaxy Composite Dataset') | |
| 126 | |
| 127 display_file = gd_composite.DisplayFile() | |
| 128 display_value = gd_composite.DisplayValue() | |
| 129 | |
| 130 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) | |
| 131 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) | |
| 132 | |
| 133 info_page.add_output_parameter(out_pdf) | |
| 134 info_page.add_output_parameter(out_misc) | |
| 135 | |
| 136 if data_source == '0': | |
| 137 data_source_value = 'sequence coverage' | |
| 138 elif data_source == '1': | |
| 139 data_source_value = 'estimated genotype' | |
| 140 | |
| 141 if analyze_additional == '0': | |
| 142 analyze_additional_value = 'no' | |
| 143 elif analyze_additional == '1': | |
| 144 analyze_additional_value = 'yes' | |
| 145 | |
| 146 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
| 147 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
| 148 in_analyze_additional = gd_composite.Parameter(description='Also analyze random chromosome', value=analyze_additional_value, display_type=display_value) | |
| 149 | |
| 150 info_page.add_input_parameter(in_data_source) | |
| 151 info_page.add_input_parameter(in_switch_penalty) | |
| 152 info_page.add_input_parameter(in_analyze_additional) | |
| 153 | |
| 154 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
| 155 | |
| 156 info_page.add_misc(misc_populations) | |
| 157 | |
| 158 with open(output2, 'w') as ofh: | |
| 159 print >> ofh, info_page.render() | |
| 160 | |
| 161 sys.exit(0) | |
| 162 | |
| 163 |
