Mercurial > repos > rico > quote_test
comparison prepare_population_structure.py @ 0:939c20789501 default tip
Uploaded
| author | rico |
|---|---|
| date | Fri, 06 Apr 2012 10:51:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:939c20789501 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import errno | |
| 4 import os | |
| 5 import shutil | |
| 6 import subprocess | |
| 7 import sys | |
| 8 from Population import Population | |
| 9 import gd_composite | |
| 10 | |
| 11 ################################################################################ | |
| 12 | |
| 13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list): | |
| 14 info_page = gd_composite.InfoPage() | |
| 15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') | |
| 16 | |
| 17 display_file = gd_composite.DisplayFile() | |
| 18 display_value = gd_composite.DisplayValue() | |
| 19 | |
| 20 out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file) | |
| 21 out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file) | |
| 22 out_use = gd_composite.Parameter(description=using_info, display_type=display_value) | |
| 23 | |
| 24 info_page.add_output_parameter(out_ped) | |
| 25 info_page.add_output_parameter(out_map) | |
| 26 info_page.add_output_parameter(out_use) | |
| 27 | |
| 28 in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value) | |
| 29 in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value) | |
| 30 in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value) | |
| 31 | |
| 32 info_page.add_input_parameter(in_min_reads) | |
| 33 info_page.add_input_parameter(in_min_qual) | |
| 34 info_page.add_input_parameter(in_min_spacing) | |
| 35 | |
| 36 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
| 37 info_page.add_misc(misc_populations) | |
| 38 | |
| 39 with open(filename, 'w') as ofh: | |
| 40 print >> ofh, info_page.render() | |
| 41 | |
| 42 def mkdir_p(path): | |
| 43 try: | |
| 44 os.makedirs(path) | |
| 45 except OSError, e: | |
| 46 if e.errno <> errno.EEXIST: | |
| 47 raise | |
| 48 | |
| 49 def die(message, exit=True): | |
| 50 print >> sys.stderr, message | |
| 51 if exit: | |
| 52 sys.exit(1) | |
| 53 | |
| 54 ################################################################################ | |
| 55 | |
| 56 if len(sys.argv) < 9: | |
| 57 die("Usage") | |
| 58 | |
| 59 # parse command line | |
| 60 input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7] | |
| 61 args = sys.argv[7:] | |
| 62 | |
| 63 individual_metadata = [] | |
| 64 population_files = [] | |
| 65 population_names = [] | |
| 66 all_individuals = False | |
| 67 | |
| 68 for arg in args: | |
| 69 if arg == 'all_individuals': | |
| 70 all_individuals = True | |
| 71 elif len(arg) > 11: | |
| 72 tag = arg[:11] | |
| 73 value = arg[11:] | |
| 74 if tag == 'individual:': | |
| 75 individual_metadata.append(value) | |
| 76 elif tag == 'population:': | |
| 77 filename, name = value.split(':', 1) | |
| 78 population_files.append(filename) | |
| 79 population_names.append(name) | |
| 80 | |
| 81 p_total = Population() | |
| 82 p_total.from_tag_list(individual_metadata) | |
| 83 | |
| 84 individual_population = {} | |
| 85 | |
| 86 population_list = [] | |
| 87 | |
| 88 if all_individuals: | |
| 89 p1 = p_total | |
| 90 p1.name = 'All Individuals' | |
| 91 population_list.append(p1) | |
| 92 else: | |
| 93 p1 = Population() | |
| 94 for idx in range(len(population_files)): | |
| 95 population_file = population_files[idx] | |
| 96 population_name = population_names[idx] | |
| 97 this_pop = Population(population_name) | |
| 98 this_pop.from_population_file(population_file) | |
| 99 population_list.append(this_pop) | |
| 100 p1.from_population_file(population_file) | |
| 101 tags = p1.tag_list() | |
| 102 for tag in tags: | |
| 103 if tag not in individual_population: | |
| 104 individual_population[tag] = population_name | |
| 105 | |
| 106 if not p_total.is_superset(p1): | |
| 107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
| 108 sys.exit(1) | |
| 109 | |
| 110 # run tool | |
| 111 prog = 'admix_prep' | |
| 112 | |
| 113 args = [] | |
| 114 args.append(prog) | |
| 115 args.append(input_snp_filename) | |
| 116 args.append(min_reads) | |
| 117 args.append(min_qual) | |
| 118 args.append(min_spacing) | |
| 119 | |
| 120 tags = p1.tag_list() | |
| 121 for tag in tags: | |
| 122 args.append(tag) | |
| 123 | |
| 124 #print "args:", ' '.join(args) | |
| 125 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) | |
| 126 (stdoutdata, stderrdata) = p.communicate() | |
| 127 rc = p.returncode | |
| 128 | |
| 129 if rc != 0: | |
| 130 die('admix_prep failed: rc={0}'.format(rc)) | |
| 131 | |
| 132 using_info = stdoutdata.rstrip('\r\n') | |
| 133 mkdir_p(output_files_path) | |
| 134 output_ped_filename = os.path.join(output_files_path, 'admix.ped') | |
| 135 output_map_filename = os.path.join(output_files_path, 'admix.map') | |
| 136 shutil.copy2('admix.ped', output_ped_filename) | |
| 137 shutil.copy2('admix.map', output_map_filename) | |
| 138 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list) | |
| 139 | |
| 140 os.unlink('admix.ped') | |
| 141 os.unlink('admix.map') | |
| 142 | |
| 143 sys.exit(0) | |
| 144 |
