Mercurial > repos > rijst > snptools
comparison tablemerger.py @ 3:1f00946b18c2 draft default tip
Uploaded
| author | rijst |
|---|---|
| date | Wed, 12 Dec 2012 09:09:45 -0500 |
| parents | cc961e057668 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:7e46920d9664 | 3:1f00946b18c2 |
|---|---|
| 1 '''Takes tab-delimited SNP tables from user input and merges them into one.''' | |
| 2 | |
| 3 import sys | |
| 4 files = [] | |
| 5 filenames = [] | |
| 6 | |
| 7 try: | |
| 8 output = open(sys.argv[1], "w") | |
| 9 output.write('Position\tReference') | |
| 10 except: | |
| 11 exit("No output file given or unable to open output file.") | |
| 12 for name in sys.argv[2:]: | |
| 13 try: | |
| 14 files.append(open(name, "rU")) | |
| 15 except: | |
| 16 continue | |
| 17 | |
| 18 # Fetch headers and print them to output file; | |
| 19 headers = [header.readline()[:-1].split('\t')[2:] for header in files] | |
| 20 columns = [len(strains) for strains in headers] | |
| 21 for strain in [a for b in headers for a in b]: | |
| 22 output.write('\t'+strain) | |
| 23 output.flush() | |
| 24 | |
| 25 file_active = [True]*len(files) | |
| 26 snps = [row.readline()[:-1].split('\t') for row in files] | |
| 27 | |
| 28 while True in file_active: | |
| 29 for h in range(0,len(snps)): | |
| 30 if file_active[h]: | |
| 31 cur_pos = [h] | |
| 32 lowest = int(snps[h][0]) | |
| 33 break | |
| 34 i = 1 | |
| 35 | |
| 36 # Determine lowest position | |
| 37 while i < len(snps): | |
| 38 if int(snps[i][0]) < lowest and file_active[i]: | |
| 39 lowest = int(snps[i][0]) | |
| 40 cur_pos = [i] | |
| 41 elif int(snps[i][0]) == lowest: | |
| 42 cur_pos.append(i) | |
| 43 i+=1 | |
| 44 | |
| 45 # Check if all SNPs sharing a position have the same reference base, exit with message otherwise; | |
| 46 if len(cur_pos) > 1: | |
| 47 ref_base = snps[cur_pos[0]][1].lower() | |
| 48 for j in cur_pos[1:]: | |
| 49 if snps[j][1].lower() != ref_base: | |
| 50 error = '\nError: Reference bases not the same for position %s, present in following files:' % lowest | |
| 51 for k in cur_pos: | |
| 52 error += ' '+filenames[k] | |
| 53 exit(error+'.') | |
| 54 | |
| 55 # Write line to output file containing position, ref base and snps/empty cells; | |
| 56 output.write('\n'+snps[cur_pos[0]][0]+'\t'+snps[cur_pos[0]][1].lower()) | |
| 57 for l,row in enumerate(snps): | |
| 58 if l in cur_pos: | |
| 59 for snp in row[2:]: | |
| 60 output.write('\t'+snp) | |
| 61 else: | |
| 62 output.write('\t'*columns[l]) | |
| 63 | |
| 64 # Read new line in files that had snp at current position; | |
| 65 for m in cur_pos: | |
| 66 line = files[m].readline() | |
| 67 if line == '': file_active[m] = False | |
| 68 else: | |
| 69 snps[m] = line.split('\t') | |
| 70 snps[m][-1] = snps[m][-1].rstrip()# Remove newline character at end of line; | |
| 71 | |
| 72 for it in files: it.close() | |
| 73 output.close() |
