Mercurial > repos > rijst > snptools
comparison snpsplit.py @ 3:1f00946b18c2 draft default tip
Uploaded
| author | rijst |
|---|---|
| date | Wed, 12 Dec 2012 09:09:45 -0500 |
| parents | cc961e057668 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:7e46920d9664 | 3:1f00946b18c2 |
|---|---|
| 1 '''This script takes a tab-delimited file containing position, ref base, mut base and splits any multicharacter ref or mut base entries into separate lines, calculating the new positions''' | |
| 2 | |
| 3 import sys | |
| 4 | |
# Command-line handling: the script needs exactly an input path and an output
# path.  sys.exit() is used rather than the bare exit() helper because exit()
# is injected by the `site` module for interactive use and is not guaranteed
# to exist (e.g. under `python -S` or in frozen builds).
if len(sys.argv) != 3:
    sys.exit("snpsplit takes exactly two arguments (input and output file), no more and no less")

input_name = sys.argv[1]
output_name = sys.argv[2]

# Open both files up front so that a bad path is reported before any
# processing starts.  Only the OS error text (strerror) is shown to the user.
try:
    in_file = open(input_name)
except IOError as e:
    sys.exit("Error trying to open '%s': %s" % (input_name, e.strerror))

try:
    out_file = open(output_name, 'w')
except IOError as e:
    # Do not leave the already-opened input handle dangling on the way out.
    in_file.close()
    sys.exit("Error trying to open '%s': %s" % (output_name, e.strerror))
| 20 | |
def splitter(cells):
    """Write one output row per differing base of a (multi-base) substitution.

    ``cells`` is a split input row: ``cells[0]`` is the start position,
    ``cells[1]`` the reference base string and ``cells[2]`` the mutated base
    string.  The two strings are walked in lockstep; for every offset where
    the bases differ, a ``position+offset <TAB> ref <TAB> mut`` line is
    written to the module-level ``out_file`` and the module-level counter
    ``out_lines`` is incremented.  Offsets where ref and mut agree produce
    no output.

    The caller is expected to pass ref and mut strings of equal length; if
    they differ, only the overlapping prefix is compared.

    Raises ValueError if ``cells[0]`` is not an integer and at least one
    base pair has to be written.
    """
    global out_lines
    for offset, (ref_base, mut_base) in enumerate(zip(cells[1], cells[2])):
        if ref_base == mut_base:
            # Unchanged base inside an MNP: nothing to report.
            continue
        out_file.write(str(int(cells[0]) + offset) + '\t' + ref_base + '\t' + mut_base + '\n')
        out_lines += 1
| 27 | |
# Counters: in_lines counts every line read from the input; out_lines counts
# only the rows emitted by splitter() (the header and passed-through lines
# are not included).
in_lines = out_lines = 0
out_file.write("Position\tRef\tMut\n")
try:
    for line in in_file:
        in_lines += 1

        # Anything that does not start with a digit (header, comments, blank
        # lines) is copied to the output unchanged.
        if not line[0].isdigit():
            out_file.write(line)
            continue

        cells = line.rstrip().split('\t')

        # A data row needs position, ref and mut.  rstrip() also removes
        # trailing tabs, so a row whose last column is empty ends up short;
        # skip it instead of crashing with IndexError.
        if len(cells) < 3:
            continue

        # Can only deal with SNPs/MNPs, not indels.
        if len(cells[1]) != len(cells[2]):
            continue
        splitter(cells)
finally:
    # Close both handles even if a malformed row aborts the loop, so that
    # whatever was written so far is flushed to disk.
    in_file.close()
    out_file.close()

# Parenthesised single-argument form prints identically on Python 2 and 3.
print("Lines read: %s" % in_lines)
print("Lines printed: %s" % out_lines)
