annotate blat_mapping.py @ 0:81608f3c2f32 draft default tip

Imported from capsule None
author devteam
date Mon, 19 May 2014 10:59:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
2
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
3 import os, sys
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
4
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
5 assert sys.version_info[:2] >= ( 2, 4 )
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
6
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
7 def reverse_complement(s):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
8 complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
9 reversed_s = []
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
10 for i in s:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
11 reversed_s.append(complement_dna[i])
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
12 reversed_s.reverse()
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
13 return "".join(reversed_s)
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
14
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
15 def __main__():
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
16 nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4}
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
17 coverage = {} # key = (chrom, index)
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
18 invalid_lines = 0
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
19 invalid_chrom = 0
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
20 infile = sys.argv[1]
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
21 outfile = sys.argv[2]
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
22
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
23 for i, line in enumerate( open( infile ) ):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
24 line = line.rstrip('\r\n')
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
25 if not line or line.startswith('#'):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
26 continue
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
27 fields = line.split()
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
28 if len(fields) < 21: # standard number of pslx columns
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
29 invalid_lines += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
30 continue
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
31 if not fields[0].isdigit():
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
32 invalid_lines += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
33 continue
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
34 chrom = fields[13]
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
35 if not chrom.startswith( 'chr' ):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
36 invalid_lines += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
37 invalid_chrom += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
38 continue
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
39 try:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
40 block_count = int(fields[17])
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
41 except:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
42 invalid_lines += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
43 continue
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
44 block_size = fields[18].split(',')
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
45 chrom_start = fields[20].split(',')
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
46
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
47 for j in range( block_count ):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
48 try:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
49 this_block_size = int(block_size[j])
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
50 this_chrom_start = int(chrom_start[j])
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
51 except:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
52 invalid_lines += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
53 break
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
54 # brut force coverage
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
55 for k in range( this_block_size ):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
56 cur_index = this_chrom_start + k
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
57 if coverage.has_key( ( chrom, cur_index ) ):
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
58 coverage[(chrom, cur_index)] += 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
59 else:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
60 coverage[(chrom, cur_index)] = 1
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
61
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
62 # generate a index file
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
63 outputfh = open(outfile, 'w')
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
64 keys = coverage.keys()
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
65 keys.sort()
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
66 previous_chrom = ''
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
67 for i in keys:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
68 (chrom, location) = i
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
69 sum = coverage[(i)]
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
70 if chrom != previous_chrom:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
71 outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) )
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
72 previous_chrom = chrom
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
73 outputfh.write( "%s\t%s\n" % ( location, sum ) )
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
74 outputfh.close()
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
75
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
76 if invalid_lines:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
77 invalid_msg = "Skipped %d invalid lines" % invalid_lines
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
78 if invalid_chrom:
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
79 invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. "
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
80
81608f3c2f32 Imported from capsule None
devteam
parents:
diff changeset
81 if __name__ == '__main__': __main__()