Mercurial > repos > devteam > blat_mapping
comparison blat_mapping.py @ 0:81608f3c2f32 draft default tip
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 19 May 2014 10:59:40 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:81608f3c2f32 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os, sys | |
| 4 | |
| 5 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 6 | |
| 7 def reverse_complement(s): | |
| 8 complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."} | |
| 9 reversed_s = [] | |
| 10 for i in s: | |
| 11 reversed_s.append(complement_dna[i]) | |
| 12 reversed_s.reverse() | |
| 13 return "".join(reversed_s) | |
| 14 | |
| 15 def __main__(): | |
| 16 nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4} | |
| 17 coverage = {} # key = (chrom, index) | |
| 18 invalid_lines = 0 | |
| 19 invalid_chrom = 0 | |
| 20 infile = sys.argv[1] | |
| 21 outfile = sys.argv[2] | |
| 22 | |
| 23 for i, line in enumerate( open( infile ) ): | |
| 24 line = line.rstrip('\r\n') | |
| 25 if not line or line.startswith('#'): | |
| 26 continue | |
| 27 fields = line.split() | |
| 28 if len(fields) < 21: # standard number of pslx columns | |
| 29 invalid_lines += 1 | |
| 30 continue | |
| 31 if not fields[0].isdigit(): | |
| 32 invalid_lines += 1 | |
| 33 continue | |
| 34 chrom = fields[13] | |
| 35 if not chrom.startswith( 'chr' ): | |
| 36 invalid_lines += 1 | |
| 37 invalid_chrom += 1 | |
| 38 continue | |
| 39 try: | |
| 40 block_count = int(fields[17]) | |
| 41 except: | |
| 42 invalid_lines += 1 | |
| 43 continue | |
| 44 block_size = fields[18].split(',') | |
| 45 chrom_start = fields[20].split(',') | |
| 46 | |
| 47 for j in range( block_count ): | |
| 48 try: | |
| 49 this_block_size = int(block_size[j]) | |
| 50 this_chrom_start = int(chrom_start[j]) | |
| 51 except: | |
| 52 invalid_lines += 1 | |
| 53 break | |
| 54 # brut force coverage | |
| 55 for k in range( this_block_size ): | |
| 56 cur_index = this_chrom_start + k | |
| 57 if coverage.has_key( ( chrom, cur_index ) ): | |
| 58 coverage[(chrom, cur_index)] += 1 | |
| 59 else: | |
| 60 coverage[(chrom, cur_index)] = 1 | |
| 61 | |
| 62 # generate a index file | |
| 63 outputfh = open(outfile, 'w') | |
| 64 keys = coverage.keys() | |
| 65 keys.sort() | |
| 66 previous_chrom = '' | |
| 67 for i in keys: | |
| 68 (chrom, location) = i | |
| 69 sum = coverage[(i)] | |
| 70 if chrom != previous_chrom: | |
| 71 outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) ) | |
| 72 previous_chrom = chrom | |
| 73 outputfh.write( "%s\t%s\n" % ( location, sum ) ) | |
| 74 outputfh.close() | |
| 75 | |
| 76 if invalid_lines: | |
| 77 invalid_msg = "Skipped %d invalid lines" % invalid_lines | |
| 78 if invalid_chrom: | |
| 79 invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. " | |
| 80 | |
| 81 if __name__ == '__main__': __main__() |
