Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/CGData/RefGene.py @ 31:ab20c0d04f4a
add seg2matrix tool
author | jingchunzhu |
---|---|
date | Fri, 24 Jul 2015 13:10:11 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:7a7a52e9b019 | 31:ab20c0d04f4a |
---|---|
1 | |
2 import csv | |
3 import re | |
4 import CGData | |
5 | |
# Column definitions for the current refGene_hg18.table.
# Each value is the 0-based index of a field within one tab-separated
# row of the table (rows are indexed as row[COL_...] when read).
COL_CHROM = 2
COL_STRAND = 3
COL_START = 4
COL_END = 5
COL_CDSSTART = 6
COL_CDSEND = 7
COL_EXCOUNT = 8
COL_EXSTART = 9
COL_EXEND = 10
COL_HUGO = 12


# Sometimes a ref table field ends with a comma, which produces a
# trailing '' element when the field is split on ','.  This pattern
# matches that final comma so it can be removed before splitting.
re_comma_end = re.compile(r',$')
22 | |
23 | |
class GeneInfo(object):
    """
    Class to hold information about gene, including exon start/stops.

    All coordinate arguments may be ints or numeric strings.  Start
    coordinates (gene, CDS and exon) are stored as the given value plus
    one, while end coordinates are stored unchanged.
    NOTE(review): this looks like a conversion from 0-based, half-open
    table coordinates to 1-based inclusive ones -- confirm against the
    table format.

    Attributes:
        chrom:       chromosome name, stored as given
        strand:      strand, stored as given
        chrom_start: int(start) + 1
        chrom_end:   int(end)
        cds_start:   int(cds_start) + 1
        cds_end:     int(cds_end)
        ex_count:    int(ex_count); not cross-checked against the lists
        ex_start:    list of exon starts, each input value + 1
        ex_end:      list of exon ends, unchanged
        name:        the `hugo` argument
    """

    def __init__(self,
                 chrom, strand, start, end, cds_start, cds_end,
                 ex_count, ex_start, ex_end, hugo):
        self.chrom = chrom
        self.strand = strand
        self.chrom_start = int(start) + 1
        self.chrom_end = int(end)
        self.cds_start = int(cds_start) + 1
        self.cds_end = int(cds_end)

        self.ex_count = int(ex_count)
        self.ex_start = self._parse_positions(ex_start, 1)
        self.ex_end = self._parse_positions(ex_end, 0)
        self.name = hugo

    @staticmethod
    def _parse_positions(field, offset):
        """
        Parse a comma-separated list of integers, adding `offset` to each.

        A single trailing comma is tolerated, because the table sometimes
        ends the list with one.  An empty field yields an empty list
        rather than failing on int('').  Any other malformed element
        still raises ValueError.
        """
        field = field.strip()
        if field.endswith(','):
            field = field[:-1]
        if not field:
            return []
        return [int(piece) + offset for piece in field.split(',')]

    def __repr__(self):
        # Represent the gene by its name only.
        return self.name
50 | |
51 | |
class RefGene(CGData.CGObjectBase):
    """
    Gene table read from a refGene-style tab-separated file.

    Genes are indexed two ways:
        hugo_map:  gene name  -> list of GeneInfo objects with that name
        chrom_map: chromosome -> list of GeneInfo objects on it

    The accessor methods call self.load() first when self.loaded is
    false.  NOTE(review): `loaded` and `load` are not defined in this
    class; presumably they come from CGData.CGObjectBase -- confirm.
    """

    def __init__(self):
        CGData.CGObjectBase.__init__(self)
        self.hugo_map = {}
        self.chrom_map = {}

    def read(self, handle):
        """
        Rebuild both indexes from `handle` and mark the object as loaded.

        `handle` is passed to csv.reader with a tab delimiter; every row
        must contain the columns named by the COL_* constants.  Any
        previously held genes are discarded.  After reading, each
        chromosome's gene list is sorted by ascending chrom_start.
        """
        reader = csv.reader(handle, delimiter="\t")

        self.hugo_map = {}
        for row in reader:
            gene = GeneInfo(
                row[COL_CHROM],
                row[COL_STRAND],
                row[COL_START],
                row[COL_END],
                row[COL_CDSSTART],
                row[COL_CDSEND],
                row[COL_EXCOUNT],
                row[COL_EXSTART],
                row[COL_EXEND],
                row[COL_HUGO])
            self.hugo_map.setdefault(row[COL_HUGO], []).append(gene)

        self.chrom_map = {}
        for genes in self.hugo_map.values():
            for gene in genes:
                self.chrom_map.setdefault(gene.chrom, []).append(gene)

        for chrom_genes in self.chrom_map.values():
            # A key function replaces the positional cmp function, which
            # list.sort() no longer accepts on Python 3.  Both sorts are
            # stable, so the resulting order is the same.
            chrom_genes.sort(key=lambda gene: gene.chrom_start)
        self.loaded = True

    def add(self, gene):
        """
        Register a single gene in both indexes.

        The gene is appended to its chromosome's list; that list is not
        re-sorted, and the `loaded` flag is left untouched.
        """
        self.chrom_map.setdefault(gene.chrom, []).append(gene)
        self.hugo_map.setdefault(gene.name, []).append(gene)

    def get_chrom_list(self):
        """Return a list of the chromosome names that have genes."""
        if not self.loaded:
            self.load()
        # list() gives callers a real list on Python 3 as well as 2.
        return list(self.chrom_map)

    def has_chrom(self, chrom):
        """Return True if any gene is recorded on `chrom`."""
        if not self.loaded:
            self.load()
        return chrom in self.chrom_map

    def get_chrom(self, chrom):
        """
        Return the list of genes on `chrom`.

        Raises KeyError if the chromosome is unknown.
        """
        if not self.loaded:
            self.load()
        return self.chrom_map[chrom]

    def get_gene_list(self):
        """Return a list of all gene names."""
        if not self.loaded:
            self.load()
        # list() gives callers a real list on Python 3 as well as 2.
        return list(self.hugo_map)

    def get_gene(self, gene):
        """
        Return the list of GeneInfo objects named `gene`.

        Raises KeyError if the name is unknown.
        """
        if not self.loaded:
            self.load()
        return self.hugo_map[gene]