Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/mapSegToGeneMatrix.py @ 39:61f03b481b0d
new tool
author | jingchunzhu |
---|---|
date | Fri, 31 Jul 2015 20:38:27 -0700 |
parents | |
children | 728eda331f07 |
comparison
equal
deleted
inserted
replaced
38:84eb11adc22f | 39:61f03b481b0d |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys,string,copy | |
4 import CGData.RefGene | |
5 import CGData.GeneMap | |
6 import segToProbeMap | |
7 | |
8 if __name__ == "__main__": | |
9 | |
10 if len(sys.argv[:])!=4: | |
11 print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut\n" | |
12 sys.exit() | |
13 refgene = CGData.RefGene.RefGene() | |
14 refgene.load( sys.argv[2] ) | |
15 | |
16 #* b for cnv | |
17 probeMapper = CGData.GeneMap.ProbeMapper('b') | |
18 | |
19 fin =open(sys.argv[1],'r') | |
20 genes= {} | |
21 samples={} | |
22 matrix=[] #sample then gene | |
23 for gene in refgene.get_gene_list(): | |
24 genes[gene]=len(genes) | |
25 | |
26 Ngene = len(genes.keys()) | |
27 oneSample=[] | |
28 for i in range(0, Ngene): | |
29 oneSample.append([]); | |
30 | |
31 print "genes: ", len(genes) | |
32 | |
33 count =0 | |
34 | |
35 while 1: | |
36 count = count+1 | |
37 #print count | |
38 line =string.strip(fin.readline()) | |
39 if line =="": | |
40 break | |
41 if line[0]=="#": | |
42 continue | |
43 tmp = string.split(line,"\t") | |
44 if len(tmp)!= 6: | |
45 continue | |
46 seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),tmp[4]) | |
47 sample = tmp[0] | |
48 value = float(tmp[5]) | |
49 if sample not in samples: | |
50 samples[sample]=len(samples) | |
51 matrix.append(copy.deepcopy(oneSample)) | |
52 | |
53 hits={} | |
54 for hit in probeMapper.find_overlap( seg, refgene ): | |
55 gene = hit.name | |
56 if gene in hits: | |
57 continue | |
58 hits[gene]=0 | |
59 matrix[samples[sample]][genes[gene]].append(value) | |
60 fin.close() | |
61 | |
62 print "segments: ", count | |
63 | |
64 fout =open(sys.argv[3],'w') | |
65 sample_list =samples.keys() | |
66 fout.write("sample\t"+string.join(sample_list,"\t")+"\n") | |
67 for gene in genes.keys(): | |
68 fout.write(gene) | |
69 for sample in sample_list: | |
70 list = matrix[samples[sample]][genes[gene]] | |
71 if len(list)==0: | |
72 average =0 | |
73 elif len(list)==1: | |
74 average = list[0] | |
75 average =round(average,3) | |
76 else: | |
77 total=0.0 | |
78 for value in list: | |
79 total = total +value | |
80 average = total/len(list) | |
81 average =round(average,3) | |
82 fout.write("\t"+str(average)) | |
83 fout.write("\n") | |
84 fout.close() | |
85 |