39
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys,string,copy
|
|
4 import CGData.RefGene
|
|
5 import CGData.GeneMap
|
|
6 import segToProbeMap
|
|
7
|
|
8 if __name__ == "__main__":
|
|
9
|
|
10 if len(sys.argv[:])!=4:
|
|
11 print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut\n"
|
|
12 sys.exit()
|
|
13 refgene = CGData.RefGene.RefGene()
|
|
14 refgene.load( sys.argv[2] )
|
|
15
|
|
16 #* b for cnv
|
|
17 probeMapper = CGData.GeneMap.ProbeMapper('b')
|
|
18
|
|
19 fin =open(sys.argv[1],'r')
|
|
20 genes= {}
|
|
21 samples={}
|
|
22 matrix=[] #sample then gene
|
|
23 for gene in refgene.get_gene_list():
|
|
24 genes[gene]=len(genes)
|
|
25
|
|
26 Ngene = len(genes.keys())
|
|
27 oneSample=[]
|
|
28 for i in range(0, Ngene):
|
|
29 oneSample.append([]);
|
|
30
|
|
31 print "genes: ", len(genes)
|
|
32
|
|
33 count =0
|
|
34
|
|
35 while 1:
|
|
36 count = count+1
|
|
37 #print count
|
|
38 line =string.strip(fin.readline())
|
|
39 if line =="":
|
|
40 break
|
|
41 if line[0]=="#":
|
|
42 continue
|
|
43 tmp = string.split(line,"\t")
|
|
44 if len(tmp)!= 6:
|
|
45 continue
|
|
46 seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),tmp[4])
|
|
47 sample = tmp[0]
|
|
48 value = float(tmp[5])
|
|
49 if sample not in samples:
|
|
50 samples[sample]=len(samples)
|
|
51 matrix.append(copy.deepcopy(oneSample))
|
|
52
|
|
53 hits={}
|
|
54 for hit in probeMapper.find_overlap( seg, refgene ):
|
|
55 gene = hit.name
|
|
56 if gene in hits:
|
|
57 continue
|
|
58 hits[gene]=0
|
|
59 matrix[samples[sample]][genes[gene]].append(value)
|
|
60 fin.close()
|
|
61
|
|
62 print "segments: ", count
|
|
63
|
|
64 fout =open(sys.argv[3],'w')
|
|
65 sample_list =samples.keys()
|
|
66 fout.write("sample\t"+string.join(sample_list,"\t")+"\n")
|
|
67 for gene in genes.keys():
|
|
68 fout.write(gene)
|
|
69 for sample in sample_list:
|
|
70 list = matrix[samples[sample]][genes[gene]]
|
|
71 if len(list)==0:
|
|
72 average =0
|
|
73 elif len(list)==1:
|
|
74 average = list[0]
|
|
75 average =round(average,3)
|
|
76 else:
|
|
77 total=0.0
|
|
78 for value in list:
|
|
79 total = total +value
|
|
80 average = total/len(list)
|
|
81 average =round(average,3)
|
|
82 fout.write("\t"+str(average))
|
|
83 fout.write("\n")
|
|
84 fout.close()
|
|
85
|