39
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys,string,copy
|
|
4 import CGData.RefGene
|
|
5 import CGData.GeneMap
|
|
6 import segToProbeMap
|
|
7
|
|
8 if __name__ == "__main__":
|
|
9
|
54
|
10 if len(sys.argv[:])!=5:
|
|
11 print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut NORMAL_CNV\n"
|
39
|
12 sys.exit()
|
|
13 refgene = CGData.RefGene.RefGene()
|
|
14 refgene.load( sys.argv[2] )
|
|
15
|
54
|
16 NORMAL_CNV=sys.argv[4]
|
|
17
|
39
|
18 #* b for cnv
|
|
19 probeMapper = CGData.GeneMap.ProbeMapper('b')
|
|
20
|
|
21 fin =open(sys.argv[1],'r')
|
|
22 genes= {}
|
|
23 samples={}
|
|
24 matrix=[] #sample then gene
|
|
25 for gene in refgene.get_gene_list():
|
|
26 genes[gene]=len(genes)
|
|
27
|
|
28 Ngene = len(genes.keys())
|
|
29 oneSample=[]
|
|
30 for i in range(0, Ngene):
|
|
31 oneSample.append([]);
|
|
32
|
|
33 print "genes: ", len(genes)
|
|
34
|
|
35 count =0
|
|
36
|
|
37 while 1:
|
|
38 count = count+1
|
|
39 #print count
|
51
|
40 line =fin.readline()
|
|
41 if line =="": # end of file
|
39
|
42 break
|
51
|
43 if count ==1:
|
|
44 continue #ignore the first line
|
|
45 line = string.strip(line)
|
|
46 if line == "": # empty line
|
|
47 continue
|
39
|
48 if line[0]=="#":
|
|
49 continue
|
|
50 tmp = string.split(line,"\t")
|
60
|
51 if len(tmp)!= 5:
|
39
|
52 continue
|
60
|
53 seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),".")
|
39
|
54 sample = tmp[0]
|
60
|
55 value = float(tmp[4])
|
39
|
56 if sample not in samples:
|
|
57 samples[sample]=len(samples)
|
|
58 matrix.append(copy.deepcopy(oneSample))
|
|
59
|
|
60 hits={}
|
|
61 for hit in probeMapper.find_overlap( seg, refgene ):
|
|
62 gene = hit.name
|
|
63 if gene in hits:
|
|
64 continue
|
|
65 hits[gene]=0
|
|
66 matrix[samples[sample]][genes[gene]].append(value)
|
|
67 fin.close()
|
|
68
|
|
69 print "segments: ", count
|
|
70
|
|
71 fout =open(sys.argv[3],'w')
|
|
72 sample_list =samples.keys()
|
|
73 fout.write("sample\t"+string.join(sample_list,"\t")+"\n")
|
|
74 for gene in genes.keys():
|
|
75 fout.write(gene)
|
|
76 for sample in sample_list:
|
|
77 list = matrix[samples[sample]][genes[gene]]
|
|
78 if len(list)==0:
|
54
|
79 average = NORMAL_CNV
|
39
|
80 elif len(list)==1:
|
|
81 average = list[0]
|
|
82 average =round(average,3)
|
|
83 else:
|
|
84 total=0.0
|
|
85 for value in list:
|
|
86 total = total +value
|
|
87 average = total/len(list)
|
|
88 average =round(average,3)
|
|
89 fout.write("\t"+str(average))
|
|
90 fout.write("\n")
|
|
91 fout.close()
|
|
92
|