comparison seg2matrix/mapSegToGeneMatrix.py @ 39:61f03b481b0d

new tool
author jingchunzhu
date Fri, 31 Jul 2015 20:38:27 -0700
parents
children 728eda331f07
comparison
equal deleted inserted replaced
38:84eb11adc22f 39:61f03b481b0d
1 #!/usr/bin/env python
2
3 import sys,string,copy
4 import CGData.RefGene
5 import CGData.GeneMap
6 import segToProbeMap
7
8 if __name__ == "__main__":
9
10 if len(sys.argv[:])!=4:
11 print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut\n"
12 sys.exit()
13 refgene = CGData.RefGene.RefGene()
14 refgene.load( sys.argv[2] )
15
16 #* b for cnv
17 probeMapper = CGData.GeneMap.ProbeMapper('b')
18
19 fin =open(sys.argv[1],'r')
20 genes= {}
21 samples={}
22 matrix=[] #sample then gene
23 for gene in refgene.get_gene_list():
24 genes[gene]=len(genes)
25
26 Ngene = len(genes.keys())
27 oneSample=[]
28 for i in range(0, Ngene):
29 oneSample.append([]);
30
31 print "genes: ", len(genes)
32
33 count =0
34
35 while 1:
36 count = count+1
37 #print count
38 line =string.strip(fin.readline())
39 if line =="":
40 break
41 if line[0]=="#":
42 continue
43 tmp = string.split(line,"\t")
44 if len(tmp)!= 6:
45 continue
46 seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),tmp[4])
47 sample = tmp[0]
48 value = float(tmp[5])
49 if sample not in samples:
50 samples[sample]=len(samples)
51 matrix.append(copy.deepcopy(oneSample))
52
53 hits={}
54 for hit in probeMapper.find_overlap( seg, refgene ):
55 gene = hit.name
56 if gene in hits:
57 continue
58 hits[gene]=0
59 matrix[samples[sample]][genes[gene]].append(value)
60 fin.close()
61
62 print "segments: ", count
63
64 fout =open(sys.argv[3],'w')
65 sample_list =samples.keys()
66 fout.write("sample\t"+string.join(sample_list,"\t")+"\n")
67 for gene in genes.keys():
68 fout.write(gene)
69 for sample in sample_list:
70 list = matrix[samples[sample]][genes[gene]]
71 if len(list)==0:
72 average =0
73 elif len(list)==1:
74 average = list[0]
75 average =round(average,3)
76 else:
77 total=0.0
78 for value in list:
79 total = total +value
80 average = total/len(list)
81 average =round(average,3)
82 fout.write("\t"+str(average))
83 fout.write("\n")
84 fout.close()
85