annotate seg2matrix/mapSegToGeneMatrix.py @ 60:bf57076e27b9 default tip

change genomicSegment input data
author jingchunzhu@gmail.com
date Tue, 27 Oct 2015 16:07:09 -0700
parents 59dbe857f5d4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
1 #!/usr/bin/env python
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
2
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
3 import sys,string,copy
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
4 import CGData.RefGene
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
5 import CGData.GeneMap
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
6 import segToProbeMap
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
7
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
8 if __name__ == "__main__":
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
9
54
59dbe857f5d4 introduce normal_CNV parameter
jingchunzhu
parents: 51
diff changeset
10 if len(sys.argv[:])!=5:
59dbe857f5d4 introduce normal_CNV parameter
jingchunzhu
parents: 51
diff changeset
11 print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut NORMAL_CNV\n"
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
12 sys.exit()
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
13 refgene = CGData.RefGene.RefGene()
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
14 refgene.load( sys.argv[2] )
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
15
54
59dbe857f5d4 introduce normal_CNV parameter
jingchunzhu
parents: 51
diff changeset
16 NORMAL_CNV=sys.argv[4]
59dbe857f5d4 introduce normal_CNV parameter
jingchunzhu
parents: 51
diff changeset
17
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
18 #* b for cnv
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
19 probeMapper = CGData.GeneMap.ProbeMapper('b')
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
20
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
21 fin =open(sys.argv[1],'r')
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
22 genes= {}
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
23 samples={}
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
24 matrix=[] #sample then gene
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
25 for gene in refgene.get_gene_list():
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
26 genes[gene]=len(genes)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
27
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
28 Ngene = len(genes.keys())
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
29 oneSample=[]
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
30 for i in range(0, Ngene):
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
31 oneSample.append([]);
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
32
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
33 print "genes: ", len(genes)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
34
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
35 count =0
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
36
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
37 while 1:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
38 count = count+1
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
39 #print count
51
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
40 line =fin.readline()
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
41 if line =="": # end of file
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
42 break
51
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
43 if count ==1:
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
44 continue #ignore the first line
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
45 line = string.strip(line)
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
46 if line == "": # empty line
728eda331f07 better handle of input
jingchunzhu@gmail.com
parents: 39
diff changeset
47 continue
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
48 if line[0]=="#":
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
49 continue
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
50 tmp = string.split(line,"\t")
60
bf57076e27b9 change genomicSegment input data
jingchunzhu@gmail.com
parents: 54
diff changeset
51 if len(tmp)!= 5:
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
52 continue
60
bf57076e27b9 change genomicSegment input data
jingchunzhu@gmail.com
parents: 54
diff changeset
53 seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),".")
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
54 sample = tmp[0]
60
bf57076e27b9 change genomicSegment input data
jingchunzhu@gmail.com
parents: 54
diff changeset
55 value = float(tmp[4])
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
56 if sample not in samples:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
57 samples[sample]=len(samples)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
58 matrix.append(copy.deepcopy(oneSample))
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
59
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
60 hits={}
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
61 for hit in probeMapper.find_overlap( seg, refgene ):
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
62 gene = hit.name
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
63 if gene in hits:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
64 continue
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
65 hits[gene]=0
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
66 matrix[samples[sample]][genes[gene]].append(value)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
67 fin.close()
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
68
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
69 print "segments: ", count
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
70
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
71 fout =open(sys.argv[3],'w')
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
72 sample_list =samples.keys()
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
73 fout.write("sample\t"+string.join(sample_list,"\t")+"\n")
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
74 for gene in genes.keys():
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
75 fout.write(gene)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
76 for sample in sample_list:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
77 list = matrix[samples[sample]][genes[gene]]
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
78 if len(list)==0:
54
59dbe857f5d4 introduce normal_CNV parameter
jingchunzhu
parents: 51
diff changeset
79 average = NORMAL_CNV
39
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
80 elif len(list)==1:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
81 average = list[0]
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
82 average =round(average,3)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
83 else:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
84 total=0.0
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
85 for value in list:
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
86 total = total +value
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
87 average = total/len(list)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
88 average =round(average,3)
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
89 fout.write("\t"+str(average))
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
90 fout.write("\n")
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
91 fout.close()
61f03b481b0d new tool
jingchunzhu
parents:
diff changeset
92