annotate seg2matrix/segToMatrixGalaxy.py @ 60:bf57076e27b9 default tip

change genomicSegment input data
author jingchunzhu@gmail.com
date Tue, 27 Oct 2015 16:07:09 -0700
parents 59dbe857f5d4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
1 #!/usr/bin/env python
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
2
60
bf57076e27b9 change genomicSegment input data
jingchunzhu@gmail.com
parents: 54
diff changeset
3 import sys,os, string
31
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
4 import CGData.GenomicSegment
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
5 import CGData.SegToMatrix
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
6 import CGData.RefGene
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
7 import CGData.GeneMap
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
8
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class matrix_write:
    """File-like sink that forwards matrix rows and records their probes.

    Text passed to ``write`` is accumulated until a chunk ending in a
    newline arrives; the accumulated text is then forwarded unchanged to
    ``handle``.  For every flushed row whose first tab-separated field is
    not the literal header id ``probe``, that field is parsed as
    ``<chrom>_<start>_<end>`` and a ``probeseg`` is appended to
    ``self.probes``.
    """

    def __init__(self, handle):
        """Wrap ``handle``, an object with a ``write(str)`` method."""
        self.buff = ""        # text received since the last flushed row
        self.handle = handle  # destination for completed rows
        self.probes = []      # probeseg objects, in the order rows were seen

    def write(self, s):
        """Buffer ``s``; flush and record the probe once a row is complete.

        Raises:
            IndexError: the row id has fewer than two underscores.
            ValueError: the start or end component is not an integer.
        """
        self.buff += s
        if s.endswith("\n"):
            row_id = self.buff.split("\t")[0]
            if row_id.strip() != "probe":
                # Split from the right: the chromosome name itself may
                # contain underscores (e.g. "chr6_cox_hap1"), whereas the
                # last two components are always the coordinates.
                parts = row_id.rsplit("_", 2)
                p = probeseg(row_id, parts[0], int(parts[1]), int(parts[2]))
                self.probes.append(p)

            self.handle.write(self.buff)
            self.buff = ""
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
25
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class probeseg(object):
    """A named genomic interval with an unspecified strand.

    Attributes are plain and public: ``name``, ``chrom``, ``chrom_start``,
    ``chrom_end`` and ``strand`` (always initialised to ``"."``).
    """

    def __init__(self, name, chrom, chrom_start, chrom_end):
        """Store the interval exactly as given; no validation is performed."""
        self.name = name
        self.chrom = chrom
        self.chrom_start = chrom_start
        self.chrom_end = chrom_end
        self.strand = "."

    def __repr__(self):
        """Return a constructor-style representation for debugging."""
        return "probeseg(%r, %r, %r, %r)" % (
            self.name, self.chrom, self.chrom_start, self.chrom_end)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
33
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def _refgene_path(script_path, refgene_arg):
    """Return the refGene file path located next to the running script.

    Only the base name of ``refgene_arg`` is used; it is looked up in the
    directory of ``script_path``.  ``os.path.join`` is used so that an empty
    script directory (script invoked by bare file name) yields a path
    relative to the current directory instead of one rooted at "/".
    """
    return os.path.join(os.path.dirname(script_path),
                        os.path.basename(refgene_arg))


def main(argv):
    """Convert a genomic segment file into a matrix file and a probeMap file.

    ``argv`` has the layout of ``sys.argv``:
    ``[script, inputSegmentFile, refGeneFile, outputMatrix, outputProbeMap,
    NORMAL_CNV]``.  On a wrong argument count the usage line is written to
    stderr and the process exits with status 1.
    """
    if len(argv) != 6:
        sys.stderr.write("python segToMatrixGalaxy.py inputSegmentFile refGeneFile outputMatrix outputProbeMap NORMAL_CNV\n")
        sys.exit(1)

    seg = CGData.GenomicSegment.GenomicSegment()
    seg.load(argv[1])

    refgene = CGData.RefGene.RefGene()
    refgene.load(_refgene_path(argv[0], argv[2]))

    # Passed through as the raw command-line string, as before.
    NORMAL_CNV = argv[5]

    # matrix_write forwards rows to the file and collects one probeseg per
    # non-header row in m.probes, which drives the probeMap output below.
    with open(argv[3], "w") as handle:
        m = matrix_write(handle)
        CGData.SegToMatrix.seg_to_matrix(seg, m, NORMAL_CNV)

    # NOTE(review): the meaning of the 'b' argument is defined by
    # CGData.GeneMap.ProbeMapper and is not visible here.
    probeMapper = CGData.GeneMap.ProbeMapper('b')
    with open(argv[4], "w") as handle:
        handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" % ("id", "gene", "chrom", "chromStart", "chromEnd", "strand"))
        for probe in m.probes:
            # De-duplicate overlapping names while keeping first-seen order.
            hits = []
            for hit in probeMapper.find_overlap(probe, refgene):
                if hit.name not in hits:
                    hits.append(hit.name)
            handle.write("%s\t%s\t%s\t%s\t%s\t.\n" % (probe.name, ",".join(hits), probe.chrom, probe.chrom_start, probe.chrom_end))


if __name__ == "__main__":
    main(sys.argv)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
61
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
62