# HG changeset patch
# User jingchunzhu
# Date 1438400307 25200
# Node ID 61f03b481b0d8437096d0c2f850c06096c921c0d
# Parent 84eb11adc22f89ccee540838b74f61c17419e4f4
new tool
diff -r 84eb11adc22f -r 61f03b481b0d seg2matrix/mapSegToGeneMatrix.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/seg2matrix/mapSegToGeneMatrix.py Fri Jul 31 20:38:27 2015 -0700
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import sys,string,copy
+import CGData.RefGene
+import CGData.GeneMap
+import segToProbeMap
+
+if __name__ == "__main__":
+
+ if len(sys.argv[:])!=4:
+ print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut\n"
+ sys.exit()
+ refgene = CGData.RefGene.RefGene()
+ refgene.load( sys.argv[2] )
+
+ #* b for cnv
+ probeMapper = CGData.GeneMap.ProbeMapper('b')
+
+ fin =open(sys.argv[1],'r')
+ genes= {}
+ samples={}
+ matrix=[] #sample then gene
+ for gene in refgene.get_gene_list():
+ genes[gene]=len(genes)
+
+ Ngene = len(genes.keys())
+ oneSample=[]
+ for i in range(0, Ngene):
+ oneSample.append([]);
+
+ print "genes: ", len(genes)
+
+ count =0
+
+ while 1:
+ count = count+1
+ #print count
+ line =string.strip(fin.readline())
+ if line =="":
+ break
+ if line[0]=="#":
+ continue
+ tmp = string.split(line,"\t")
+ if len(tmp)!= 6:
+ continue
+ seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),tmp[4])
+ sample = tmp[0]
+ value = float(tmp[5])
+ if sample not in samples:
+ samples[sample]=len(samples)
+ matrix.append(copy.deepcopy(oneSample))
+
+ hits={}
+ for hit in probeMapper.find_overlap( seg, refgene ):
+ gene = hit.name
+ if gene in hits:
+ continue
+ hits[gene]=0
+ matrix[samples[sample]][genes[gene]].append(value)
+ fin.close()
+
+ print "segments: ", count
+
+ fout =open(sys.argv[3],'w')
+ sample_list =samples.keys()
+ fout.write("sample\t"+string.join(sample_list,"\t")+"\n")
+ for gene in genes.keys():
+ fout.write(gene)
+ for sample in sample_list:
+ list = matrix[samples[sample]][genes[gene]]
+ if len(list)==0:
+ average =0
+ elif len(list)==1:
+ average = list[0]
+ average =round(average,3)
+ else:
+ total=0.0
+ for value in list:
+ total = total +value
+ average = total/len(list)
+ average =round(average,3)
+ fout.write("\t"+str(average))
+ fout.write("\n")
+ fout.close()
+
diff -r 84eb11adc22f -r 61f03b481b0d seg2matrix/segToProbeMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/seg2matrix/segToProbeMap.py Fri Jul 31 20:38:27 2015 -0700
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+import sys,string
+sys.path.insert(0,"../CGDataNew")
+
+import CGData.GenomicSegment
+import CGData.SegToMatrix
+import CGData.RefGene
+import CGData.GeneMap
+import optparse
+
+class segs:
+ def __init__(self):
+ self.probes = []
+
+ def load (self, handle): #handle bed6
+ fin =open(handle,'r')
+ while 1:
+ line =string.strip(fin.readline())
+ if line =="":
+ break
+ if line[0]=="#":
+ continue
+ tmp = string.split(line,"\t")
+ if len(tmp)== 5:
+ p = probeseg(tmp[0], tmp[1], int(tmp[3]), int(tmp[4]),tmp[2])
+ self.probes.append(p)
+ fin.close()
+
+
+class probeseg:
+ def __init__(self, name, chrom, chrom_start, chrom_end, strand):
+ self.name = name
+ self.chrom = chrom
+ self.chrom_start = chrom_start
+ self.chrom_end = chrom_end
+ self.strand = strand
+
+
+if __name__ == "__main__":
+ def printUsage():
+ print "python segToProbeMap.py segInput(name,chr,strand,start,end) refGene(eg hg18) probeMapOut --mode=cnv|exp\n"
+
+ if len(sys.argv) != 5:
+ printUsage()
+ sys.exit()
+
+ parser = optparse.OptionParser()
+ parser.add_option("--mode", action="store", type="string", dest="mode")
+ (options, args) = parser.parse_args()
+
+ if (not options.mode) or (options.mode not in ["cnv","exp"]):
+ printUsage()
+ sys.exit()
+
+ probes=segs()
+ probes.load(sys.argv[1])
+
+ refgene = CGData.RefGene.RefGene()
+ refgene.load(sys.argv[2])
+
+ handle = open(sys.argv[3], "w")
+ if options.mode=="cnv":
+ probeMapper = CGData.GeneMap.ProbeMapper('b')
+ if options.mode=="exp":
+ probeMapper = CGData.GeneMap.ProbeMapper('g')
+
+ handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" % ("#id", "gene","chrom","chromStart","chromEnd","strand"))
+ for probe in probes.probes:
+ hits = []
+ for hit in probeMapper.find_overlap( probe, refgene ):
+ if hit.name not in hits:
+ hits.append(hit.name)
+ handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (probe.name, ",".join(hits), probe.chrom, probe.chrom_start, probe.chrom_end, probe.strand))
+ handle.close()
+
+
diff -r 84eb11adc22f -r 61f03b481b0d segToGeneMatrix.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/segToGeneMatrix.xml Fri Jul 31 20:38:27 2015 -0700
@@ -0,0 +1,35 @@
+
+
+ Convert segmented copy number data to gene based matrix data
+
+
+ seg2matrix/mapSegToGeneMatrix.py $input $__tool_directory__/seg2matrix/$refGene.assembly $outputMatrix
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ***Convert segmented copy number data for input into xena***
+
+ Given a segmented copy number data file, convert it into gene based matrix data
+
+ Output File no 1. matrix file
+
+
+
+
diff -r 84eb11adc22f -r 61f03b481b0d segToMatrix.xml
--- a/segToMatrix.xml Fri Jul 31 15:29:53 2015 -0700
+++ b/segToMatrix.xml Fri Jul 31 20:38:27 2015 -0700
@@ -3,7 +3,7 @@
Prep segmented copy number data for Xena
- seg2matrix/segToMatrixGalaxy.py $input seg2matrix/$refGene.assembly $outputMatrix $outputProbeMap
+ seg2matrix/segToMatrixGalaxy.py $input $__tool_directory__/seg2matrix/$refGene.assembly $outputMatrix $outputProbeMap