Mercurial > repos > melissacline > ucsc_cancer_utilities
changeset 39:61f03b481b0d
new tool
author | jingchunzhu |
---|---|
date | Fri, 31 Jul 2015 20:38:27 -0700 |
parents | 84eb11adc22f |
children | 72dc9215623d |
files | seg2matrix/mapSegToGeneMatrix.py seg2matrix/segToProbeMap.py segToGeneMatrix.xml segToMatrix.xml |
diffstat | 4 files changed, 198 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seg2matrix/mapSegToGeneMatrix.py Fri Jul 31 20:38:27 2015 -0700 @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +import sys,string,copy +import CGData.RefGene +import CGData.GeneMap +import segToProbeMap + +if __name__ == "__main__": + + if len(sys.argv[:])!=4: + print "python mapSegToGeneMatrix.py genomicsSegmentIn refGene GeneLevelMatrixOut\n" + sys.exit() + refgene = CGData.RefGene.RefGene() + refgene.load( sys.argv[2] ) + + #* b for cnv + probeMapper = CGData.GeneMap.ProbeMapper('b') + + fin =open(sys.argv[1],'r') + genes= {} + samples={} + matrix=[] #sample then gene + for gene in refgene.get_gene_list(): + genes[gene]=len(genes) + + Ngene = len(genes.keys()) + oneSample=[] + for i in range(0, Ngene): + oneSample.append([]); + + print "genes: ", len(genes) + + count =0 + + while 1: + count = count+1 + #print count + line =string.strip(fin.readline()) + if line =="": + break + if line[0]=="#": + continue + tmp = string.split(line,"\t") + if len(tmp)!= 6: + continue + seg = segToProbeMap.probeseg("", tmp[1], int(tmp[2]), int(tmp[3]),tmp[4]) + sample = tmp[0] + value = float(tmp[5]) + if sample not in samples: + samples[sample]=len(samples) + matrix.append(copy.deepcopy(oneSample)) + + hits={} + for hit in probeMapper.find_overlap( seg, refgene ): + gene = hit.name + if gene in hits: + continue + hits[gene]=0 + matrix[samples[sample]][genes[gene]].append(value) + fin.close() + + print "segments: ", count + + fout =open(sys.argv[3],'w') + sample_list =samples.keys() + fout.write("sample\t"+string.join(sample_list,"\t")+"\n") + for gene in genes.keys(): + fout.write(gene) + for sample in sample_list: + list = matrix[samples[sample]][genes[gene]] + if len(list)==0: + average =0 + elif len(list)==1: + average = list[0] + average =round(average,3) + else: + total=0.0 + for value in list: + total = total +value + average = total/len(list) + average =round(average,3) + fout.write("\t"+str(average)) + fout.write("\n") + fout.close() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seg2matrix/segToProbeMap.py Fri Jul 31 20:38:27 2015 -0700 @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +import sys,string +sys.path.insert(0,"../CGDataNew") + +import CGData.GenomicSegment +import CGData.SegToMatrix +import CGData.RefGene +import CGData.GeneMap +import optparse + +class segs: + def __init__(self): + self.probes = [] + + def load (self, handle): #handle bed6 + fin =open(handle,'r') + while 1: + line =string.strip(fin.readline()) + if line =="": + break + if line[0]=="#": + continue + tmp = string.split(line,"\t") + if len(tmp)== 5: + p = probeseg(tmp[0], tmp[1], int(tmp[3]), int(tmp[4]),tmp[2]) + self.probes.append(p) + fin.close() + + +class probeseg: + def __init__(self, name, chrom, chrom_start, chrom_end, strand): + self.name = name + self.chrom = chrom + self.chrom_start = chrom_start + self.chrom_end = chrom_end + self.strand = strand + + +if __name__ == "__main__": + def printUsage(): + print "python segToProbeMap.py segInput(name,chr,strand,start,end) refGene(eg hg18) probeMapOut --mode=cnv|exp\n" + + if len(sys.argv) != 5: + printUsage() + sys.exit() + + parser = optparse.OptionParser() + parser.add_option("--mode", action="store", type="string", dest="mode") + (options, args) = parser.parse_args() + + if (not options.mode) or (options.mode not in ["cnv","exp"]): + printUsage() + sys.exit() + + probes=segs() + probes.load(sys.argv[1]) + + refgene = CGData.RefGene.RefGene() + refgene.load(sys.argv[2]) + + handle = open(sys.argv[3], "w") + if options.mode=="cnv": + probeMapper = CGData.GeneMap.ProbeMapper('b') + if options.mode=="exp": + probeMapper = CGData.GeneMap.ProbeMapper('g') + + handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" % ("#id", "gene","chrom","chromStart","chromEnd","strand")) + for probe in probes.probes: + hits = [] + for hit in probeMapper.find_overlap( probe, refgene ): + if hit.name not in hits: + hits.append(hit.name) + handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (probe.name, ",".join(hits), probe.chrom, probe.chrom_start, probe.chrom_end, probe.strand)) + handle.close() + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/segToGeneMatrix.xml Fri Jul 31 20:38:27 2015 -0700 @@ -0,0 +1,35 @@ +<tool id="segToGeneMatrix" name="segToGeneMatrix" version="0.0.1"> + <description> + Convert segmented copy number data to gene based matrix data + </description> + <command interpreter="python"> + seg2matrix/mapSegToGeneMatrix.py $input $__tool_directory__/seg2matrix/$refGene.assembly $outputMatrix + </command> + <inputs> + <param name="input" format="tabular" type="data" multiple="false" label="Input segmented copy number data" /> + <conditional name="refGene"> + <param name="assembly_select" type="select" label="Which human genome assembly is your input segmented copy number data?"> + <option value="hg19">hg19</option> + <option value="hg18">hg18</option> + </param> + <when value="hg19"> + <param name="assembly" type="hidden" value="refGene_hg19" /> + </when> + <when value="hg18"> + <param name="assembly" type="hidden" value="refGene_hg18" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="outputMatrix" format="tabular" label="gene-level copy number matrix" /> + </outputs> + <help> + ***Convert segmented copy number data for input into xena*** + + Given a segmented copy number data file, convert it into gene based matrix data + + Output File no 1. matrix file + + </help> +</tool> +
--- a/segToMatrix.xml Fri Jul 31 15:29:53 2015 -0700 +++ b/segToMatrix.xml Fri Jul 31 20:38:27 2015 -0700 @@ -3,7 +3,7 @@ Prep segmented copy number data for Xena </description> <command interpreter="python"> - seg2matrix/segToMatrixGalaxy.py $input seg2matrix/$refGene.assembly $outputMatrix $outputProbeMap + seg2matrix/segToMatrixGalaxy.py $input $__tool_directory__/seg2matrix/$refGene.assembly $outputMatrix $outputProbeMap </command> <inputs> <param name="input" format="tabular" type="data" multiple="false" label="Input segmented copy number data" />