changeset 0:29573fbd1f67 draft default tip

Uploaded
author kellrott
date Fri, 21 Dec 2012 16:41:30 -0500
parents
children
files siggenes/siggenes.py siggenes/siggenes.xml
diffstat 2 files changed, 394 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siggenes/siggenes.py	Fri Dec 21 16:41:30 2012 -0500
@@ -0,0 +1,326 @@
+#!/usr/bin/env python
+
+import sys
+import argparse
+import array
+import csv
+import math
+try:
+	from rpy2 import robjects
+	from rpy2.robjects.packages import importr
+except ImportError:
+	pass
+
+def die(msg):
+    sys.stderr.write("%s\n" % (msg))
+    sys.exit(1)
+
+def log(msg):
+    pass
+
+class DataException(Exception):
+    pass
+
+class FloatMatrix:
+    def __init__(self):
+        self.corner_name = "probe"
+        self.data = None
+        self.nrows = None
+        self.ncols = None
+        self.rowmap = None
+        self.colmap = None
+
+    def read(self, handle):
+        header = None
+        for line in handle:
+            row = line.rstrip().split("\t")
+            if header is None:
+                header = row
+                self.data = array.array("f")
+                self.colmap = {}
+                self.rowmap = {}
+                self.ncols = len(row) - 1
+                self.nrows = 0
+                for i, c in enumerate(row[1:]):
+                    self.colmap[c] = i
+            else:
+                if len(row) - 1 != self.ncols:
+                    raise DataException("Misformed matrix")
+                self.rowmap[row[0]] = len(self.rowmap)
+                a = []
+                for v in row[1:]:
+                    try:
+                        a.append(float(v))
+                    except ValueError:
+                        a.append(float('Nan'))
+                self.data.extend(a)
+                self.nrows += 1
+
+    def init_blank(self, rows, cols):
+        self.data = array.array("f")
+        self.colmap = {}
+        for i,c in enumerate(cols):
+            self.colmap[c] = i
+        self.rowmap = {}
+        for i,r in enumerate(rows):
+            self.rowmap[r] = i
+        self.ncols = len(cols)
+        self.nrows = len(rows)
+        for i in range(self.nrows):
+            self.data.extend([float('nan')] * self.ncols)
+
+    def get_value(self, row_name, col_name):
+        return self.data[ self.rowmap[row_name] * self.ncols + self.colmap[col_name] ]
+
+    def set_value(self, row_name, col_name, value):
+        self.data[ self.rowmap[row_name] * self.ncols + self.colmap[col_name] ] = value
+    
+    def get_row(self, row_name):
+        return self.data[ self.rowmap[row_name] * self.ncols :  (self.rowmap[row_name]+1) * self.ncols ]
+
+    def get_cols(self):
+        out = self.colmap.keys()
+        return sorted(out, key=self.colmap.get)
+    
+    def has_row(self, row):
+        return row in self.rowmap 
+
+    def has_col(self, col):
+        return col in self.colmap 
+
+    def get_rows(self):
+        out = self.rowmap.keys()
+        return sorted(out, key=self.rowmap.get)
+    
+    def write(self, handle, missing='NA'):
+        write = csv.writer(handle, delimiter="\t", lineterminator='\n')
+        col_list = self.get_cols()
+        
+        write.writerow([self.corner_name] + col_list)
+        for rowName in self.rowmap:
+            out = [rowName]
+            row = self.get_row(rowName)
+            for col in col_list:
+                val = row[self.colmap[col]]
+                if val is None or math.isnan(val):
+                    val = missing
+                else:
+                    val = "%.5f" % (val)
+                out.append(val)
+            write.writerow(out)        
+            
+    def toRmatrix(self, r):
+        out = r.matrix(self.data, ncol=self.ncols, dimnames=[ self.get_rows(), self.get_cols() ], byrow=True)
+        return out
+
+
+
+
+class StringMatrix(FloatMatrix):
+    def __init__(self):
+        self.corner_name = "sample"
+        self.data = None
+        self.nrows = None
+        self.ncols = None
+        self.rowmap = None
+        self.colmap = None
+
+    def init_blank(self, rows, cols):
+        self.data = []
+        self.rowmap = {}
+        for r in rows:
+            self.rowmap[r] = len(self.rowmap) 
+            if self.colmap is None:
+                self.colmap = {}
+                for c in cols:
+                    self.colmap[c] = len(self.colmap)
+                self.ncols = len(self.colmap)
+
+            for c in cols:
+                self.data.append(None)
+        self.nrows = len(self.rowmap)
+
+    def read(self, handle):
+        header = None
+        for line in handle:
+            row = line.rstrip("\r\n").split("\t")
+            if header is None:
+                header = row
+                self.data = []
+                self.colmap = {}
+                self.rowmap = {}
+                self.ncols = len(row) - 1
+                self.nrows = 0
+                for i, c in enumerate(row[1:]):
+                    self.colmap[c] = i
+            else:
+                if len(row) - 1 != self.ncols:
+                    raise DataException("Misformed matrix")
+                self.rowmap[row[0]] = len(self.rowmap)
+                a = []
+                for v in row[1:]:
+                    a.append(v)
+                self.data.extend(a)
+                self.nrows += 1
+
+    def write(self, handle, missing=''):
+        write = csv.writer(handle, delimiter="\t", lineterminator='\n')
+        col_list = self.get_cols()
+        
+        write.writerow([self.corner_name] + col_list)
+        for rowName in self.rowmap:
+            out = [rowName]
+            row = self.get_row(rowName)
+            for col in col_list:
+                val = row[self.colmap[col]]
+                if val is None:
+                    val = missing
+                else:
+                    val = "%s" % (val)
+                out.append(val)
+            write.writerow(out)        
+
+
+class SigGenes:
+    def __init__(self):
+        self.siggenes = importr("siggenes")
+    
+    def calc(self, method, genomicMatrix, phenotypeMatrix, phenotype, phenotypeEval):
+        r = robjects.r
+        
+        samples = []
+        for s_name in genomicMatrix.get_cols():
+            if phenotypeMatrix.has_row( s_name ):
+                samples.append(s_name)
+    
+        probes = genomicMatrix.get_rows()
+        f_sub = FloatMatrix()
+        f_sub.init_blank(rows=probes, cols=samples)
+        for row in probes:
+            for col in samples:
+                f_sub.set_value(row_name=row,col_name=col, value=genomicMatrix.get_value(row_name=row, col_name=col))
+        
+        mat = f_sub.toRmatrix(r)
+        
+        cls = FloatMatrix()
+        cls.init_blank(rows=samples, cols=[phenotype])
+        for row in samples:
+            val = phenotypeMatrix.get_value(row_name=row, col_name=phenotype)
+            try:
+                val = float(val)
+            except ValueError:
+                pass
+            if eval(phenotypeEval, {"__builtins__":None}, { 'float' : float, 'int' : int, 'value' : val } ):
+                cls.set_value(row_name=row, col_name=phenotype, value=1)
+            else:
+                cls.set_value(row_name=row, col_name=phenotype, value=0)
+    
+        cls_r = cls.toRmatrix(r)
+        if method == 'sam':
+            sam_out = self.siggenes.sam(mat, r.c(cls_r))
+            sam_att = r.cbind(
+                r.c(r.attributes(sam_out).rx2("d")),
+                r.c(r.attributes(sam_out).rx2("vec.false")),
+                r.c(r.attributes(sam_out).rx2("q.value")),
+                r.c(r.attributes(sam_out).rx2("p.value")),
+                r.c(r.attributes(sam_out).rx2("s"))
+            )
+            out = FloatMatrix()
+            ocols = [ "Score", "FalseCalls", "Q-value",
+                           "P-value", "StdDev"]
+            out.init_blank( cols=ocols,
+                           rows=probes)
+            
+            for i,n in enumerate(probes):
+                vals = list(sam_att.rx(i+1, True))
+                for j, col in enumerate(ocols):
+                     out.set_value( col_name=col, row_name=n, value=vals[j])
+            return out
+
+        if method == 'ebam':
+            e_a0 = r.get("find.a0")(mat, r.c(cls_r))
+            ebam_out = self.siggenes.ebam(e_a0)
+            ebam_att = r.cbind(
+                r.c(r.attributes(ebam_out).rx2('z')),
+                r.c(r.attributes(ebam_out).rx2('vec.pos')),
+                r.c(r.attributes(ebam_out).rx2('vec.neg'))
+                )
+
+            out = FloatMatrix()
+            ocols = [ "Score", "PosCalls", "NegCalls" ]
+            out.init_blank( cols=ocols,
+                           rows=probes)
+            
+            for i,n in enumerate(probes):
+                vals = list(ebam_att.rx(i+1, True))
+                for j, col in enumerate(ocols):
+                     out.set_value( col_name=col, row_name=n, value=vals[j])
+            return out
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-m', '--method', help="SigGenes method (sam/ebam)", default='sam')
+    parser.add_argument('-g', '--genomic', help='GenomicMatrix', default=None)
+    parser.add_argument('-p', '--pheno', help='Phenotype Matrix', default=None)
+    parser.add_argument('-l', '--list', help="Sample List File", default=None)
+    parser.add_argument('-e', '--eval', help='Class Evalutation (val=="+")', default=None)
+    parser.add_argument('-o', '--out', help="Output Path", default="None")
+    
+    args = parser.parse_args()
+    
+    if args.genomic is None:
+        die("Need to define genomic matrix")
+
+    handle = open(args.genomic)
+    genomicMatrix = FloatMatrix()
+    genomicMatrix.read(handle)
+    handle.close()
+    
+    eval_text = args.eval
+
+    phenotypeMatrix = None
+    if args.pheno is not None:
+        handle = open(args.pheno)
+        phenotypeMatrix = StringMatrix()
+        phenotypeMatrix.read(handle)
+        handle.close()
+    
+    if args.list is not None:
+        in_list = {}
+        handle = open(args.list)
+        for line in handle:
+            in_list[line.rstrip()] = True
+        handle.close()
+        phenotypeMatrix = StringMatrix()
+        phenotypeMatrix.init_blank( cols=[ 'member' ], rows=genomicMatrix.get_cols() )
+        for r in genomicMatrix.get_cols():
+            if r in in_list:
+                phenotypeMatrix.set_value(row_name=r, col_name='member', value='+')
+            else:
+                phenotypeMatrix.set_value(row_name=r, col_name='member', value='-')
+        eval_text = "value == '+'"
+
+    if phenotypeMatrix is None:
+        die("Need to define phenotypes")
+    if eval_text is None:
+        die("Need to define evaluation statement")
+
+
+    sig = SigGenes()
+    out = FloatMatrix()
+    out.init_blank(rows=genomicMatrix.get_rows(), cols=phenotypeMatrix.get_cols())
+    for phenotype in phenotypeMatrix.get_cols():
+        log("Calculating" + phenotype)
+        cur = sig.calc(args.method, genomicMatrix, phenotypeMatrix, phenotype, eval_text)
+        for row in cur.get_rows():
+            out.set_value( row_name=row, col_name=phenotype, value=cur.get_value(row_name=row, col_name="Score") )
+    if args.out is None:
+        out.write(sys.stdout)
+    else:
+        handle = open(args.out, "w")
+        out.write(handle)
+        handle.close()
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siggenes/siggenes.xml	Fri Dec 21 16:41:30 2012 -0500
@@ -0,0 +1,68 @@
+<tool id="siggenes" name="Siggenes" version="1.0.0">
+	<description>Signficance test for genes in Microarray data</description>
+	<command interpreter="python">siggenes.py --genomic $genomic --out $out 
+#if $samples.mode == "matrix":
+-p ${samples.phenotype}
+-e "${samples.eval_text}"
+#end if
+#if $samples.mode == "list":
+-l ${samples.sample_list}
+#end if
+	</command>
+	<inputs>
+		<param name="genomic" type="data" label="Genomic Matrix" help="Matrix of Genomic Values"/>
+
+		<conditional name="samples">
+    		<param name="mode" type="select" label="Set Mode">
+				<option value="list">Sample List</option>
+				<option value="matrix">Phenotype Matrix</option>
+			</param>
+			<when value="list">
+				<param name="sample_list" type="data" label="Sample List" help="List of positive samples"/>
+			</when>
+			<when value="matrix">
+				<param name="phenotype" type="data" label="Phenotype Matrix" help="Matrix of Phenotypes"/>
+				<param name="eval_text" type="text" size="90" label="Evaluation Statement" help="evalation statement to select positive set" value="value=='+'">
+					<sanitizer>
+	        			<valid initial="string.printable">
+							<remove value="&quot;"/>
+						</valid>
+						<mapping initial="none">
+	         				<add source="&quot;" target="\&quot;"/>
+						</mapping>
+	      			</sanitizer>
+				</param>
+			</when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data name="out" format="tabular" label="Significance Matrix" help="Matrix of gene significance"/>
+	</outputs>
+	<help>
+Wrapper for the siggenes R package.
+
+Genomic Matrix:
+    
+    Matrix of values from micro array, samples as columns, probe values as rows
+
+Phenotype Matrix:
+
+    Matrix of values describing the samples class membership. Classes as columns, samples as rows.
+    All values will be evaluated using the 'Evaluation Statement'.
+
+Sample List:
+    
+    A list of samples that belong to the positive set. An alternative input to providing a phenotype matrix and 
+    evaluation statement
+
+Evaluation Statement:
+    
+    A statement that will be evaluated against every cell in the phenotype matrix (it is ignored if a sample list is provided). A 'True' return value
+    means that the sample is part of the group. The value of the cell is 'value' in string format. Example statements::
+
+        value == '+'
+
+        float(value) > 0.0
+
+	</help>
+</tool>