annotate lefse.py @ 2:0efe0916b1b6 draft

Fix the kruskal wallis call error
author george-weingart
date Sat, 24 May 2014 13:27:35 -0400
parents 47ac77f2fe68
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
1 import os,sys,math,pickle
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
2 import random as lrand
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
3 import rpy2.robjects as robjects
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
4 import argparse
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
5 import numpy
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
6 #import svmutil
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
7
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
8 def init():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
9 lrand.seed(1982)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
10 robjects.r('library(splines)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
11 robjects.r('library(stats4)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
12 robjects.r('library(survival)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
13 robjects.r('library(mvtnorm)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
14 robjects.r('library(modeltools)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
15 robjects.r('library(coin)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
16 robjects.r('library(MASS)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
17
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
18 def get_class_means(class_sl,feats):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
19 means = {}
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
20 clk = class_sl.keys()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
21 for fk,f in feats.items():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
22 means[fk] = [numpy.mean((f[class_sl[k][0]:class_sl[k][1]])) for k in clk]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
23 return clk,means
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
24
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
25 def save_res(res,filename):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
26 with open(filename, 'w') as out:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
27 for k,v in res['cls_means'].items():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
28 out.write(k+"\t"+str(math.log(max(max(v),1.0),10.0))+"\t")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
29 if k in res['lda_res_th']:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
30 for i,vv in enumerate(v):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
31 if vv == max(v):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
32 out.write(str(res['cls_means_kord'][i])+"\t")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
33 break
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
34 out.write(str(res['lda_res'][k]))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
35 else: out.write("\t")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
36 out.write( "\t" + res['wilcox_res'][k]+"\n")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
37
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
38 def load_data(input_file, nnorm = False):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
39 with open(input_file, 'rb') as inputf:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
40 inp = pickle.load(inputf)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
41 if nnorm: return inp['feats'],inp['cls'],inp['class_sl'],inp['subclass_sl'],inp['class_hierarchy'],inp['norm']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
42 else: return inp['feats'],inp['cls'],inp['class_sl'],inp['subclass_sl'],inp['class_hierarchy']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
43
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
44 def load_res(input_file):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
45 with open(input_file, 'rb') as inputf:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
46 inp = pickle.load(inputf)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
47 return inp['res'],inp['params'],inp['class_sl'],inp['subclass_sl']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
48
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
49
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
50 def test_kw_r(cls,feats,p,factors):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
51 robjects.globalenv["y"] = robjects.FloatVector(feats)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
52 for i,f in enumerate(factors):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
53 robjects.globalenv['x'+str(i+1)] = robjects.FactorVector(robjects.StrVector(cls[f]))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
54 fo = "y~x1"
2
0efe0916b1b6 Fix the kruskal wallis call error
george-weingart
parents: 0
diff changeset
55 #for i,f in enumerate(factors[1:]):
0efe0916b1b6 Fix the kruskal wallis call error
george-weingart
parents: 0
diff changeset
56 # if f == "subclass" and len(set(cls[f])) <= len(set(cls["class"])): continue
0efe0916b1b6 Fix the kruskal wallis call error
george-weingart
parents: 0
diff changeset
57 # if len(set(cls[f])) == len(cls[f]): continue
0efe0916b1b6 Fix the kruskal wallis call error
george-weingart
parents: 0
diff changeset
58 # fo += "+x"+str(i+2)
0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
59 kw_res = robjects.r('kruskal.test('+fo+',)$p.value')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
60 return float(tuple(kw_res)[0]) < p, float(tuple(kw_res)[0])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
61
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
62 def test_rep_wilcoxon_r(sl,cl_hie,feats,th,multiclass_strat,mul_cor,fn,min_c,comp_only_same_subcl,curv=False):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
63 comp_all_sub = not comp_only_same_subcl
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
64 tot_ok = 0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
65 alpha_mtc = th
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
66 all_diff = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
67 for pair in [(x,y) for x in cl_hie.keys() for y in cl_hie.keys() if x < y]:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
68 dir_cmp = "not_set" #
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
69 l_subcl1, l_subcl2 = (len(cl_hie[pair[0]]), len(cl_hie[pair[1]]))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
70 if mul_cor != 0: alpha_mtc = th*l_subcl1*l_subcl2 if mul_cor == 2 else 1.0-math.pow(1.0-th,l_subcl1*l_subcl2)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
71 ok = 0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
72 curv_sign = 0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
73 first = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
74 for i,k1 in enumerate(cl_hie[pair[0]]):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
75 br = False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
76 for j,k2 in enumerate(cl_hie[pair[1]]):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
77 if not comp_all_sub and k1[len(pair[0]):] != k2[len(pair[1]):]:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
78 ok += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
79 continue
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
80 cl1 = feats[sl[k1][0]:sl[k1][1]]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
81 cl2 = feats[sl[k2][0]:sl[k2][1]]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
82 med_comp = False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
83 if len(cl1) < min_c or len(cl2) < min_c:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
84 med_comp = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
85 sx,sy = numpy.median(cl1),numpy.median(cl2)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
86 if cl1[0] == cl2[0] and len(set(cl1)) == 1 and len(set(cl2)) == 1:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
87 tres, first = False, False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
88 elif not med_comp:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
89 robjects.globalenv["x"] = robjects.FloatVector(cl1+cl2)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
90 robjects.globalenv["y"] = robjects.FactorVector(robjects.StrVector(["a" for a in cl1]+["b" for b in cl2]))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
91 pv = float(robjects.r('pvalue(wilcox_test(x~y,data=data.frame(x,y)))')[0])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
92 tres = pv < alpha_mtc*2.0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
93 if first:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
94 first = False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
95 if not curv and ( med_comp or tres ):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
96 dir_cmp = sx < sy
2
0efe0916b1b6 Fix the kruskal wallis call error
george-weingart
parents: 0
diff changeset
97 #if sx == sy: br = True
0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
98 elif curv:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
99 dir_cmp = None
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
100 if med_comp or tres:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
101 curv_sign += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
102 dir_cmp = sx < sy
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
103 else: br = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
104 elif not curv and med_comp:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
105 if ((sx < sy) != dir_cmp or sx == sy): br = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
106 elif curv:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
107 if tres and dir_cmp == None:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
108 curv_sign += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
109 dir_cmp = sx < sy
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
110 if tres and dir_cmp != (sx < sy):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
111 br = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
112 curv_sign = -1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
113 elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
114 if br: break
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
115 ok += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
116 if br: break
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
117 if curv: diff = curv_sign > 0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
118 else: diff = (ok == len(cl_hie[pair[1]])*len(cl_hie[pair[0]])) # or (not comp_all_sub and dir_cmp != "not_set")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
119 if diff: tot_ok += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
120 if not diff and multiclass_strat: return False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
121 if diff and not multiclass_strat: all_diff.append(pair)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
122 if not multiclass_strat:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
123 tot_k = len(cl_hie.keys())
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
124 for k in cl_hie.keys():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
125 nk = 0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
126 for a in all_diff:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
127 if k in a: nk += 1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
128 if nk == tot_k-1: return True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
129 return False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
130 return True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
131
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
132
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
133
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
134 def contast_within_classes_or_few_per_class(feats,inds,min_cl,ncl):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
135 ff = zip(*[v for n,v in feats.items() if n != 'class'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
136 cols = [ff[i] for i in inds]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
137 cls = [feats['class'][i] for i in inds]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
138 if len(set(cls)) < ncl:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
139 return True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
140 for c in set(cls):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
141 if cls.count(c) < min_cl:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
142 return True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
143 cols_cl = [x for i,x in enumerate(cols) if cls[i] == c]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
144 for i,col in enumerate(zip(*cols_cl)):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
145 if (len(set(col)) <= min_cl and min_cl > 1) or (min_cl == 1 and len(set(col)) <= 1):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
146 return True
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
147 return False
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
148
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
149 def test_lda_r(cls,feats,cl_sl,boots,fract_sample,lda_th,tol_min,nlogs):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
150 fk = feats.keys()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
151 means = dict([(k,[]) for k in feats.keys()])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
152 feats['class'] = list(cls['class'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
153 clss = list(set(feats['class']))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
154 for uu,k in enumerate(fk):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
155 if k == 'class': continue
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
156 ff = [(feats['class'][i],v) for i,v in enumerate(feats[k])]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
157 for c in clss:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
158 if len(set([float(v[1]) for v in ff if v[0] == c])) > max(float(feats['class'].count(c))*0.5,4): continue
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
159 for i,v in enumerate(feats[k]):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
160 if feats['class'][i] == c:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
161 feats[k][i] = math.fabs(feats[k][i] + lrand.normalvariate(0.0,max(feats[k][i]*0.05,0.01)))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
162 rdict = {}
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
163 for a,b in feats.items():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
164 if a == 'class' or a == 'subclass' or a == 'subject':
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
165 rdict[a] = robjects.StrVector(b)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
166 else: rdict[a] = robjects.FloatVector(b)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
167 robjects.globalenv["d"] = robjects.DataFrame(rdict)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
168 lfk = len(feats[fk[0]])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
169 rfk = int(float(len(feats[fk[0]]))*fract_sample)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
170 f = "class ~ "+fk[0]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
171 for k in fk[1:]: f += " + " + k.strip()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
172 ncl = len(set(cls['class']))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
173 min_cl = int(float(min([cls['class'].count(c) for c in set(cls['class'])]))*fract_sample*fract_sample*0.5)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
174 min_cl = max(min_cl,1)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
175 pairs = [(a,b) for a in set(cls['class']) for b in set(cls['class']) if a > b]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
176
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
177 for k in fk:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
178 for i in range(boots):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
179 means[k].append([])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
180 for i in range(boots):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
181 for rtmp in range(1000):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
182 rand_s = [lrand.randint(0,lfk-1) for v in range(rfk)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
183 if not contast_within_classes_or_few_per_class(feats,rand_s,min_cl,ncl): break
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
184 rand_s = [r+1 for r in rand_s]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
185 means[k][i] = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
186 for p in pairs:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
187 robjects.globalenv["rand_s"] = robjects.IntVector(rand_s)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
188 robjects.globalenv["sub_d"] = robjects.r('d[rand_s,]')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
189 z = robjects.r('z <- suppressWarnings(lda(as.formula('+f+'),data=sub_d,tol='+str(tol_min)+'))')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
190 robjects.r('w <- z$scaling[,1]')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
191 robjects.r('w.unit <- w/sqrt(sum(w^2))')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
192 robjects.r('ss <- sub_d[,-match("class",colnames(sub_d))]')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
193 if 'subclass' in feats:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
194 robjects.r('ss <- ss[,-match("subclass",colnames(ss))]')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
195 if 'subject' in feats:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
196 robjects.r('ss <- ss[,-match("subject",colnames(ss))]')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
197 robjects.r('xy.matrix <- as.matrix(ss)')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
198 robjects.r('LD <- xy.matrix%*%w.unit')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
199 robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
200 scal = robjects.r('wfinal <- w.unit * effect.size')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
201 rres = robjects.r('mm <- z$means')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
202 rowns = list(rres.rownames)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
203 lenc = len(list(rres.colnames))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
204 coeff = [abs(float(v)) if not math.isnan(float(v)) else 0.0 for v in scal]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
205 res = dict([(pp,[float(ff) for ff in rres.rx(pp,True)] if pp in rowns else [0.0]*lenc ) for pp in [p[0],p[1]]])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
206 for j,k in enumerate(fk):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
207 gm = abs(res[p[0]][j] - res[p[1]][j])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
208 means[k][i].append((gm+coeff[j])*0.5)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
209 res = {}
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
210 for k in fk:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
211 m = max([numpy.mean([means[k][kk][p] for kk in range(boots)]) for p in range(len(pairs))])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
212 res[k] = math.copysign(1.0,m)*math.log(1.0+math.fabs(m),10)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
213 return res,dict([(k,x) for k,x in res.items() if math.fabs(x) > lda_th])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
214
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
215
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
216 def test_svm(cls,feats,cl_sl,boots,fract_sample,lda_th,tol_min,nsvm):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
217 return NULL
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
218 """
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
219 fk = feats.keys()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
220 clss = list(set(cls['class']))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
221 y = [clss.index(c)*2-1 for c in list(cls['class'])]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
222 xx = [feats[f] for f in fk]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
223 if nsvm:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
224 maxs = [max(v) for v in xx]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
225 mins = [min(v) for v in xx]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
226 x = [ dict([(i+1,(v-mins[i])/(maxs[i]-mins[i])) for i,v in enumerate(f)]) for f in zip(*xx)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
227 else: x = [ dict([(i+1,v) for i,v in enumerate(f)]) for f in zip(*xx)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
228
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
229 lfk = len(feats[fk[0]])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
230 rfk = int(float(len(feats[fk[0]]))*fract_sample)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
231 mm = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
232
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
233 best_c = svmutil.svm_ms(y, x, [pow(2.0,i) for i in range(-5,10)],'-t 0 -q')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
234 for i in range(boots):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
235 rand_s = [lrand.randint(0,lfk-1) for v in range(rfk)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
236 r = svmutil.svm_w([y[yi] for yi in rand_s], [x[xi] for xi in rand_s], best_c,'-t 0 -q')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
237 mm.append(r[:len(fk)])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
238 m = [numpy.mean(v) for v in zip(*mm)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
239 res = dict([(v,m[i]) for i,v in enumerate(fk)])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
240 return res,dict([(k,x) for k,x in res.items() if math.fabs(x) > lda_th])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
241 """