80
|
1 #!/usr/bin/env python
|
|
2
|
|
3 from galaxy import eggs
|
|
4 import sys, string
|
|
5 #from rpy import *
|
|
6 import rpy2.robjects as robjects
|
|
7 import rpy2.rlike.container as rlc
|
|
8 from rpy2.robjects.packages import importr
|
|
9 r = robjects.r
|
|
10 grdevices = importr('grDevices')
|
|
11 import numpy
|
|
12
|
|
13
|
|
14 def stop_err(msg):
|
|
15 sys.stderr.write(msg)
|
|
16 sys.exit()
|
|
17
|
|
18 infile = sys.argv[1]
|
|
19 x_cols = sys.argv[2].split(',')
|
|
20 y_cols = sys.argv[3].split(',')
|
|
21
|
|
22 x_scale = x_center = "FALSE"
|
|
23 if sys.argv[4] == 'both':
|
|
24 x_scale = x_center = "TRUE"
|
|
25 elif sys.argv[4] == 'center':
|
|
26 x_center = "TRUE"
|
|
27 elif sys.argv[4] == 'scale':
|
|
28 x_scale = "TRUE"
|
|
29
|
|
30 y_scale = y_center = "FALSE"
|
|
31 if sys.argv[5] == 'both':
|
|
32 y_scale = y_center = "TRUE"
|
|
33 elif sys.argv[5] == 'center':
|
|
34 y_center = "TRUE"
|
|
35 elif sys.argv[5] == 'scale':
|
|
36 y_scale = "TRUE"
|
|
37
|
|
38 std_scores = "FALSE"
|
|
39 if sys.argv[6] == "yes":
|
|
40 std_scores = "TRUE"
|
|
41
|
|
42 outfile = sys.argv[7]
|
|
43 outfile2 = sys.argv[8]
|
|
44
|
|
45 fout = open(outfile,'w')
|
|
46 elems = []
|
|
47 for i, line in enumerate( file ( infile )):
|
|
48 line = line.rstrip('\r\n')
|
|
49 if len( line )>0 and not line.startswith( '#' ):
|
|
50 elems = line.split( '\t' )
|
|
51 break
|
|
52 if i == 30:
|
|
53 break # Hopefully we'll never get here...
|
|
54
|
|
55 if len( elems )<1:
|
|
56 stop_err( "The data in your input dataset is either missing or not formatted properly." )
|
|
57
|
|
58 x_vals = []
|
|
59
|
|
60 for k,col in enumerate(x_cols):
|
|
61 x_cols[k] = int(col)-1
|
|
62 #x_vals.append([])
|
|
63
|
|
64 y_vals = []
|
|
65
|
|
66 for k,col in enumerate(y_cols):
|
|
67 y_cols[k] = int(col)-1
|
|
68 #y_vals.append([])
|
|
69
|
|
70 skipped = 0
|
|
71 for ind,line in enumerate( file( infile )):
|
|
72 if line and not line.startswith( '#' ):
|
|
73 try:
|
|
74 fields = line.strip().split("\t")
|
|
75 valid_line = True
|
|
76 for col in x_cols+y_cols:
|
|
77 try:
|
|
78 assert float(fields[col])
|
|
79 except:
|
|
80 skipped += 1
|
|
81 valid_line = False
|
|
82 break
|
|
83 if valid_line:
|
|
84 for k,col in enumerate(x_cols):
|
|
85 try:
|
|
86 xval = float(fields[col])
|
|
87 except:
|
|
88 xval = NaN#
|
|
89 #x_vals[k].append(xval)
|
|
90 x_vals.append(xval)
|
|
91 for k,col in enumerate(y_cols):
|
|
92 try:
|
|
93 yval = float(fields[col])
|
|
94 except:
|
|
95 yval = NaN#
|
|
96 #y_vals[k].append(yval)
|
|
97 y_vals.append(yval)
|
|
98 except:
|
|
99 skipped += 1
|
|
100
|
|
101 #x_vals1 = numpy.asarray(x_vals).transpose()
|
|
102 #y_vals1 = numpy.asarray(y_vals).transpose()
|
|
103
|
|
104 #x_dat= r.list(array(x_vals1))
|
|
105 #y_dat= r.list(array(y_vals1))
|
|
106
|
|
107 x_dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True)
|
|
108 y_dat = r['matrix'](robjects.FloatVector(y_vals),ncol=len(y_cols),byrow=True)
|
|
109
|
|
110 try:
|
|
111 r.suppressWarnings(r.library("yacca"))
|
|
112 except:
|
|
113 stop_err("Missing R library yacca.")
|
|
114
|
|
115 #set_default_mode(NO_CONVERSION)
|
|
116 try:
|
|
117 xcolnames = ["c%d" %(el+1) for el in x_cols]
|
|
118 ycolnames = ["c%d" %(el+1) for el in y_cols]
|
|
119 #cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
|
|
120 cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), **{'standardize.scores':r(std_scores)})
|
|
121 #ftest = r.F_test_cca(cc)
|
|
122 ftest = r['F.test.cca'](cc)
|
|
123 except RException, rex:
|
|
124 stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
|
|
125
|
|
126 #set_default_mode(BASIC_CONVERSION)
|
|
127 summary = r.summary(cc)
|
|
128
|
|
129 #ncomps = len(summary['corr'])
|
|
130 ncomps = len(summary.rx2('corr'))
|
|
131 #comps = summary['corr'].keys()
|
|
132 #comps = summary.rx2('corr').names
|
|
133 comps = (','.join(summary.rx2('corr').names)).split(',')
|
|
134 #corr = summary['corr'].values()
|
|
135 corr = summary.rx2('corr')
|
|
136 #xlab = summary['xlab']
|
|
137 xlab = summary.rx2('xlab')
|
|
138 #ylab = summary['ylab']
|
|
139 ylab = summary.rx2('ylab')
|
|
140
|
|
141 for i in range(ncomps):
|
|
142 corr[comps.index('CV %s' %(i+1))] = summary.rx2('corr')[i]
|
|
143 #corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
|
|
144
|
|
145 #ftest=ftest.as_py()
|
|
146 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
147 print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
|
|
148 #print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
|
|
149 print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('statistic')]))
|
|
150 #print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
|
|
151 print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('p.value')]))
|
|
152
|
|
153
|
|
154 print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
155 #for i,val in enumerate(summary['xcoef']):
|
|
156 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
157 vm = summary.rx2('xcoef')
|
|
158 for i in range(vm.nrow):
|
|
159 vals = []
|
|
160 for j in range(vm.ncol):
|
|
161 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
162 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
163
|
|
164 print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
165 #for i,val in enumerate(summary['ycoef']):
|
|
166 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
167 vm = summary.rx2('ycoef')
|
|
168 for i in range(vm.nrow):
|
|
169 vals = []
|
|
170 for j in range(vm.ncol):
|
|
171 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
172 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
173
|
|
174 print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
175 #for i,val in enumerate(summary['xstructcorr']):
|
|
176 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
177 vm = summary.rx2('xstructcorr')
|
|
178 for i in range(vm.nrow):
|
|
179 vals = []
|
|
180 for j in range(vm.ncol):
|
|
181 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
182 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
183
|
|
184 print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
185 #for i,val in enumerate(summary['ystructcorr']):
|
|
186 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
187 vm = summary.rx2('ystructcorr')
|
|
188 for i in range(vm.nrow):
|
|
189 vals = []
|
|
190 for j in range(vm.ncol):
|
|
191 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
192 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
193
|
|
194 print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
195 #for i,val in enumerate(summary['xcrosscorr']):
|
|
196 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
197 vm = summary.rx2('xcrosscorr')
|
|
198 for i in range(vm.nrow):
|
|
199 vals = []
|
|
200 for j in range(vm.ncol):
|
|
201 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
202 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
203
|
|
204 print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
205 #for i,val in enumerate(summary['ycrosscorr']):
|
|
206 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
207 vm = summary.rx2('ycrosscorr')
|
|
208 for i in range(vm.nrow):
|
|
209 vals = []
|
|
210 for j in range(vm.ncol):
|
|
211 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
212 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
213
|
|
214 r.pdf( outfile2, 8, 8 )
|
|
215 #r.plot(cc)
|
|
216 for i in range(ncomps):
|
|
217 r['helio.plot'](cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
|
|
218 #r.dev_off()
|
|
219 grdevices.dev_off()
|
|
220
|