80
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys, string
|
|
4 #from rpy import *
|
|
5 import rpy2.robjects as robjects
|
|
6 import rpy2.rlike.container as rlc
|
|
7 from rpy2.robjects.packages import importr
|
|
8 r = robjects.r
|
|
9 grdevices = importr('grDevices')
|
|
10 import numpy
|
|
11
|
|
12
|
|
13 def stop_err(msg):
|
|
14 sys.stderr.write(msg)
|
|
15 sys.exit()
|
|
16
|
|
17 infile = sys.argv[1]
|
|
18 x_cols = sys.argv[2].split(',')
|
|
19 y_cols = sys.argv[3].split(',')
|
|
20
|
|
21 x_scale = x_center = "FALSE"
|
|
22 if sys.argv[4] == 'both':
|
|
23 x_scale = x_center = "TRUE"
|
|
24 elif sys.argv[4] == 'center':
|
|
25 x_center = "TRUE"
|
|
26 elif sys.argv[4] == 'scale':
|
|
27 x_scale = "TRUE"
|
|
28
|
|
29 y_scale = y_center = "FALSE"
|
|
30 if sys.argv[5] == 'both':
|
|
31 y_scale = y_center = "TRUE"
|
|
32 elif sys.argv[5] == 'center':
|
|
33 y_center = "TRUE"
|
|
34 elif sys.argv[5] == 'scale':
|
|
35 y_scale = "TRUE"
|
|
36
|
|
37 std_scores = "FALSE"
|
|
38 if sys.argv[6] == "yes":
|
|
39 std_scores = "TRUE"
|
|
40
|
|
41 outfile = sys.argv[7]
|
|
42 outfile2 = sys.argv[8]
|
|
43
|
|
44 fout = open(outfile,'w')
|
|
45 elems = []
|
|
46 for i, line in enumerate( file ( infile )):
|
|
47 line = line.rstrip('\r\n')
|
|
48 if len( line )>0 and not line.startswith( '#' ):
|
|
49 elems = line.split( '\t' )
|
|
50 break
|
|
51 if i == 30:
|
|
52 break # Hopefully we'll never get here...
|
|
53
|
|
54 if len( elems )<1:
|
|
55 stop_err( "The data in your input dataset is either missing or not formatted properly." )
|
|
56
|
|
57 x_vals = []
|
|
58
|
|
59 for k,col in enumerate(x_cols):
|
|
60 x_cols[k] = int(col)-1
|
|
61 #x_vals.append([])
|
|
62
|
|
63 y_vals = []
|
|
64
|
|
65 for k,col in enumerate(y_cols):
|
|
66 y_cols[k] = int(col)-1
|
|
67 #y_vals.append([])
|
|
68
|
|
69 skipped = 0
|
|
70 for ind,line in enumerate( file( infile )):
|
|
71 if line and not line.startswith( '#' ):
|
|
72 try:
|
|
73 fields = line.strip().split("\t")
|
|
74 valid_line = True
|
|
75 for col in x_cols+y_cols:
|
|
76 try:
|
|
77 assert float(fields[col])
|
|
78 except:
|
|
79 skipped += 1
|
|
80 valid_line = False
|
|
81 break
|
|
82 if valid_line:
|
|
83 for k,col in enumerate(x_cols):
|
|
84 try:
|
|
85 xval = float(fields[col])
|
|
86 except:
|
|
87 xval = NaN#
|
|
88 #x_vals[k].append(xval)
|
|
89 x_vals.append(xval)
|
|
90 for k,col in enumerate(y_cols):
|
|
91 try:
|
|
92 yval = float(fields[col])
|
|
93 except:
|
|
94 yval = NaN#
|
|
95 #y_vals[k].append(yval)
|
|
96 y_vals.append(yval)
|
|
97 except:
|
|
98 skipped += 1
|
|
99
|
|
100 #x_vals1 = numpy.asarray(x_vals).transpose()
|
|
101 #y_vals1 = numpy.asarray(y_vals).transpose()
|
|
102
|
|
103 #x_dat= r.list(array(x_vals1))
|
|
104 #y_dat= r.list(array(y_vals1))
|
|
105
|
|
106 x_dat = r['matrix'](robjects.FloatVector(x_vals),ncol=len(x_cols),byrow=True)
|
|
107 y_dat = r['matrix'](robjects.FloatVector(y_vals),ncol=len(y_cols),byrow=True)
|
|
108
|
|
109 try:
|
|
110 r.suppressWarnings(r.library("yacca"))
|
|
111 except:
|
|
112 stop_err("Missing R library yacca.")
|
|
113
|
|
114 #set_default_mode(NO_CONVERSION)
|
|
115 try:
|
|
116 xcolnames = ["c%d" %(el+1) for el in x_cols]
|
|
117 ycolnames = ["c%d" %(el+1) for el in y_cols]
|
|
118 #cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
|
|
119 cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), **{'standardize.scores':r(std_scores)})
|
|
120 #ftest = r.F_test_cca(cc)
|
|
121 ftest = r['F.test.cca'](cc)
|
|
122 except RException, rex:
|
|
123 stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
|
|
124
|
|
125 #set_default_mode(BASIC_CONVERSION)
|
|
126 summary = r.summary(cc)
|
|
127
|
|
128 #ncomps = len(summary['corr'])
|
|
129 ncomps = len(summary.rx2('corr'))
|
|
130 #comps = summary['corr'].keys()
|
|
131 #comps = summary.rx2('corr').names
|
|
132 comps = (','.join(summary.rx2('corr').names)).split(',')
|
|
133 #corr = summary['corr'].values()
|
|
134 corr = summary.rx2('corr')
|
|
135 #xlab = summary['xlab']
|
|
136 xlab = summary.rx2('xlab')
|
|
137 #ylab = summary['ylab']
|
|
138 ylab = summary.rx2('ylab')
|
|
139
|
|
140 for i in range(ncomps):
|
|
141 corr[comps.index('CV %s' %(i+1))] = summary.rx2('corr')[i]
|
|
142 #corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
|
|
143
|
|
144 #ftest=ftest.as_py()
|
|
145 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
146 print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
|
|
147 #print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
|
|
148 print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('statistic')]))
|
|
149 #print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
|
|
150 print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest.rx2('p.value')]))
|
|
151
|
|
152
|
|
153 print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
154 #for i,val in enumerate(summary['xcoef']):
|
|
155 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
156 vm = summary.rx2('xcoef')
|
|
157 for i in range(vm.nrow):
|
|
158 vals = []
|
|
159 for j in range(vm.ncol):
|
|
160 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
161 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
162
|
|
163 print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
164 #for i,val in enumerate(summary['ycoef']):
|
|
165 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
166 vm = summary.rx2('ycoef')
|
|
167 for i in range(vm.nrow):
|
|
168 vals = []
|
|
169 for j in range(vm.ncol):
|
|
170 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
171 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
172
|
|
173 print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
174 #for i,val in enumerate(summary['xstructcorr']):
|
|
175 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
176 vm = summary.rx2('xstructcorr')
|
|
177 for i in range(vm.nrow):
|
|
178 vals = []
|
|
179 for j in range(vm.ncol):
|
|
180 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
181 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
182
|
|
183 print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
184 #for i,val in enumerate(summary['ystructcorr']):
|
|
185 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
186 vm = summary.rx2('ystructcorr')
|
|
187 for i in range(vm.nrow):
|
|
188 vals = []
|
|
189 for j in range(vm.ncol):
|
|
190 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
191 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
192
|
|
193 print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
194 #for i,val in enumerate(summary['xcrosscorr']):
|
|
195 # print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
196 vm = summary.rx2('xcrosscorr')
|
|
197 for i in range(vm.nrow):
|
|
198 vals = []
|
|
199 for j in range(vm.ncol):
|
|
200 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
201 print >>fout, "%s\t%s" %(xlab[i][0], "\t".join(vals))
|
|
202
|
|
203 print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
204 #for i,val in enumerate(summary['ycrosscorr']):
|
|
205 # print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
206 vm = summary.rx2('ycrosscorr')
|
|
207 for i in range(vm.nrow):
|
|
208 vals = []
|
|
209 for j in range(vm.ncol):
|
|
210 vals.append("%.4g" % vm.rx2(i+1,j+1)[0])
|
|
211 print >>fout, "%s\t%s" %(ylab[i][0], "\t".join(vals))
|
|
212
|
|
213 r.pdf( outfile2, 8, 8 )
|
|
214 #r.plot(cc)
|
|
215 for i in range(ncomps):
|
|
216 r['helio.plot'](cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
|
|
217 #r.dev_off()
|
|
218 grdevices.dev_off()
|
|
219
|