annotate Intchecks/Script_intensity_check.R @ 0:51c39ea1fd54 draft

Uploaded
author melpetera
date Mon, 14 Jan 2019 08:37:27 -0500
parents
children d1133a7c26f9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
#########################################################################
# SCRIPT INTENSITY CHECK                                                #
#                                                                       #
# Input: Data Matrix, VariableMetadata, SampleMetadata                  #
# Output: VariableMetadata, Graphics (barplots and boxplots)            #
#                                                                       #
# Dependencies: RcheckLibrary.R                                         #
#                                                                       #
#########################################################################
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
10
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
11
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
# Parameters (for dev)
# This block is never executed (it is guarded by if(FALSE)). It only keeps a
# set of example argument values that can be run by hand, line by line, to
# define the arguments of intens_check() in an interactive session.
if (FALSE) {

  rm(list = ls())
  setwd("Y:\\Developpement\\Intensity check\\Pour tests")

  DM.name <- "DM_NA.tabular"
  SM.name <- "SM_NA.tabular"
  VM.name <- "vM_NA.tabular"
  class.col <- "2"
  type <- "One_class"
  class1 <- "Blanks"
  fold.frac <- "Top"
  logarithm <- "log2"
  VM.output <- "new_VM.txt"
  graphs.output <- "Barplots_and_Boxplots.pdf"
}
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
29
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
30
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
31
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
32
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1,
                         fold.frac, logarithm, VM.output, graphs.output) {

  # Checks the intensities by class of samples: mean fold change between
  # classes, plus the number and the proportion of missing values (NA) of
  # every variable of the dataMatrix within every class.
  #
  # Two options:
  # - one class (selected by the user) against all the remaining samples
  #   ("One_class")
  # - tests on each pair of classes ("Each_class")
  #
  # Parameters:
  # DM.name, SM.name, VM.name: paths of the dataMatrix, sampleMetadata and
  #     variableMetadata files (tab-separated, with a header line)
  # class.col: number of the sampleMetadata's column holding the classes
  # type: "One_class" or "Each_class"
  # class1: name of the class, only used if type="One_class"
  # fold.frac: if type="One_class": class1/other ("Top") or other/class1
  #     ("Bottom"); forced to "Top" if type="Each_class"
  # logarithm: "log2", "log10" or "none" for log mean fold change
  # VM.output: path of the output table (VM with the new columns)
  # graphs.output: path of the output pdf (barplot of the proportions of NA
  #     and one boxplot per fold column)
  #
  # Value: the value of the final dev.off() call. The function is used for
  # its side effects (the two output files and messages printed with cat()).
  #
  # Errors: stops if class.col is not a valid column number, if class1 is
  # not one of the classes, or (One_class only) if a sample has no class.


  # Input -------------------------------------------------------------------

  DM <- read.table(DM.name, header = TRUE, sep = "\t", check.names = FALSE)
  SM <- read.table(SM.name, header = TRUE, sep = "\t", check.names = FALSE)
  VM <- read.table(VM.name, header = TRUE, sep = "\t", check.names = FALSE)

  # Table match check with RcheckLibrary.
  # NOTE(review): match3() and check.err() are defined in RcheckLibrary.R,
  # which is not visible here. The code below binds results to VM by row
  # position and classes to samples by position, so it presumably relies on
  # these checks to guarantee identical identifier order -- confirm.
  table.check <- match3(DM, SM, VM)
  check.err(table.check)

  # Samples in rows, variables in columns (first DM column = identifiers).
  intens <- t(as.matrix(DM[, -1, drop = FALSE]))
  nb_var <- ncol(intens)


  # check class.col, class1 and the number of classes ------------------------

  requested.col <- class.col
  class.idx <- as.numeric(class.col)
  class.col <- colnames(SM)[class.idx]

  if (length(class.col) != 1 || is.na(class.col) ||
      !(class.col %in% colnames(SM))) {
    stop("\n- - - - - - - - -\n", "The column ", requested.col,
         " is not a part of the specify sample Metadata",
         "\n- - - - - - - - -\n")
  }

  # Positional extraction: stays correct even with duplicated column names.
  c_class <- as.factor(SM[[class.idx]])
  nb_class <- nlevels(c_class)
  classnames <- levels(c_class)

  if (length(c_class) != nrow(intens)) {
    stop("\n- - - - - - - - -\n",
         "The number of samples differs between the data matrix and the",
         " sample Metadata", "\n- - - - - - - - -\n")
  }

  if (nb_class < 2) {
    err.1class <- c("\n The column", class.col,
                    "contains only one class, fold calculation could not be executed \n")
    cat(err.1class)
  }

  if (nb_class > nrow(SM) / 3) {
    class.err <- paste0("\n There are too many classes, think about reducing",
                        " the number of classes and excluding those",
                        "\n with few samples \n")
    cat(class.err)
  }

  if (type == "One_class") {
    if (!(class1 %in% classnames)) {
      list.class1 <- c("\n Classes:", classnames, "\n")
      cat(list.class1)
      err.class1 <- c("The class ", class1, " does not appear in the column ",
                      class.col)
      stop("\n- - - - - - - - -\n", err.class1, "\n- - - - - - - - -\n")
    }
    if (anyNA(c_class)) {
      stop("\n- - - - - - - - -\n", "The column ", class.col,
           " contains missing values", "\n- - - - - - - - -\n")
    }

    # Every class but class1 is merged into a single "Other" class.
    labels <- as.character(c_class)
    is_other <- labels != class1
    if (any(is_other)) {
      labels[is_other] <- "Other"
      c_class <- as.factor(labels)
      nb_class <- nlevels(c_class)
      # class1 first, so that "Top" means class1/Other
      classnames <- c(class1, "Other")
    }
  }

  # Row indices (samples) of each class; samples with NA class are ignored.
  members <- lapply(seq_len(nb_class), function(j) {
    which(c_class == classnames[j])
  })


  # fold calculation ---------------------------------------------------------

  fold <- NULL

  if (nb_class >= 2) {

    if (type == "Each_class") {
      fold.frac <- "Top"
    }

    # Mean intensity of every variable within every class (NA ignored).
    class_means <- matrix(NA_real_, nrow = nb_var, ncol = nb_class)
    for (j in seq_len(nb_class)) {
      rows <- members[[j]]
      class_means[, j] <- vapply(
        seq_len(nb_var),
        function(i) mean(intens[rows, i], na.rm = TRUE),
        numeric(1)
      )
    }

    nb_pairs <- nb_class * (nb_class - 1) / 2
    fold_mat <- matrix(NA_real_, nrow = nb_var, ncol = nb_pairs)
    fold_names <- character(nb_pairs)
    n <- 1

    for (j in seq_len(nb_class - 1)) {
      for (k in (j + 1):nb_class) {

        if (fold.frac == "Bottom") {
          num <- k
          den <- j
        } else {
          num <- j
          den <- k
        }

        ratio <- class_means[, num] / class_means[, den]
        if (logarithm == "log2") {
          ratio <- log2(ratio)
        } else if (logarithm == "log10") {
          ratio <- log10(ratio)
        }
        fold_mat[, n] <- ratio

        fold_names[n] <- paste("fold", classnames[num], "VS", classnames[den],
                               sep = "_")
        if (logarithm != "none") {
          fold_names[n] <- paste(logarithm, fold_names[n], sep = "_")
        }
        n <- n + 1
      }
    }

    colnames(fold_mat) <- fold_names
    fold <- data.frame(fold_mat, check.names = FALSE)
  }


  # number and proportion of NA ----------------------------------------------

  na_count <- matrix(0, nrow = nb_var, ncol = nb_class)
  na_pct <- matrix(0, nrow = nb_var, ncol = nb_class)
  for (j in seq_len(nb_class)) {
    rows <- members[[j]]
    na_count[, j] <- colSums(is.na(intens[rows, , drop = FALSE]))
    na_pct[, j] <- (na_count[, j] / length(rows)) * 100
  }
  colnames(na_count) <- paste("NA", classnames, sep = "_")
  colnames(na_pct) <- paste("Pct_NA", classnames, sep = "_")

  # Alert message if there is no NA in data matrix
  if (sum(na_count) == 0) {
    cat("Data Matrix contains no NA.\n", "\n")
  }

  table_NA <- data.frame(cbind(na_count, na_pct), check.names = FALSE)


  # check columns names --------------------------------------------------------

  # Appends "_2" to a new column name for as long as it is already in use,
  # so that the output table never gets two columns with the same header.
  make_new_names <- function(new_names, taken) {
    for (j in seq_along(new_names)) {
      while (new_names[j] %in% taken) {
        new_names[j] <- paste(new_names[j], "2", sep = "_")
      }
      taken <- c(taken, new_names[j])
    }
    new_names
  }

  # Fold
  if (nb_class >= 2) {
    colnames(fold) <- make_new_names(colnames(fold), colnames(VM))
    VM <- cbind(VM, fold)
  }

  # NA
  colnames(table_NA) <- make_new_names(colnames(table_NA), colnames(VM))
  VM <- cbind(VM, table_NA)


  # for NA barplots -----------------------------------------------------------

  # Number of variables per class whose percentage of NA falls in
  # [0,20), [20,40), [40,60), [60,80) and [80,100].
  bin_edges <- c(0, 20, 40, 60, 80, 100)
  data_bp <- vapply(
    seq_len(nb_class),
    function(j) {
      bins <- findInterval(na_pct[, j], bin_edges, rightmost.closed = TRUE)
      as.numeric(tabulate(bins, nbins = 5L))
    },
    numeric(5)
  )
  rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
  colnames(data_bp) <- classnames


  # Output ----------------------------------------------------------------------

  write.table(VM, VM.output, sep = "\t", quote = FALSE, row.names = FALSE)

  # graphics pdf
  pdf(graphs.output)
  pdf_dev <- dev.cur()
  # Make sure the pdf device is closed even if a plotting call fails.
  on.exit(if (pdf_dev %in% dev.list()) dev.off(pdf_dev), add = TRUE)

  # Barplots for NA (wide right margin to host the legend outside the plot)
  par(mar = c(5.1, 4.1, 4.1, 8.1), xpd = TRUE)

  bar_colours <- rainbow(nrow(data_bp))
  bp <- barplot(data_bp, col = bar_colours, main = "Proportion of NA",
                xlab = "Classes", ylab = "Variables")
  legend("topright", legend = rownames(data_bp), fill = bar_colours,
         inset = c(-0.3, 0))

  # Write each count in the middle of its stacked segment.
  stock <- 0
  for (i in seq_len(nrow(data_bp))) {
    text(bp, stock + data_bp[i, ] / 2, data_bp[i, ], col = "white", cex = 0.7)
    stock <- stock + data_bp[i, ]
  }

  # Boxplots for fold test
  if (nb_class >= 2) {

    # Infinite folds (a class mean equal to 0) are not drawn.
    clean_fold <- fold
    clean_fold[] <- lapply(clean_fold, function(v) {
      v[is.infinite(v)] <- NA
      v
    })

    for (j in seq_len(ncol(clean_fold))) {
      title <- names(clean_fold)[j]
      if (any(is.finite(clean_fold[[j]]))) {
        boxplot(clean_fold[j], main = title)
      } else {
        # boxplot() fails when there is nothing finite to draw
        plot.new()
        title(main = title)
        text(0.5, 0.5, "No finite fold value to display")
      }
    }
  }

  dev.off(pdf_dev)

}
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
326
51c39ea1fd54 Uploaded
melpetera
parents:
diff changeset
327