Mercurial > repos > melpetera > intensity_checks
comparison Intchecks/Script_intensity_check.R @ 0:51c39ea1fd54 draft
Uploaded
| author | melpetera |
|---|---|
| date | Mon, 14 Jan 2019 08:37:27 -0500 |
| parents | |
| children | d1133a7c26f9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:51c39ea1fd54 |
|---|---|
| 1 ######################################################################### | |
| 2 # SCRIPT INTENSITY CHECK # | |
| 3 # # | |
| 4 # Input: Data Matrix, VariableMetadata, SampleMetadata # | |
| 5 # Output: VariableMetadata, Graphics (barplots and boxplots) # | |
| 6 # # | |
| 7 # Dependencies: RcheckLibrary.R # | |
| 8 # # | |
| 9 ######################################################################### | |
| 10 | |
| 11 | |
| 12 # Parameters (for dev) | |
| 13 if(FALSE){ | |
| 14 | |
| 15 rm(list = ls()) | |
| 16 setwd("Y:\\Developpement\\Intensity check\\Pour tests") | |
| 17 | |
| 18 DM.name <- "DM_NA.tabular" | |
| 19 SM.name <- "SM_NA.tabular" | |
| 20 VM.name <- "vM_NA.tabular" | |
| 21 class.col <- "2" | |
| 22 type <- "One_class" | |
| 23 class1 <- "Blanks" | |
| 24 fold.frac <- "Top" | |
| 25 logarithm <- "log2" | |
| 26 VM.output <- "new_VM.txt" | |
| 27 graphs.output <- "Barplots_and_Boxplots.pdf" | |
| 28 } | |
| 29 | |
| 30 | |
| 31 | |
| 32 | |
| 33 intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1, fold.frac, logarithm, | |
| 34 VM.output, graphs.output){ | |
| 35 | |
| 36 | |
| 37 # This function allows to check the intensities considering classes with a mean fold change calculation, | |
| 38 # the number and the proportion of missing values (NA) in dataMatrix | |
| 39 # | |
| 40 # Two options: | |
| 41 # - one class (selected by the user) against all the remaining samples ("One_class") | |
| 42 # - tests on each class ("Each_class") | |
| 43 # | |
| 44 # Parameters: | |
| 45 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access | |
| 46 # class.col: number of the sampleMetadata's column with classes | |
| 47 # type: "One_class" or "Each_class" | |
| 48 # class1: name of the class, only if type="One_class" | |
| 49 # fold.frac: if type="One class": class1/other ("Top") or other/class1 ("Bottom") | |
| 50 # logarithm: "log2", "log10" or "none" for log mean fold change | |
| 51 # VM.output: output file's access (VM with new columns) | |
| 52 # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values | |
| 53 | |
| 54 | |
| 55 | |
| 56 | |
| 57 | |
| 58 # Input --------------------------------------------------------------------------------------------------- | |
| 59 | |
| 60 DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE) | |
| 61 SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE) | |
| 62 VM <- read.table(VM.name, header=TRUE, sep="\t", check.names=FALSE) | |
| 63 | |
| 64 | |
| 65 | |
| 66 # Table match check with Rchecklibrary | |
| 67 table.check <- match3(DM, SM, VM) | |
| 68 check.err(table.check) | |
| 69 | |
| 70 | |
| 71 | |
| 72 rownames(DM) <- DM[,1] | |
| 73 var_names <- DM[,1] | |
| 74 DM <- DM[,-1] | |
| 75 DM <- data.frame(t(DM)) | |
| 76 | |
| 77 class.col <- colnames(SM)[as.numeric(class.col)] | |
| 78 | |
| 79 | |
| 80 # check class.col, class1 and the number of classes --------------------------------------------------------- | |
| 81 | |
| 82 if(!(class.col %in% colnames(SM))){ | |
| 83 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") | |
| 84 } | |
| 85 | |
| 86 c_class <- SM[,class.col] | |
| 87 c_class <- as.factor(c_class) | |
| 88 nb_class <- nlevels(c_class) | |
| 89 classnames <- levels(c_class) | |
| 90 | |
| 91 if(nb_class < 2){ | |
| 92 err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") | |
| 93 cat(err.1class) | |
| 94 } | |
| 95 | |
| 96 if((nb_class > (nrow(SM))/3)){ | |
| 97 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those | |
| 98 with few samples \n") | |
| 99 cat(class.err) | |
| 100 } | |
| 101 | |
| 102 | |
| 103 if(type == "One_class"){ | |
| 104 if(!(class1 %in% classnames)){ | |
| 105 list.class1 <- c("\n Classes:",classnames,"\n") | |
| 106 cat(list.class1) | |
| 107 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) | |
| 108 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 | |
| 113 #If type is "one_class", change others classes in "other" | |
| 114 if(type == "One_class"){ | |
| 115 for(i in 1:length(c_class)){ | |
| 116 if(c_class[i]!=class1){ | |
| 117 c_class <- as.character(c_class) | |
| 118 c_class[i] <- "Other" | |
| 119 c_class <- as.factor(c_class) | |
| 120 nb_class <- nlevels(c_class) | |
| 121 classnames <- c(class1,"Other") | |
| 122 | |
| 123 } | |
| 124 } | |
| 125 } | |
| 126 | |
| 127 DM <- cbind(DM,c_class) | |
| 128 | |
| 129 | |
| 130 | |
| 131 # fold calculation ------------------------------------------------------------------------------------------- | |
| 132 | |
| 133 if(nb_class >= 2){ | |
| 134 | |
| 135 | |
| 136 fold <- data.frame() | |
| 137 n <- 1 | |
| 138 ratio1 <- NULL | |
| 139 ratio2 <- NULL | |
| 140 | |
| 141 if(type=="Each_class"){ | |
| 142 fold.frac <- "Top" | |
| 143 } | |
| 144 | |
| 145 for(j in 1:(nb_class-1)){ | |
| 146 for(k in (j+1):nb_class) { | |
| 147 | |
| 148 if(fold.frac=="Bottom"){ | |
| 149 ratio1 <- classnames[k] | |
| 150 ratio2 <- classnames[j] | |
| 151 }else{ | |
| 152 ratio1 <- classnames[j] | |
| 153 ratio2 <- classnames[k] | |
| 154 } | |
| 155 | |
| 156 for (i in 1:(length(DM)-1)){ | |
| 157 fold[i,n] <- mean(DM[which(DM$c_class==ratio1),i], na.rm=TRUE)/ | |
| 158 mean(DM[which(DM$c_class==ratio2),i], na.rm=TRUE) | |
| 159 if(logarithm=="log2"){ | |
| 160 fold[i,n] <- log2(fold[i,n]) | |
| 161 }else if(logarithm=="log10"){ | |
| 162 fold[i,n] <- log10(fold[i,n]) | |
| 163 } | |
| 164 } | |
| 165 names(fold)[n] <- paste("fold",ratio1,"VS", ratio2, sep="_") | |
| 166 if(logarithm != "none"){ | |
| 167 names(fold)[n] <- paste(logarithm,names(fold)[n], sep="_") | |
| 168 } | |
| 169 n <- n + 1} | |
| 170 } | |
| 171 | |
| 172 } | |
| 173 | |
| 174 # number and proportion of NA --------------------------------------------------------------------------------- | |
| 175 | |
| 176 calcul_NA <- data.frame() | |
| 177 pct_NA <- data.frame() | |
| 178 for (i in 1:(length(DM)-1)){ | |
| 179 for (j in 1:nb_class){ | |
| 180 n <- 0 | |
| 181 new_DM <- DM[which(DM$c_class==classnames[j]),i] | |
| 182 for(k in 1:length(new_DM)){ | |
| 183 if (is.na(new_DM[k])){ | |
| 184 n <- n + 1} | |
| 185 calcul_NA[i,j] <- n | |
| 186 pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100} | |
| 187 } | |
| 188 } | |
| 189 names(calcul_NA) <- paste("NA",classnames, sep="_") | |
| 190 names(pct_NA) <- paste("Pct_NA", classnames, sep="_") | |
| 191 | |
| 192 # Alert message if there is no NA in data matrix | |
| 193 | |
| 194 sumNA <- colSums(calcul_NA) | |
| 195 sum_total <- sum(sumNA) | |
| 196 alerte <- NULL | |
| 197 if(sum_total==0){ | |
| 198 alerte <- c(alerte, "Data Matrix contains no NA.\n") | |
| 199 } | |
| 200 | |
| 201 if(length(alerte) != 0){ | |
| 202 cat(alerte,"\n") | |
| 203 } | |
| 204 table_NA <- cbind(calcul_NA, pct_NA) | |
| 205 | |
| 206 | |
| 207 | |
| 208 # check columns names --------------------------------------------------------------------------------------- | |
| 209 | |
| 210 | |
| 211 VM.names <- colnames(VM) | |
| 212 | |
| 213 # Fold | |
| 214 | |
| 215 if(nb_class >=2){ | |
| 216 fold.names <- colnames(fold) | |
| 217 | |
| 218 for (i in 1:length(VM.names)){ | |
| 219 for (j in 1:length(fold.names)){ | |
| 220 if (VM.names[i]==fold.names[j]){ | |
| 221 fold.names[j] <- paste(fold.names[j],"2", sep="_") | |
| 222 } | |
| 223 } | |
| 224 } | |
| 225 colnames(fold) <- fold.names | |
| 226 | |
| 227 VM <- cbind(VM,fold) | |
| 228 } | |
| 229 | |
| 230 # NA | |
| 231 NA.names <- colnames(table_NA) | |
| 232 | |
| 233 for (i in 1:length(VM.names)){ | |
| 234 for (j in 1:length(NA.names)){ | |
| 235 if (VM.names[i]==NA.names[j]){ | |
| 236 NA.names[j] <- paste(NA.names[j],"2", sep="_") | |
| 237 } | |
| 238 } | |
| 239 } | |
| 240 colnames(table_NA) <- NA.names | |
| 241 VM <- cbind(VM,table_NA) | |
| 242 | |
| 243 | |
| 244 #for NA barplots ------------------------------------------------------------------------------------------- | |
| 245 | |
| 246 data_bp <- data.frame() | |
| 247 | |
| 248 for (j in 1:ncol(pct_NA)){ | |
| 249 Nb_NA_0_20 <- 0 | |
| 250 Nb_NA_20_40 <- 0 | |
| 251 Nb_NA_40_60 <- 0 | |
| 252 Nb_NA_60_80 <- 0 | |
| 253 Nb_NA_80_100 <- 0 | |
| 254 for (i in 1:nrow(pct_NA)){ | |
| 255 | |
| 256 if ((0<=pct_NA[i,j])&(pct_NA[i,j]<20)){ | |
| 257 Nb_NA_0_20=Nb_NA_0_20+1 | |
| 258 } | |
| 259 | |
| 260 if ((20<=pct_NA[i,j])&(pct_NA[i,j]<40)){ | |
| 261 Nb_NA_20_40=Nb_NA_20_40+1} | |
| 262 | |
| 263 if ((40<=pct_NA[i,j])&(pct_NA[i,j]<60)){ | |
| 264 Nb_NA_40_60=Nb_NA_40_60+1} | |
| 265 | |
| 266 if ((60<=pct_NA[i,j])&(pct_NA[i,j]<80)){ | |
| 267 Nb_NA_60_80=Nb_NA_60_80+1} | |
| 268 | |
| 269 if ((80<=pct_NA[i,j])&(pct_NA[i,j]<=100)){ | |
| 270 Nb_NA_80_100=Nb_NA_80_100+1} | |
| 271 } | |
| 272 data_bp[1,j] <- Nb_NA_0_20 | |
| 273 data_bp[2,j] <- Nb_NA_20_40 | |
| 274 data_bp[3,j] <- Nb_NA_40_60 | |
| 275 data_bp[4,j] <- Nb_NA_60_80 | |
| 276 data_bp[5,j] <- Nb_NA_80_100 | |
| 277 } | |
| 278 rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") | |
| 279 colnames(data_bp) <- classnames | |
| 280 data_bp <- as.matrix(data_bp) | |
| 281 | |
| 282 | |
| 283 # Output --------------------------------------------------------------------------------------------------- | |
| 284 | |
| 285 | |
| 286 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) | |
| 287 | |
| 288 #graphics pdf | |
| 289 | |
| 290 pdf(graphs.output) | |
| 291 | |
| 292 #Barplots for NA | |
| 293 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) | |
| 294 | |
| 295 bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables") | |
| 296 legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0)) | |
| 297 | |
| 298 stock=0 | |
| 299 for (i in 1:nrow(data_bp)){ | |
| 300 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7) | |
| 301 stock <- stock+data_bp[i,] | |
| 302 } | |
| 303 | |
| 304 | |
| 305 #Boxplots for fold test | |
| 306 | |
| 307 if(nb_class >= 2){ | |
| 308 | |
| 309 clean_fold <- fold | |
| 310 for(i in 1:nrow(clean_fold)){ | |
| 311 for(j in 1:ncol(clean_fold)){ | |
| 312 if(is.infinite(clean_fold[i,j])){ | |
| 313 clean_fold[i,j] <- NA | |
| 314 } | |
| 315 } | |
| 316 } | |
| 317 for (j in 1:ncol(clean_fold)){ | |
| 318 title <- paste(fold.names[j]) | |
| 319 boxplot(clean_fold[j], main=title) | |
| 320 } | |
| 321 } | |
| 322 | |
| 323 dev.off() | |
| 324 | |
| 325 } | |
| 326 | |
| 327 |
