# HG changeset patch # User dereeper # Date 1541382530 18000 # Node ID 0658dd612eed9c636c733f3d972e3bca661b0fd5 Uploaded diff -r 000000000000 -r 0658dd612eed blupcal.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blupcal.R Sun Nov 04 20:48:50 2018 -0500 @@ -0,0 +1,326 @@ +# Calculation of BLUE/BLUP +# Umesh Rosyara, CIMMYT +blupcal<- function(data, + Replication = "Rep", + Genotype = "Entry", + y = "y", + design = "rcbd", + Block = NULL, + summarizeby = NULL, + groupvar1 = NULL, + groupvar2 = NULL) { + + suppressMessages(library(arm)) + # so that it would not throw messages at the stderr channel of Galaxy + library(lme4, quietly = TRUE) + + # Basic summary of the variables eused + print(paste("Replication variable = ", Replication)) + print(paste("block variable = ", Block)) + print(paste("Genotype variable = ", Genotype)) + print(paste("summarizeby (included in the model) = ", summarizeby)) + print(paste("groupvariable 1 (included in the model) = ", groupvar1)) + print(paste("groupvariable 2 (included in the model) = ", groupvar2)) + print(paste("design = ", design)) + print(paste("y = ", y)) + + # Summary of data + print("*************************************") + print("*************************************") + print(summary(data)) + print("*************************************") + print("*************************************") + + data_list <- list() + if (length(summarizeby) != 0) { + data$summarizeby <- as.factor(data[,summarizeby]) + data_list = split(data, f = data$summarizeby) + } else { + data$summarizeby <- "none" + data_list[[1]] <- data + } + + all_results <- list() + + for (i in 1: length(data_list)) { + data1 <- data_list[[i]] + + data1$Rep <- as.factor(data1[, Replication]) + cat("\ndata1$Rep:\n") + print(data1$Rep) + + data1$Entry <- as.factor(data1[, Genotype]) + cat("\ndata1$Entry:\n") + print(data1$Entry) + + if (design == "lattice") { + data1$Subblock <- as.factor(data1[, Block]) + } + + data1$y <- as.numeric(data1[, y]) + cat("\ndata1$y:\n") + print(data1$y) + + if (length(groupvar1) != 0) { + data1$groupvar1 <- as.factor(data1[, groupvar1]) + cat("\ndata1$groupvar1:\n") + print(data1$groupvar1) + } + + if (length(groupvar2) != 0) { + data1$groupvar2 <- as.factor(data1[, groupvar2]) + cat("\ndata1$groupvar2:\n") + print(data1$groupvar2) + } + + if (design == "rcbd") { + if (length(groupvar1) != 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep), + data1) + } + } + if (length(groupvar1) == 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|Rep), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|Rep), + data1) + } + } + } + + if (design == "lattice") { + if (length(groupvar1) != 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|groupvar2) + + (1|Entry:groupvar1) + + (1|Entry:groupvar2) + + (1|Entry:groupvar1:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar1) + + (1|Entry:groupvar1) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + } + if (length(groupvar1) == 0) { + if (length(groupvar2) != 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|groupvar2) + + (1|Entry:groupvar2) + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + if (length(groupvar2) == 0) { + fm1 <- lmer(y ~ 1 + + (1|Entry) + + (1|Rep) + + (1|Rep:Subblock), + data1) + fm2 <- lmer(y ~ (-1) + + Entry + + (1|Rep) + + (1|Rep:Subblock), + data1) + } + } + } + + cat("\nfm1:\n") + print(fm1) + cat("\nfm2:\n") + print(fm2) + + mean1 <- mean(data1$y, na.rm = TRUE) + tp <- ranef(fm1)$Entry + if (all(tp == 0)) { + stop("error in model: all BLUP effects are zero") + } + + ######## + # BLUP + ######## + blup <- tp + mean1 + names(blup) <- "blup" + + varComponents <- as.data.frame(VarCorr(fm1)) + + # extract the genetic variance component + Vg <- varComponents[match('Entry', varComponents[,1]), 'vcov'] + + # This function extracts standard errors of model random effect from modeled object in lmer + SErr <- se.ranef(fm1)$Entry[,1] + + # Prediction Error Variance (PEV) + PEV <- (SErr) ^ 2 + + # PEV reliability + pevReliability <- 1 - (PEV / Vg) + names(pevReliability) <- "PEV reliability" + + blupdf = data.frame(genotype = rownames(ranef(fm1)$Entry), + blup = blup, + BLUP_PEV = PEV, + BLUP_pevReliability = pevReliability) + + ######## + # BLUE + ######## + blue <- fixef(fm2) + + bluedf <- data.frame(genotype = substr(names(blue), 6, nchar(names(blue))), + blue = blue) + + ######### + # MEANS + ######### + means <- with(data1, tapply(y, Entry, mean, na.rm = TRUE)) + + meandf <- data.frame(genotype = names(means), + means = means) + + resultdf1 <- merge(meandf, bluedf, by = "genotype") + resultdf <- merge(resultdf1, blupdf, by = "genotype") + + if (length(summarizeby) != 0) { + results <- data.frame(row.names = NULL, + genotype = resultdf$genotype, + blue = resultdf$blue, + blup = resultdf$blup, + BLUP_PEV = resultdf$BLUP_PEV, + pevReliability = resultdf$BLUP_pevReliability, + means = resultdf$means, + group = levels(data$summarizeby)[i]) + names(results) <- c(Genotype, + paste(y, "_blue", sep = ""), + paste(y, "_blup", sep = ""), + paste(y, "_PEV", sep = ""), + paste(y, "_pevReliability", sep = ""), + paste(y, "_means", sep = ""), + summarizeby) + } else { + results <- data.frame(row.names = NULL, + genotype = resultdf$genotype, + blue = resultdf$blue, + blup = resultdf$blup, + BLUP_PEV = resultdf$BLUP_PEV, + pevReliability = resultdf$BLUP_pevReliability, + means = resultdf$means) + names(results) <- c(Genotype, + paste(y, "_blue", sep = ""), + paste(y, "_blup", sep = ""), + paste(y, "_PEV", sep = ""), + paste(y, "_pevReliability", sep = ""), + paste(y, "_means", sep = "")) + } + + all_results[[i]] <- results + } + outdf <- do.call("rbind", all_results) + class(outdf) <- c("blupcal", class(outdf)) + return(outdf) +} + +# plot function of blupcal module +plot2.blupcal <- function(outdf) { + hist_with_box <- function(data, main = main, hist.col, box.col) { + histpar <- hist(data, plot = FALSE) + hist(data, col = hist.col, main = main, ylim = c(0, max(histpar$density) + max(histpar$density) * 0.3), prob = TRUE) + m = mean(data, na.rm = TRUE) + std = sd(data, na.rm = TRUE) + curve(dnorm(x, mean = m, sd = std), col = "yellow", lwd = 2, add = TRUE, yaxt = "n") + boxout <- boxplot(data, plot = FALSE) + points(boxout$out, y = rep(max(histpar$density) * 0.3, length(boxout$out)), col = "red", pch = 1) + texts <- paste("mean = ", round(mean(data, na.rm = TRUE), 2), " sd = ", round(sd(data, na.rm = TRUE), 2), " n = ", length(data)) + text(min(histpar$breaks), max(histpar$density) + max(histpar$density) * 0.2, labels = texts, pos = 4) + } + par(mfrow = c(3,1), mar = c(3.1, 3.1, 1.1, 2.1)) + hist_with_box(outdf[,2], main = names(outdf)[2], hist.col = "green4", box.col = "green1") + hist_with_box(outdf[,3], main = names(outdf)[3], hist.col = "blue4", box.col = "blue1") + hist_with_box(outdf[,4], main = names(outdf)[4], hist.col = "gray20", box.col = "gray60") +} diff -r 000000000000 -r 0658dd612eed blupcal.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blupcal.xml Sun Nov 04 20:48:50 2018 -0500 @@ -0,0 +1,185 @@ + + calculator + + r-getopt + r-lme4qtl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This function calculates BLUP, BLUE and means for Y variable based on the model specified by the user. + +The required variables for RCBD are Genotype and Replication and for lattice required variables are Genotype, Replication (main block) and Block within Replication (sub block or block). User can add additional two X variables (eg. Location, Season etc) in the model and are treated as factor ( even supplied as numerical variable). +If you want to calculate BLUP / BLUE by a grouping variable (eg. Management), then this will calculate BLUP / BLUE for each level of the variable. + +**Model** + +for RCBD design + +without additional factors (variable 1 and variable 2) + +*Y = Genotype + Replication + error* RCBD + +*Y = Genotype + Replication + Block within Replication + error* Lattice + + +With variable 1 + +*Y = Variable 1 + Genotype + Variable 1 : Genotype + Replication + error* RCBD + +*Y = Variable 1 + Genotype + Replication + Variable 1 : Genotype + Block within Replication + error* Lattice + + +With variable 1 and variable 2 + +*Y = Variable 2 + Variable 1 + Genotype + Variable 1 : Genotype + Variable 1: Variable 2 + Variable 1:Variable 2: Genotype + Replication + error* RCBD +*Y = Variable 2 + Variable 1 + Genotype + Variable 1 : Genotype + Variable 1: Variable 2 + Variable 1:Variable 2: Genotype + Replication + Block within Replication + error* RCBD + +**Model details** + +- Genotype is treated as Fixed for BLUE calculation and random for BLUP calculation. +- Replication, Block within Replication are treated as random both BLUP and BLUE calculations. +- Variable 1, Genotype : Variable 1 are treated as random both BLUP and BLUE calculations. +- Variable 2, Genotype : Variable 2, Variable 1 : Variable 2, Genotype : Variable 1 : Variable 2 are treated as random effects for both BLUP and BLUE calculations. + + +**Citation** +Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. + +**Author(s)** + + diff -r 000000000000 -r 0658dd612eed blupcal_wrapper.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blupcal_wrapper.R Sun Nov 04 20:48:50 2018 -0500 @@ -0,0 +1,186 @@ +#!/usr/bin/Rscript + +options(show.error.messages = F, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, F) } ) +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") +library("getopt") + +options(stringAsfactors = FALSE, useFancyQuotes = FALSE) +options(warn = -1) +args <- commandArgs(trailingOnly = TRUE) +option_specification <- matrix(c( + "tool_directory", "a", 1, "character" +,"tabular_file", "b", 1, "character" +,"replication_vector_column_index", "c", 1, "integer" +,"genotype_vector_column_index", "d", 1, "integer" +,"y_vector_column_index", "e", 1, "integer" +,"design", "f", 1, "character" +,"block_vector_column_index", "g", 1, "integer" +,"summarize_by", "h", 1, "character" +,"summarize_by_vector_column_index", "i", 1, "integer" +,"group_variable_1", "j", 1, "character" +,"group_variable_1_vector_column_index", "k", 1, "integer" +,"group_variable_2", "l", 1, "character" +,"group_variable_2_vector_column_index", "m", 1, "integer" +,"output_file_path", "n", 1, "character" +,"png_plots_file_path", "o", 1, "character" +,"png_histograms_file_path", "p", 1, "character" +,"pdf_plots_file_path", "q", 1, "character" +,"pdf_histograms_file_path", "r", 1, "character" +), byrow = TRUE, ncol = 4) +options <- getopt(option_specification) + +cat("\nTool Directory: ", options$tool_directory) +cat("\nTabular File: ", options$tabular_file) +cat("\nReplication Vector Column Index: ", options$replication_vector_column_index) +cat("\nGenotype Vector Column Index: ", options$genotype_vector_column_index) +cat("\nY Vector Column Index: ", options$y_vector_column_index) +cat("\nDesign: ", options$design) +cat("\nBlock Vector Column Index: ", options$block_vector_column_index) +cat("\nSummarize By: ", options$summarize_by) +cat("\nSummarize By Vector Column Index: ", options$summarize_by_vector_column_index) +cat("\nGroup Variable #1: ", options$group_variable_1) +cat("\nGroup Variable #1 Vector Column Index: ", options$group_variable_1_vector_column_index) +cat("\nGroup Variable #2: ", options$group_variable_2) +cat("\nGroup Variable #2 Vector Column Index: ", options$group_variable_2_vector_column_index) +cat("\nOutput file path: ", options$output_file_path) +cat("\nPNG plots file path: ", options$png_plots_file_path) +cat("\nPNG histograms file path: ", options$png_histograms_file_path) +cat("\nPDF plots file path: ", options$pdf_plots_file_path) +cat("\nPDF histograms file path: ", options$pdf_histograms_file_path) +cat("\n\n") + +tabular_data <- read.table(file = options$tabular_file, + header = TRUE, + sep = "\t", + stringsAsFactors = FALSE, + strip.white = TRUE, + na.strings = ".") +#tabular_data + +column_names <- colnames(tabular_data) +cat("Column names:\n") +column_names +cat("\n\n") + +# Replication +replication_vector_header <- column_names[options$replication_vector_column_index] +cat("\nreplication vector header: ", replication_vector_header) + +# Genotype +genotype_vector_header <- column_names[options$genotype_vector_column_index] +cat("\ngenotype vector header: ", genotype_vector_header) + +# Y +y_vector_header <- column_names[options$y_vector_column_index] +cat("\ny vector header: ", y_vector_header) + +# Design +design <- options$design +cat("\ndesign: ", design) + +# Block +if (design == "rcbd") { + block_vector_header <- NULL +} else { + block_vector_header <- column_names[options$block_vector_column_index] +} +cat("\nblock vector header: ", block_vector_header) + +# Summarize By +if (options$summarize_by == "false") { + summarize_by_vector_header <- NULL +} else { + summarize_by_vector_header <- column_names[options$summarize_by_vector_column_index] +} +cat("\nsummarize by vector header: ", summarize_by_vector_header) + +# Group Variable #1 +if (options$group_variable_1 == "false") { + group_variable_1_vector_header <- NULL +} else { + group_variable_1_vector_header <- column_names[options$group_variable_1_vector_column_index] +} +cat("\ngroup variable 1 vector header: ", group_variable_1_vector_header) + +# Group Variable #2 +if (options$group_variable_2 == "false") { + group_variable_2_vector_header <- NULL +} else { + group_variable_2_vector_header <- column_names[options$group_variable_2_vector_column_index] +} +cat("\ngroup variable 2 vector header: ", group_variable_2_vector_header) + +cat("\n\n") + +source(paste(options$tool_directory, "/blupcal.R", sep = "")) + +fit <- blupcal(data = tabular_data, + Replication = replication_vector_header, + Genotype = genotype_vector_header, + y = y_vector_header, + design = design, + Block = block_vector_header, + summarizeby = summarize_by_vector_header, + groupvar1 = group_variable_1_vector_header, + groupvar2 = group_variable_2_vector_header) + +cat("\n\n") + +output_file_path <- options$output_file_path + +cat("\nfit: ") +cat("\n") +fit +cat("\n") + +#fit$gid <- as.numeric(levels(fit$gid))[fit$gid] +if (options$summarize_by == "false") { + fit_view <- fit +} else { + fit_view <- fit[c(1, 7, 2, 3, 4, 5, 6)] +} + +formatted_table <- lapply(fit_view, function(.col) { + if (is.numeric(.col)) { + return(as.numeric(sprintf("%.3f", .col))) + } else { + return(.col) + } + }) + +write.table(formatted_table, + file = output_file_path, + sep = "\t", + row.names = FALSE, + quote = FALSE) + +png_plots_file_path <-options$png_plots_file_path +if (!(is.null(png_plots_file_path))) { + png(png_plots_file_path) + plot(fit) + dev.off() +} + +png_histograms_file_path <-options$png_histograms_file_path +if (!(is.null(png_histograms_file_path))) { + png(png_histograms_file_path) + plot2.blupcal(fit) + dev.off() +} + +pdf_plots_file_path <-options$pdf_plots_file_path +if (!(is.null(pdf_plots_file_path))) { + pdf(pdf_plots_file_path) + plot(fit) + dev.off() +} + +pdf_histograms_file_path <-options$pdf_histograms_file_path +if (!(is.null(pdf_histograms_file_path))) { + pdf(pdf_histograms_file_path) + plot2.blupcal(fit) + dev.off() +}