# HG changeset patch # User galaxyp # Date 1545339927 18000 # Node ID dec87511835efac5801c434e38691f0c41ab680a # Parent f1db758949f49b71f49bac0c1aa4f993b34c66e8 planemo upload commit 1887dff812162880d66b003a927867cd5000c98f diff -r f1db758949f4 -r dec87511835e quantp.r --- a/quantp.r Fri Sep 14 12:22:13 2018 -0400 +++ b/quantp.r Thu Dec 20 16:05:27 2018 -0500 @@ -16,6 +16,7 @@ png(outfile, width = 6, height = 6, units = 'in', res=300); # bitmap(outfile, "png16m"); g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); + saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot))) plot(g); dev.off(); } @@ -30,20 +31,20 @@ regmodel_summary = summary(regmodel); cat("

Linear Regression model fit between Proteome and Transcriptome data

\n", - "

Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.

\n", - '\n', - file = htmloutfile, append = TRUE); + "

Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.

\n", + '

Parameter	Value

\n', + file = htmloutfile, append = TRUE); cat("\n", - "","\n", - "\n", - "\n", - "","\n", - "\n", - "\n", - "\n", - "\n", - file = htmloutfile, append = TRUE); + "","\n", + "\n", + "\n", + "","\n", + "\n", + "\n", + "\n", + "\n", + file = htmloutfile, append = TRUE); cat("

Parameter	Value
Formula	","PE_abundance~TE_abundance","
Coefficients
",names(regmodel$coefficients[1]),"	",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","
",names(regmodel$coefficients[2]),"	",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","
Model parameters
Residual standard error	",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)
F-statistic	",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)
R-squared	",regmodel_summary$r.squared,"
Adjusted R-squared	",regmodel_summary$adj.r.squared,"
Coefficients
",names(regmodel$coefficients[1]),"	",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","
",names(regmodel$coefficients[2]),"	",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","
Model parameters
Residual standard error	",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)
F-statistic	",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)
R-squared	",regmodel_summary$r.squared,"
Adjusted R-squared	",regmodel_summary$adj.r.squared,"

\n", file = htmloutfile, append = TRUE); @@ -58,13 +59,27 @@ plot(regmodel, 1, cex.lab=1.5); dev.off(); + suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] + + geom_point(aes(text=sprintf("Residual: %.2f
Fitted value: %.2f
Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)), + shape = 1, size = .1, stroke = .2) + + theme_light()) + saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot))) + outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); png(outplot,width = 10, height = 10, units = 'in', res=300); # bitmap(outplot, "png16m"); par(mfrow=c(1,1)); - plot(regmodel, 2, cex.lab=1.5); + g <- plot(regmodel, 2, cex.lab=1.5); + ggplotly(g) dev.off(); + suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] + + geom_point(aes(text=sprintf("Standarized residual: %.2f
Theoretical quantile: %.2f
Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)), + shape = 1, size = .1) + + theme_light()) + saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) + + outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); png(outplot, width = 10, height = 10, units = 'in',res=300); # bitmap(outplot, "png16m"); @@ -72,79 +87,94 @@ plot(regmodel, 5, cex.lab=1.5); dev.off(); + cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)} + cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)} + + suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] + + aes(label = PE_TE_data$PE_ID) + + geom_point(aes(text=sprintf("Leverage: %.2f
Standardized residual: %.2f
Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) + + theme_light()) + saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) + cat('', file = htmloutfile, append = TRUE); - cat( + cat( '\n", '\n', file = htmloutfile, append = TRUE); cat( - '\n', - file = htmloutfile, append = TRUE); + '\n', file = htmloutfile, append = TRUE); cat( '\n', '

', " 1) Residuals vs Fitted plot	2) Normal Q-Q plot of residuals

', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div), + '	', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div), + '
This plot checks for linear relationship assumptions. If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.	This plot checks whether residuals are normally distributed or not. It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.

\n', file = htmloutfile, append = TRUE); - - - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - # Residuals data - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - res_all = regmodel$residuals; - res_mean = mean(res_all); - res_sd = sd(res_all); - res_diff = (res_all-res_mean); - res_zscore = res_diff/res_sd; - # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] - - - tempind = which((res_zscore > 2)|(res_zscore < -2)); - res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; - res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; - res_PE_TE_data_outlier = PE_TE_data[tempind,]; - res_PE_TE_data_outlier$residuals = res_all[tempind]; + + + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + # Residuals data + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + res_all = regmodel$residuals; + res_mean = mean(res_all); + res_sd = sd(res_all); + res_diff = (res_all-res_mean); + res_zscore = res_diff/res_sd; + # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] + + + tempind = which((res_zscore > 2)|(res_zscore < -2)); + res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; + res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; + res_PE_TE_data_outlier = PE_TE_data[tempind,]; + res_PE_TE_data_outlier$residuals = res_all[tempind]; # Save the complete table for download (influential_observations) - temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) - colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") - outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); - write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); + temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) + colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") + outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); + write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); # Save the complete table for download (non influential_observations) - temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) - colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") - outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); - write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); + temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) + colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") + outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); + write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); cat('

Outliers based on the residuals from regression analysis

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); cat('\n', - '\n', - '\n', - file = htmloutfile, append = TRUE); - + '\n', + '\n', + file = htmloutfile, append = TRUE); + cat("\n", - "\n", - '\n', - '\n', - "

Residuals from Regression
Parameter	Value
Residuals from Regression
Parameter	Value
Mean Residual value	",res_mean,"
Standard deviation (Residuals)	",res_sd,"
Total outliers (Residual value > 2 standard deviation from the mean)	',length(tempind),' (Download these ',length(tempind),' data points with high residual values here)
(Download the complete residuals data here)

\n", - file = htmloutfile, append = TRUE); - + "Standard deviation (Residuals)",res_sd,"\n", + 'Total outliers (Residual value > 2 standard deviation from the mean)',length(tempind),' (Download these ',length(tempind),' data points with high residual values here)\n', + '', + '(Download the complete residuals data here)', + "\n

\n", + file = htmloutfile, append = TRUE); + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - - + + cat('

', file = htmloutfile, append = TRUE); - + cat( '\n', file = htmloutfile, append = TRUE); cat( - '\n', + '\n', file = htmloutfile, append = TRUE); cat( @@ -157,7 +187,7 @@ # Cook's Distance #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ cat('

INFLUENTIAL OBSERVATIONS

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); cat( '

Cook\'s distance computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.
In general use, those observations that have a Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean may be classified as influential.

\n', file = htmloutfile, append = TRUE); @@ -177,11 +207,31 @@ #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels dev.off(); + cooksd_df <- data.frame(cooksd) + cooksd_df$genes <- row.names(cooksd_df) + cooksd_df$index <- 1:nrow(cooksd_df) + cooksd_df$colors <- "black" + cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T) + cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red" + + g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors), + text=sprintf("Gene: %s
Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) + + ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") + + #xlim(0, 3000) + ylim(0, .15) + + scale_shape_discrete(solid=F) + + geom_point(size = 2, shape = 8) + + geom_hline(yintercept = cutoff, + linetype = "dashed", color = "red") + + scale_color_manual(values = c("black" = "black", "red" = "red")) + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot))) + cat( '

', + gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '
In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients

', file = htmloutfile, append = TRUE); - + tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); PE_TE_data_no_outlier = PE_TE_data[-tempind,]; PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; @@ -195,49 +245,63 @@ file = htmloutfile, append = TRUE); # Save the complete table for download (influential_observations) - temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) - colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") - outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); - write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); + temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) + colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") + outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); + write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); # Save the complete table for download (non influential_observations) - temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) - colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") - outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); - write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); + temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) + colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") + outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); + write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); cat("\n", - "\n", - - "

3) Residuals vs Leverage plot

', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div) + , '
Mean Cook\'s distance	",mean(cooksd, na.rm=T),"
Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)	",length(tempind),"
Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance	",length(which(cooksd\n", - "

\n", - file = htmloutfile, append = TRUE); - - - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - # Scatter plot after removal of influential points - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); - min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); - max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); - png(outplot, width = 10, height = 10, units = 'in', res=300); - # bitmap(outplot,"png16m"); - g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); - suppressMessages(plot(g)); - dev.off(); - - cat('\n', file = htmloutfile, append = TRUE); - # Before - cat("\n', file = htmloutfile, append = TRUE); - - # After - cat("\n', - file = htmloutfile, append = TRUE); - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - + "\n", + + "

Scatterplot: Before removal	Scatterplot: After removal
", '	\n", - '
Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)	",length(tempind),"
Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance	",length(which(cooksd\n", + "

\n", + file = htmloutfile, append = TRUE); + + + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + # Scatter plot after removal of influential points + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); + min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); + max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); + png(outplot, width = 10, height = 10, units = 'in', res=300); + # bitmap(outplot,"png16m"); + suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + + xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + + geom_point(aes(text=sprintf("Gene: %s
Transcript Abundance (log fold-change): %.3f
Protein Abundance (log fold-change): %.3f", + PE_ID, TE_abundance, PE_abundance)))) + suppressMessages(plot(g)) + suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot)))) + dev.off(); + + + cat('\n', file = htmloutfile, append = TRUE); + # Before + cat("\n', + file = htmloutfile, append = TRUE); + + # After + cat("\n', + file = htmloutfile, append = TRUE); + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); @@ -249,7 +313,7 @@ cat('

Scatterplot: Before removal

Scatterplot: After removal

", + '

', + gsub('id="html', 'id="secondhtml"', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)), + '

\n", + '

', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div), + + '

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); cat( "\n", @@ -267,13 +331,13 @@ } cat("

Download the complete list of influential observations ", - "Download the complete list (After removing influential points)
\n", - '

Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)

\n', - file = htmloutfile, append = TRUE); + "Download the complete list (After removing influential points)
\n", + '

Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)

\n', + file = htmloutfile, append = TRUE); cat('

Parameter	Method 1	Method 2	Method 3
Correlation method	",cor_result_pearson$method,"	",cor_result_spearman$method,"	",cor_result_kendall$method,"

\n', sep = "",file = htmloutfile, append = TRUE); cat("\n", - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); for(i in 1:tab_n_row) @@ -285,9 +349,9 @@ '\n', file = htmloutfile, append = TRUE); } - cat('

Gene	Protein Log Fold-Change	Transcript Log Fold-Change	Cook's Distance
',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'

\n',file = htmloutfile, append = TRUE); - - + cat('

\n',file = htmloutfile, append = TRUE); + + } @@ -297,7 +361,7 @@ #=============================================================================== singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ cat('
\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) hm_cluster = cutree(hc,k=hm_nclust); @@ -306,11 +370,25 @@ png(outplot, width = 10, height = 10, units = 'in', res=300); # bitmap(outplot, "png16m"); par(mfrow=c(1,1)); - hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("Proteins","Transcripts"), scale="col", hclustfun = hclust, distfun = dist); + hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), + trace="none", cexCol=1, col=greenred(100),Colv=F, + labCol=c("Proteins","Transcripts"), scale="col", + hclustfun = hclust, distfun = dist); + dev.off(); - cat('\n', - file = htmloutfile, append = TRUE); + + p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col", + dendrogram = "row", colors = greenred(100), + hclustfun = hclust, distfun = dist, + show_grid = FALSE) + saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot))) + + cat('\n', + file = htmloutfile, append = TRUE); temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); @@ -320,7 +398,7 @@ cat('

Heatmap of PE and TE abundance values (Hierarchical clustering)	Number of clusters to extract: ',hm_nclust,'

', + '', + gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), + '
Download the hierarchical cluster list

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); } @@ -338,7 +416,6 @@ legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) - ind=which(k1$cluster==1); points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); ind=which(k1$cluster==2); @@ -361,18 +438,30 @@ points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); dev.off(); + # Interactive plot for k-means clustering + g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data), + text=sprintf("Gene: %s
Transcript Abundance: %.3f
Protein Abundance: %.3f", + PE_ID, TE_abundance, PE_abundance), + color=as.factor(k1$cluster))) + + xlab("Transcript Abundance") + ylab("Protein Abundance") + + scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) + + geom_point(size = 1, shape = 8) + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot))) + cat('

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); tempind = order(k1$cluster); tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") - - cat('\n', - file = htmloutfile, append = TRUE); + #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); + cat('\n', + file = htmloutfile, append = TRUE); cat('

K-mean clustering	Number of clusters: ',nclust,'

', + gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '
Download the cluster list

\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); } @@ -385,9 +474,14 @@ max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); png(outfile, width = 10, height = 10, units = 'in', res=300); # bitmap(outfile, "png16m"); - g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); - suppressMessages(plot(g)); - # plot(g); + suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + + xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + + geom_point(aes(text=sprintf("Gene: %s
Transcript Abundance (log fold-change): %.3f
Protein Abundance (log fold-change): %.3f", + PE_ID, TE_abundance, PE_abundance)), + size = .5)) + suppressMessages(plot(g)) + suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile)))) dev.off(); } @@ -422,23 +516,41 @@ tempdf[is.na(tempdf)] = 0; tempdf$Sample = rownames(tempdf); tempdf1 = melt(tempdf, id.vars = "Sample"); + + if("Gene" %in% colnames(df)){ + tempdf1$Name = df$Gene; + } else if ("Protein" %in% colnames(df)){ + tempdf1$Name = df$Protein; + } else if ("Genes" %in% colnames(df)){ + tempdf1$Name = df$Genes; + } + tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; png(outplot, width = 6, height = 6, units = 'in', res=300); # bitmap(outplot, "png16m"); - if(fill_leg=="Yes") - { - g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) - }else{ - if(fill_leg=="No") - { - tempdf1$Group = c("case", "control") - g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) - } + if(fill_leg=="No"){ + tempdf1$Group = c("case", "control") } + + g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + + geom_boxplot()+ + labs(x=user_xlab) + labs(y=user_ylab) + saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile))) plot(g); dev.off(); } +## A wrapper to saveWidget which compensates for arguable BUG in +## saveWidget which requires `file` to be in current working +## directory. +saveWidget <- function (widget,file,...) { + wd<-getwd() + on.exit(setwd(wd)) + outDir<-dirname(file) + file<-basename(file) + setwd(outDir); + htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE) +} #=============================================================================== # Mean or Median of Replicates @@ -446,35 +558,35 @@ mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) { -grps = unique(sampleinfo_df[,2]); - -TE_df_merged <<- sapply(grps, function(x){ - tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] - if(length(tempsample)!=1){ - apply(TE_df[,tempsample],1,method); - }else{ - return(TE_df[,tempsample]); - } -}); -TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); -colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); - -PE_df_merged <<- sapply(grps, function(x){ - tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] - if(length(tempsample)!=1){ - apply(PE_df[,tempsample],1,method); - }else{ - return(PE_df[,tempsample]); - } -}); - -PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); -colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); - -#sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); -sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); - -return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); + grps = unique(sampleinfo_df[,2]); + + TE_df_merged <<- sapply(grps, function(x){ + tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] + if(length(tempsample)!=1){ + apply(TE_df[,tempsample],1,method); + }else{ + return(TE_df[,tempsample]); + } + }); + TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); + colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); + + PE_df_merged <<- sapply(grps, function(x){ + tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] + if(length(tempsample)!=1){ + apply(PE_df[,tempsample],1,method); + }else{ + return(PE_df[,tempsample]); + } + }); + + PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); + colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); + + #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); + sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); + + return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); } #=============================================================================== @@ -483,127 +595,162 @@ perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) { - -PE_colnames = colnames(PE_df_data); -control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; -control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); -condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; -condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); - -if(method=="mean"){ - #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); - PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) -}else{ -if(method=="median"){ - PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) - # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + + PE_colnames = colnames(PE_df_data); + control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; + control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); + condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; + condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); + + if(method=="mean"){ + #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); + PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + }else{ + if(method=="median"){ + PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + } } -} -PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) - - -TE_colnames = colnames(TE_df_data); -control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; -control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); -condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; -condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); - -if(method=="mean"){ - # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); - TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) -}else{ - if(method=="median"){ - TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) - # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) + + + TE_colnames = colnames(TE_df_data); + control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; + control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); + condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; + condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); + + if(method=="mean"){ + # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); + TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + }else{ + if(method=="median"){ + TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + } } -} -TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) - - -PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); -colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); -outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); -write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); -cat("

Download the complete fold change data here
\n", - file = htmloutfile, append = TRUE); - - if(length(condition_ind)!=1) - { - # Volcano Plot - - if(volc_with=="adj_pval") - { - PE_pval = PE_adj_pval - TE_pval = TE_adj_pval - volc_ylab = "-log10 Adjusted p-value"; - }else{ - if(volc_with=="pval") - { - volc_ylab = "-log10 p-value"; - } - } - outplot = paste(outdir,"/PE_volcano.png",sep="",collapse=""); - png(outplot, width = 10, height = 10, units = 'in', res=300); - # bitmap(outplot, "png16m"); - par(mfrow=c(1,1)); + TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) + + + PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); + colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); + outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); + write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); + cat("

Download the complete fold change data here
\n", + file = htmloutfile, append = TRUE); - plot(PE_df_logfold$LogFold, -log10(PE_pval), - xlab="log2 fold change", ylab=volc_ylab, - type="n") - sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use - points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") - #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFoldlog(2,base=2))|(PE_df_logfold$LogFoldlog(2,base=2))|(PE_df_logfold$LogFold0.05) - sel=intersect(sel,sel1) - points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") - abline(h = -log(0.05,base=10), col="red", lty=2) - abline(v = log(2,base=2), col="red", lty=2) - abline(v = log(0.5,base=2), col="red", lty=2) - dev.off(); + if(length(condition_ind)!=1) + { + # Volcano Plot + + if(volc_with=="adj_pval") + { + PE_pval = PE_adj_pval + TE_pval = TE_adj_pval + volc_ylab = "-log10 Adjusted p-value"; + }else{ + if(volc_with=="pval") + { + volc_ylab = "-log10 p-value"; + } + } + outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse=""); + png(outplot_PE, width = 10, height = 10, units = 'in', res=300); + # bitmap(outplot, "png16m"); + par(mfrow=c(1,1)); + + plot(PE_df_logfold$LogFold, -log10(PE_pval), + xlab="log2 fold change", ylab=volc_ylab, + type="n") + sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use + points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") + PE_df_logfold$color <- "black" + #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFoldlog(2,base=2))|(PE_df_logfold$LogFoldlog(2,base=2))|(PE_df_logfold$LogFold0.05) + sel=intersect(sel,sel1) + points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") + PE_df_logfold[sel,]$color <- "blue" + abline(h = -log(0.05,base=10), col="red", lty=2) + abline(v = log(2,base=2), col="red", lty=2) + abline(v = log(0.5,base=2), col="red", lty=2) + dev.off(); + + g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color), + text=sprintf("Gene: %s
Log2 Fold-Change: %.3f
-log10 p-value: %.3f
p-value: %.3f", + Genes, LogFold, -log10(PE_pval), PE_pval))) + + xlab("log2 fold change") + ylab("-log10 p-value") + + geom_point(shape=1, size = 1.5, stroke = .2) + + scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + + geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + + geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + + geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE))) + + outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse=""); + png(outplot_TE, width = 10, height = 10, units = 'in', res=300); + # bitmap(outplot, "png16m"); + par(mfrow=c(1,1)); + + plot(TE_df_logfold$LogFold, -log10(TE_pval), + xlab="log2 fold change", ylab=volc_ylab, + type="n") + + sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use + points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") + TE_df_logfold$color <- "black" + #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFoldlog(2,base=2))|(TE_df_logfold$LogFoldlog(2,base=2))|(TE_df_logfold$LogFold0.05) + sel=intersect(sel,sel1) + points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") + TE_df_logfold[sel,]$color <- "blue" + abline(h = -log(0.05,base=10), col="red", lty=2) + abline(v = log(2,base=2), col="red", lty=2) + abline(v = log(0.5,base=2), col="red", lty=2) + dev.off(); + + g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color), + text=sprintf("Gene: %s
Log2 Fold-Change: %.3f
-log10 p-value: %.3f
p-value: %.3f", + Genes, LogFold, -log10(TE_pval), TE_pval))) + + xlab("log2 fold change") + ylab("-log10 p-value") + + geom_point(shape=1, size = 1.5, stroke = .2) + + scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + + geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + + geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + + geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE))) + + + cat('
\n', file = htmloutfile, append = TRUE); + cat("\n', file = htmloutfile, append = TRUE); + cat("

Transcript Fold-Change	Protein Fold-Change
", + '', + extractWidgetCode(outplot_TE)$widget_div, + '	", + '', + extractWidgetCode(outplot_PE)$widget_div, + '

\n', + file = htmloutfile, append = TRUE); + + + }else{ + cat('

!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!

', + file = htmloutfile, append = TRUE); + } - - - outplot = paste(outdir,"/TE_volcano.png",sep="",collapse=""); - png(outplot, width = 10, height = 10, units = 'in', res=300); - # bitmap(outplot, "png16m"); - par(mfrow=c(1,1)); - - plot(TE_df_logfold$LogFold, -log10(TE_pval), - xlab="log2 fold change", ylab=volc_ylab, - type="n") - sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use - points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") - #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFoldlog(2,base=2))|(TE_df_logfold$LogFoldlog(2,base=2))|(TE_df_logfold$LogFold0.05) - sel=intersect(sel,sel1) - points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") - abline(h = -log(0.05,base=10), col="red", lty=2) - abline(v = log(2,base=2), col="red", lty=2) - abline(v = log(0.5,base=2), col="red", lty=2) - dev.off(); - - - - cat('
\n', file = htmloutfile, append = TRUE); - cat("\n', file = htmloutfile, append = TRUE); - cat("

Transcript Fold-Change	Protein Fold-Change
", '	", - '

\n', - file = htmloutfile, append = TRUE); - }else{ - cat('

!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!

', - file = htmloutfile, append = TRUE); - } - } @@ -612,8 +759,6 @@ #*************************************************************************************************************************************** - - #=============================================================================== # Arguments #=============================================================================== @@ -638,7 +783,6 @@ htmloutfile = args[11]; # html output file outdir = args[12]; # html supporting files - #=============================================================================== # Check for file existance #=============================================================================== @@ -661,31 +805,33 @@ suppressPackageStartupMessages(library(gplots)); suppressPackageStartupMessages(library(ggplot2)); suppressPackageStartupMessages(library(ggfortify)); +suppressPackageStartupMessages(library(plotly)); +suppressPackageStartupMessages(library(d3heatmap)); #=============================================================================== # Select mode and parse experiment design file #=============================================================================== if(mode=="multiple") { - expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); - expDesign_cc = expDesign[1:2,]; - - sampleinfo_df = expDesign[3:nrow(expDesign),]; - rownames(sampleinfo_df)=1:nrow(sampleinfo_df); - colnames(sampleinfo_df) = c("Sample","Group"); - - condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; - condition_g_name = "case"; - control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; - control_g_name = "control"; - sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; - sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; - sampleinfo_df_orig = sampleinfo_df; + expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); + expDesign_cc = expDesign[1:2,]; + + sampleinfo_df = expDesign[3:nrow(expDesign),]; + rownames(sampleinfo_df)=1:nrow(sampleinfo_df); + colnames(sampleinfo_df) = c("Sample","Group"); + + condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; + condition_g_name = "case"; + control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; + control_g_name = "control"; + sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; + sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; + sampleinfo_df_orig = sampleinfo_df; } if(mode=="logfold") { - sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) + sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) } #=============================================================================== @@ -694,12 +840,12 @@ TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); if(mode=="multiple") { - TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; + TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; } if(mode=="logfold") { - TE_df = TE_df_orig; - colnames(TE_df) = c("Genes", "LogFold"); + TE_df = TE_df_orig; + colnames(TE_df) = c("Genes", "LogFold"); } #=============================================================================== # Parse Proteome data @@ -707,12 +853,12 @@ PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); if(mode=="multiple") { - PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; + PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; } if(mode=="logfold") { - PE_df = PE_df_orig; - colnames(PE_df) = c("Genes", "LogFold"); + PE_df = PE_df_orig; + colnames(PE_df) = c("Genes", "LogFold"); } #============================================================================================================= @@ -725,17 +871,17 @@ #=============================================================================== # Write initial data summary in html outfile #=============================================================================== - cat("\n", file = htmloutfile); - - cat("

QuanTP: Association between abundance ratios of transcript and protein

\n", +cat("\n", file = htmloutfile); + +cat("

QuanTP: Association between abundance ratios of transcript and protein

\n", "

Input data summary

\n", "

Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","

Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"
Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"

\n", file = htmloutfile, append = TRUE); - - cat("

Linear Regression model fit between Proteome and Transcriptome data

1) Residuals vs Fitted plot

2) Normal Q-Q plot of residuals

Outliers based on the residuals from regression analysis

INFLUENTIAL OBSERVATIONS

3) Residuals vs Leverage plot

Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)

Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)

QuanTP: Association between abundance ratios of transcript and protein

QuanTP: Association between abundance ratios of transcript and protein

Input data summary

Table of Contents:

Table of Contents: