view visualize_pore_diameter_aqp.R @ 10:afd0260e676f draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/aubi_piaf commit 3111e03da3d8644ceaf94e9796f6c4206d3fe440-dirty"
author agpetit
date Tue, 07 Jun 2022 12:04:45 +0000
parents e5cf7698a2af
children f5064c93f7ab
line wrap: on
line source

#!/usr/bin/env Rscript

# install and/or load necessary packages
useful_packages <- c("conflicted", "getopt", "tidyverse", "ggplot2", "ggpubr")
uninstalled_packages <- setdiff(useful_packages, rownames(installed.packages()))
invisible(lapply(useful_packages, require, character.only = TRUE, warn.conflicts = TRUE, quietly = TRUE))

spec <- matrix(c(
  "input_file", "i", 1, "character",
  "aqp_distribution", "a", 2, "logical",
  "protomer_distribution", "p", 2, "logical",
  "all_distribution", "d", 2, "logical",
  "pdf", "f", 2, "logical"
), byrow = TRUE, ncol = 4)
opt <- getopt(spec)

if (is.null(opt$input_file)) {
  print("A file containing an array must be given as input with the -f argument")
  quit(status = 1)
}

if (is.null(opt$aqp_distribution)) {
  opt$aqp_distribution <- TRUE
}

if (is.null(opt$protomer_distribution)) {
  opt$protomer_distribution <- TRUE
}

if (is.null(opt$all_distribution)) {
  opt$all_distribution <- TRUE
}

if (is.null(opt$pdf)) {
  opt$pdf <- TRUE
}

tibble_sort <- read.delim(opt$input_file)
tibble_sort <- as_tibble(tibble_sort)
colnames(tibble_sort) <- colnames(tibble_sort) %>%
  as_tibble() %>%
  mutate(value = str_replace_all(value, "\\.", "_"), value = gsub("_+Angstroms_", "", value), value = str_replace(value, "Time__ps_", "time"), value = str_replace(value, "_", "."), value = str_replace(value, "_", ".")) %>%
  unlist()
tibble_sort$time <- as.numeric(gsub("-[0-9]+.[0-9]+", "", tibble_sort$time))

tibble_sort_mean <- tibble_sort[, 1:9]
tibble_sort_std <-  tibble_sort[, c(1, 10:17)]
colnames(tibble_sort_mean) <- gsub("_mean", "", colnames(tibble_sort_mean))
colnames(tibble_sort_std) <- gsub("_std", "", colnames(tibble_sort_std))

tibble_sort_mean_long <- tibble_sort_mean %>%
  pivot_longer(cols = contains("AQP"), values_to = "distance") %>%
  separate(name, into = c("aqp", "protomer", "Couple"), sep = "[.]") %>%
  mutate(protomer = str_replace(protomer, "P", "Protomer "))

tibble_sort_std_long <- tibble_sort_std %>%
  pivot_longer(cols = contains("AQP"), values_to = "distance") %>%
  separate(name, into = c("aqp", "protomer", "Couple"), sep = "[.]") %>%
  mutate(protomer = str_replace(protomer, "P", "Protomer "))

create_ggplot <- function(tibble_mean, tibble_std, group, color, wrap, title) {
  tibble_mean_distance <- tibble_mean %>%
    group_by_at(group) %>%
    summarise(mean_distance = (mean(distance)))
  tibble_std_distance <- tibble_std %>%
    group_by_at(group) %>%
    summarise(std_distance = (mean(distance)))
  tibble_mean_std_distance <- inner_join(tibble_mean_distance, tibble_std_distance, by = group)
  g_distribution <- ggplot(tibble_mean_std_distance, aes(x = time / 1000, y = mean_distance)) +
    geom_line(aes(color = .data[[color]])) +
    geom_ribbon(aes(ymin = mean_distance - std_distance, ymax = mean_distance
                    + std_distance, fill = .data[[color]]), alpha = 0.3) +
    facet_wrap(wrap, nrow = 2) + theme(legend.position = "top") + guides(color = "none", fill = "none") +
    ggtitle(label = title, subtitle = "The envelope represents the standard deviation.") +
    ylab(label = "ArR-ArR distance (Ångströms)") + xlab(label = "Time (ns)")
  return(g_distribution)
}

list_ggplot <- list()

if (opt$aqp_distribution == TRUE) {
  group_aqp <- c("time", "aqp")
  color_aqp <- "aqp"
  wrap_aqp <- c("aqp")
  title_aqp <- "Average distance ArR-ArR by AQP (4 protomers)"
  g_aqp_distribution <- create_ggplot(tibble_sort_mean_long, tibble_sort_std_long, group_aqp, color_aqp, wrap_aqp, title_aqp)
  ggsave("Distance_distribution_by_aquaporin.png", g_aqp_distribution, width = 40, height = 20, units = "cm")
  list_ggplot[[1]] <- g_aqp_distribution
}

if (opt$protomer_distribution == TRUE) {
  group_protomer <- c("time", "protomer")
  color_protomer <- "protomer"
  wrap_protomer <- c("protomer")
  title_protomer <- "Average distance ArR-ArR by protomer (2 aquaporins)"
  g_protomer_distribution <- create_ggplot(tibble_sort_mean_long, tibble_sort_std_long, group_protomer, color_protomer, wrap_protomer, title_protomer)
  ggsave("Distance_distribution_by_protomer.png", g_protomer_distribution, width = 40, height = 20, units = "cm")
  list_ggplot[[2]] <- g_protomer_distribution
}

if (opt$all_distribution == TRUE) {
  group_all <- c("time", "aqp", "protomer")
  color_all <- "protomer"
  wrap_all <- c("aqp", "protomer")
  title_all <- "Average distance ArR-ArR by protomer and AQP"
  g_all_distribution <- create_ggplot(tibble_sort_mean_long, tibble_sort_std_long, group_all, color_all, wrap_all, title_all)
  ggsave("Distance_distribution_on_all_protomers.png", g_all_distribution, width = 40, height = 20, units = "cm")
  list_ggplot[[3]] <- g_all_distribution
}

if (opt$pdf == TRUE) {
  ggexport(list_ggplot, filename = "all_graphics_distribution.pdf", width = 18, heigh = 18)
}