diff sequenza_to_hrdtools_input.R @ 0:0e54da14f831 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/snvtocnv commit fd049d0796cbfaa6c41a7d80e84d3a734b30acc8"
author artbio
date Thu, 18 Feb 2021 18:59:53 +0000
parents
children e299c477b475
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequenza_to_hrdtools_input.R	Thu Feb 18 18:59:53 2021 +0000
@@ -0,0 +1,67 @@
+# load packages that are provided in the conda env
+options(show.error.messages = F, error = function() {
+        cat(geterrmessage(), file = stderr()); q("no", 1, F) })
+Sys.setenv(TZ = "Pacific/Auckland") # turnaround the tidyverse bug "In OlsonNames() : no Olson database found"
+
+library(optparse)
+library(tidyverse)
+
+option_list <- list(
+  make_option(
+    c("-i", "--input"),
+    default = NA,
+    type = "character",
+    help = "Path to Sequenza output segments file"
+  ),
+  make_option(
+    c("-o", "--output"),
+    default = NA,
+    type = "character",
+    help = "output file, to be used as input for HRDetect"
+  ),
+  make_option(
+    c("-s", "--solutions"),
+    default = NA,
+    type = "character",
+    help = "Path to Sequenza list of alternative solutions"
+  )
+)
+
+opt <- parse_args(OptionParser(option_list = option_list),
+                 args = commandArgs(trailingOnly = TRUE))
+
+sequenza_data <- as.tibble(read.delim(opt$input, header = TRUE))
+solutions_data <- as.tibble(read.delim(opt$solutions, header = TRUE))
+
+
+ploidy <- round(solutions_data$ploidy[1])
+cellularity <- solutions_data$cellularity[1]
+
+reformatted <- sequenza_data %>%
+  select(
+    chr = chromosome,
+    start = start.pos,
+    end = end.pos,
+    copynumber = CNt,
+    A, B
+  ) %>%
+  mutate(
+    ploidy = ploidy,
+    cellularity = cellularity,
+    lohtype = case_when(
+      copynumber == 0 ~ "HOMD",
+      B == 0 & A == ploidy ~ "NLOH",
+      B == 0 & A < ploidy & A > 0 ~ "DLOH",
+      copynumber > ploidy & A > B ~ "ASCNA",
+      copynumber > ploidy & A == B ~ "BCNA",
+      TRUE ~ "HET"
+    )
+  )
+
+message("Preview of output:")
+print(reformatted)
+
+reformatted %>%
+  write_tsv(opt$output)
+
+message(sprintf("Output written to %s", opt$output))