diff h-concatenate.R @ 0:6a2bb42acfe4 draft

planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit 6c48bd51987a28401de6cf5e49b1b30e5e73fe16-dirty
author tomnl
date Tue, 27 Mar 2018 06:53:36 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/h-concatenate.R	Tue Mar 27 06:53:36 2018 -0400
@@ -0,0 +1,63 @@
+library(optparse)
+
+# Command-line interface. --files and --columns are parallel, semicolon-
+# separated lists; each entry of --columns is itself a comma-separated list
+# of the column names to take from the file at the same position.
+option_list <- list(
+  make_option(c("-f", "--files"), type="character",
+              help="Semicolon-separated paths of the tab-separated input files"),
+  make_option(c("-c", "--columns"), type="character",
+              help="Semicolon-separated groups of comma-separated column names, one group per file"),
+  make_option(c("-o", "--out_dir"), type="character",
+              help="Directory in which combined_table.tsv is written"),
+  make_option(c("-u", "--uc"), type="character",
+              help="Optional comma-separated column names used to subset the combined table")
+)
+
+# store options
+opt <- parse_args(OptionParser(option_list=option_list),  args = commandArgs(trailingOnly = TRUE))
+
+print(sessionInfo())
+print(opt)
+
+# Fail early with a readable message: without this a missing option only
+# surfaces later as a type error from strsplit() or write.table().
+required_opts <- c("files", "columns", "out_dir")
+absent_opts <- required_opts[vapply(required_opts, function(n) is.null(opt[[n]]), logical(1))]
+if (length(absent_opts) > 0){
+  stop("Missing required option(s): ", paste0("--", absent_opts, collapse = ", "), call. = FALSE)
+}
+
+# split the file list and discard empty entries (e.g. from a trailing ';')
+file_paths <- trimws(strsplit(opt$files, ';')[[1]])
+file_paths <- file_paths[file_paths != ""]
+
+# split the column groups the same way, one group per file
+columns <- trimws(strsplit(opt$columns, ';')[[1]])
+columns <- columns[columns != ""]
+
+# turn each group into a character vector of column names
+columns <- lapply(columns, function(x){trimws(strsplit(x, ',')[[1]])})
+
+
+# Read one tab-separated file and keep only the requested columns.
+#
+# Args:
+#   f: path to a tab-separated file with a header row.
+#   c: character vector of column names to keep.
+#
+# Returns:
+#   A data.frame holding the columns of `f` whose names appear in `c`, in the
+#   order they occur in the file. The result is always a data.frame, even
+#   when only a single column is kept.
+#
+# Stops with an error when none of the requested columns are present.
+# Note: read.table() is called with its default check.names = TRUE, so
+# headers that are not syntactic R names are altered before matching.
+readIn <- function(f, c){
+  print('#### reading in file with columns ####')
+  print(f)
+  print(c)
+  df <- read.table(f, header = TRUE, sep='\t', stringsAsFactors=FALSE)
+  keep <- colnames(df) %in% c
+  if (!any(keep)){
+    print('PLEASE CHECK: Selected columns not in file!!')
+    print(colnames(df))
+    print(c)
+    # q() would end the script with exit status 0 and hide the failure
+    stop('PLEASE CHECK: Selected columns not in file!!', call. = FALSE)
+  }
+
+  # report requested columns that are skipped because the file lacks them
+  not_found <- setdiff(c, colnames(df))
+  if (length(not_found) > 0){
+    print('Selected columns not in file (skipped):')
+    print(not_found)
+  }
+
+  # drop = FALSE keeps a one-column selection as a named data.frame
+  df[ , keep, drop = FALSE]
+}
+
+# Every file needs its own column group. mapply() recycles the shorter
+# argument, which would silently pair files with the wrong columns; a single
+# column group is still applied to every file.
+if (length(file_paths) == 0 || length(columns) == 0){
+  stop('No input files or no column groups supplied', call. = FALSE)
+}
+if (length(columns) != 1 && length(columns) != length(file_paths)){
+  stop('Number of column groups (', length(columns),
+       ') does not match number of files (', length(file_paths), ')',
+       call. = FALSE)
+}
+
+# one data.frame per input file, restricted to its selected columns
+m <- mapply(readIn, file_paths, columns, SIMPLIFY = FALSE)
+
+# mapply names the list after the file paths; remove the names so that
+# cbind() does not prefix the column names with them
+m <- unname(m)
+merged <- do.call(cbind, m)
+
+# optional final selection (and ordering) of columns in the combined table
+if(!is.null(opt$uc)){
+  uc <- trimws(strsplit(opt$uc, ',')[[1]])
+  uc <- uc[uc != ""]
+  print(colnames(merged))
+  print(uc)
+  unknown_uc <- setdiff(uc, colnames(merged))
+  if (length(unknown_uc) > 0){
+    stop('Columns requested with --uc not in combined table: ',
+         paste(unknown_uc, collapse = ', '), call. = FALSE)
+  }
+  merged <- merged[uc]
+}
+
+print(head(merged))
+
+write.table(merged, file.path(opt$out_dir, 'combined_table.tsv'), row.names=FALSE, sep='\t')
+
+