diff frag4feature.R @ 1:c694d8172abf draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2e847122cf605951c334858455fc1d3ebdb189e9-dirty
author tomnl
date Tue, 27 Mar 2018 05:41:35 -0400
parents b6b051e7a5ef
children 146699c00d38
line wrap: on
line diff
--- a/frag4feature.R	Mon Mar 05 10:08:14 2018 -0500
+++ b/frag4feature.R	Tue Mar 27 05:41:35 2018 -0400
@@ -1,5 +1,45 @@
+library(optparse)
 library(msPurity)
-library(optparse)
+library(xcms)
+
+xset_pa_filename_fix <- function(opt, pa, xset){
+  # Re-points pa@fileList (and, when required, xset@filepaths) at the file paths supplied in opt.
+  # Returns list(pa, xset); quits with status 1 when the two file lists cannot be reconciled.
+  if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){
+    # NOTE: Relies on pa@fileList carrying the Galaxy names as the 'names' of its elements.
+    # The remapping is needed because Galaxy moves the files around, which breaks any stored file paths.
+    # Split the comma-separated path list, trimming whitespace and dropping empty entries
+    filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]])
+    filepaths <- filepaths[filepaths != ""]
+    new_names <- basename(filepaths)  # NOTE(review): new_names is not used again in this function
+    # Same parsing for the comma-separated Galaxy names
+    galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]])
+    galaxy_names <- galaxy_names[galaxy_names != ""]
+    # Reorder the new paths by matching names(pa@fileList) against galaxy_names, then restore the saved names
+    nsave <- names(pa@fileList)
+    old_filenames  <- basename(pa@fileList)
+    pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
+    names(pa@fileList) <- nsave
+    # Rewrite the filename columns of puritydf and grped_df from the old basenames to the new ones
+    pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)])
+    pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)])
+  }
+
+  # If the basenames differ, compare names(pa@fileList) instead; on a match xset takes over pa's paths
+ if(!all(basename(pa@fileList)==basename(xset@filepaths))){
+    if(!all(names(pa@fileList)==basename(xset@filepaths))){
+       print('FILELISTS DO NOT MATCH')
+       message('FILELISTS DO NOT MATCH')
+       quit(status = 1)
+    }else{
+      xset@filepaths <- unname(pa@fileList)
+    }
+  }
+
+
+  return(list(pa, xset))
+}
+
 
 option_list <- list(
   make_option(c("-o", "--out_dir"), type="character"),
@@ -9,6 +49,7 @@
   make_option("--plim", default=0.0),
   make_option("--convert2RawRT", action="store_true"),
   make_option("--mostIntense", action="store_true"),
+  make_option("--createDB", action="store_true"),
   make_option("--cores", default=4),
   make_option("--mzML_files", type="character"),
   make_option("--galaxy_names", type="character"),
@@ -30,6 +71,9 @@
 
 pa@cores <- opt$cores
 
+print(pa@fileList)
+print(xset@filepaths)
+
 if(is.null(opt$mostIntense)){
     mostIntense = FALSE
 }else{
@@ -42,64 +86,37 @@
     convert2RawRT= TRUE
 }
 
-# Makes sure the same files are being used
-if(!all(basename(pa@fileList)==basename(xset@filepaths))){
-  if(!all(names(pa@fileList)==basename(xset@filepaths))){
-    quit(status = 1)
-  }else{
-    xset@filepaths <- unname(pa@fileList)
-  }
+if(is.null(opt$createDB)){
+    createDB = FALSE
+}else{
+    createDB = TRUE
 }
 
-if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){
-    # NOTE: This only works if the pa file was generated IN Galaxy!! Relies on
-    # the pa@fileList having the names of files given as 'names' of the variables (done in frag4feature)
-    # Will update in the next version of msPurity
-    filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]])
-    filepaths <- filepaths[filepaths != ""]
-    new_names <- basename(filepaths)
 
-    galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]])
-    galaxy_names <- galaxy_names[galaxy_names != ""]
-
-    nsave <- names(pa@fileList)
-    old_filenames  <- basename(pa@fileList)
-    pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
-
-    pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)])
-    pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)])
-}
-
-if(!all(basename(pa@fileList)==basename(xset@filepaths))){
-  if(!all(names(pa@fileList)==basename(xset@filepaths))){
-    quit(status = 1)
-  }else{
-    xset@filepaths <- unname(pa@fileList)
-  }
-}
+fix <- xset_pa_filename_fix(opt, pa, xset)
+pa <- fix[[1]]
+xset <- fix[[2]]
 
 if(is.null(opt$grp_peaklist)){
     grp_peaklist = NA
+
+
 }else{
     grp_peaklist = opt$grp_peaklist
 }
 
-print(pa)
+print('heck')
 print(pa@fileList)
-print(xset)
+print(names(pa@fileList))
 print(xset@filepaths)
-print(opt$ppm)
-print(opt$plim)
-print(convert2RawRT)
-
-
-
+saveRDS(pa, 'test_pa.rds')
 
 pa <- msPurity::frag4feature(pa=pa, xset=xset, ppm=opt$ppm, plim=opt$plim,
                             intense=opt$mostIntense, convert2RawRT=convert2RawRT,
-                            db_name='alldata.sqlite', out_dir=opt$out_dir, grp_peaklist=grp_peaklist)
+                            db_name='alldata.sqlite', out_dir=opt$out_dir, grp_peaklist=grp_peaklist,
+                             create_db=createDB)
 
 save(pa, file=file.path(opt$out_dir, 'frag4feature.RData'))
 
 print(head(pa@grped_df))
-write.table(pa@grped_df, file.path(opt$out_dir, 'frag4feature.tsv'), row.names=FALSE, sep='\t')
\ No newline at end of file
+write.table(pa@grped_df, file.path(opt$out_dir, 'frag4feature.tsv'), row.names=FALSE, sep='\t')