annotate msnbase_readmsdata.r @ 5:8705b9de3e6c draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e131bacd37bfaf2c4132fd214c81db9b8a9df513
author lecorguille
date Mon, 17 Sep 2018 08:42:17 -0400
parents fa17ee3bc9ce
children 3d8a68b45153
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
1 #!/usr/bin/env Rscript
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
2
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
3 # ----- LOG FILE -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
4 log_file <- file("log.txt", open="wt")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
5 sink(log_file)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
6 sink(log_file, type = "output")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
7
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
8
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
9 # ----- PACKAGE -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
10 cat("\tSESSION INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
11
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
12 #Import the different functions
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
13 source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); script_arg <- grep("^--file=", argv, value=TRUE)[1]; base_dir <- dirname(sub("^--file=", "", script_arg)); source(file.path(base_dir, fname)) } # source 'fname' from the directory of the running script, taken from the first "--file=" command-line argument
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
14 source_local("lib.r")
5
8705b9de3e6c planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e131bacd37bfaf2c4132fd214c81db9b8a9df513
lecorguille
parents: 2
diff changeset
15 source_local("lib-xcms3.x.x.r")
0
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
16
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
17 pkgs <- c("MSnbase","batch")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
18 loadAndDisplayPackages(pkgs)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
19 cat("\n\n");
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
20
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
21
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
22 # ----- ARGUMENTS -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
23 cat("\tARGUMENTS INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
24 args <- parseCommandArgs(evaluate = FALSE) #interpretation of arguments given in command line as an R list of objects
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
25 write.table(as.matrix(args), col.names=FALSE, quote=FALSE, sep='\t') # print the parsed arguments as a tab-separated table; FALSE instead of the reassignable F
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
26
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
27 cat("\n\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
28
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
29
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
30 # ----- PROCESSING INFILE -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
31 cat("\tARGUMENTS PROCESSING INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
32
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
33
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
34 cat("\n\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
35
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
36 # ----- INFILE PROCESSING -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
37 cat("\tINFILE PROCESSING INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
38
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
39 # Handle infiles
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
40 if (!exists("singlefile")) singlefile <- NULL
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
41 if (!exists("zipfile")) zipfile <- NULL
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
42 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
43 zipfile <- rawFilePath$zipfile
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
44 singlefile <- rawFilePath$singlefile
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
45 directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
46
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
47 # Check some character issues
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
48 md5sumList <- list("origin" = getMd5sum(directory))
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
49 checkXmlStructure(directory)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
50 checkFilesCompatibilityWithXcms(directory)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
51
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
52
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
53 cat("\n\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
54
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
55
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
56 # ----- MAIN PROCESSING INFO -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
57 cat("\tMAIN PROCESSING INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
58
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
59
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
60 cat("\t\tCOMPUTE\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
61
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
62 ## Get the full path to the files
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
63 files <- getMSFiles(directory)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
64
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
65 cat("\t\t\tCreate a phenodata data.frame\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
66 s_groups <- vapply(files, function(x) tail(unlist(strsplit(dirname(x),"/")), n=1), character(1)) # last path component of each file's directory; vapply always yields a character vector (sapply returns list() when 'files' is empty)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
67 s_name <- tools::file_path_sans_ext(basename(files))
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
68 pd <- data.frame(sample_name=s_name, sample_group=s_groups, stringsAsFactors=FALSE)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
69 print(pd)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
70
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
71 cat("\t\t\tLoad Raw Data\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
72 raw_data <- readMSData(files=files, pdata = new("NAnnotatedDataFrame", pd), mode="onDisk")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
73
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
74 # Transform the files' absolute paths into paths relative to the working directory
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
75 raw_data@processingData@files <- sub(paste0(getwd(), "/"), "", raw_data@processingData@files, fixed=TRUE) # fixed=TRUE: match the working directory literally, not as a regular expression
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
76
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
77 # Create a sampleMetadata file
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
78 sampleNamesList <- getSampleMetadata(xdata=raw_data, sampleMetadataOutput="sampleMetadata.tsv")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
79
2
fa17ee3bc9ce planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 5e066c50d2e4ac6d5796b53331cbd3198ab8c4f9-dirty
lecorguille
parents: 0
diff changeset
80 cat("\t\t\tCompute and Store TIC and BPI\n")
fa17ee3bc9ce planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 5e066c50d2e4ac6d5796b53331cbd3198ab8c4f9-dirty
lecorguille
parents: 0
diff changeset
81 chromTIC <- chromatogram(raw_data, aggregationFun = "sum")
fa17ee3bc9ce planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 5e066c50d2e4ac6d5796b53331cbd3198ab8c4f9-dirty
lecorguille
parents: 0
diff changeset
82 chromBPI <- chromatogram(raw_data, aggregationFun = "max")
fa17ee3bc9ce planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 5e066c50d2e4ac6d5796b53331cbd3198ab8c4f9-dirty
lecorguille
parents: 0
diff changeset
83
0
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
84 cat("\n\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
85
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
86 # ----- EXPORT -----
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
87
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
88 cat("\tMSnExp OBJECT INFO\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
89 print(raw_data)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
90 cat("\t\tphenoData\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
91 print(raw_data@phenoData@data)
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
92 cat("\n\n")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
93
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
94 # Save the R objects produced by this tool in an .RData file
2
fa17ee3bc9ce planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 5e066c50d2e4ac6d5796b53331cbd3198ab8c4f9-dirty
lecorguille
parents: 0
diff changeset
95 objects2save <- c("raw_data", "zipfile", "singlefile", "md5sumList", "sampleNamesList", "chromTIC", "chromBPI")
0
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
96 save(list=objects2save[objects2save %in% ls()], file="readmsdata.RData")
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
97
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
98
24f7d4e5c186 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a-dirty
lecorguille
parents:
diff changeset
99 cat("\tDONE\n")