comparison dimsPredictPuritySingle.R @ 14:1389e565f95e draft default tip

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
author tomnl
date Wed, 27 Nov 2019 12:37:27 +0000
parents fc9bb7e49a3a
children
comparison
equal deleted inserted replaced
13:4164de21d178 14:1389e565f95e
2 library(optparse) 2 library(optparse)
3 print(sessionInfo()) 3 print(sessionInfo())
4 4
5 option_list <- list( 5 option_list <- list(
6 make_option(c("--mzML_file"), type="character"), 6 make_option(c("--mzML_file"), type="character"),
7 make_option(c("--mzML_files"), type="character"),
8 make_option(c("--mzML_filename"), type="character", default=''),
9 make_option(c("--mzML_galaxy_names"), type="character", default=''),
7 make_option(c("--peaks_file"), type="character"), 10 make_option(c("--peaks_file"), type="character"),
8 make_option(c("-o", "--out_dir"), type="character"), 11 make_option(c("-o", "--out_dir"), type="character"),
9 make_option("--minOffset", default=0.5), 12 make_option("--minoffset", default=0.5),
10 make_option("--maxOffset", default=0.5), 13 make_option("--maxoffset", default=0.5),
11 make_option("--ilim", default=0.05), 14 make_option("--ilim", default=0.05),
12 make_option("--ppm", default=4), 15 make_option("--ppm", default=4),
13 make_option("--dimspy", action="store_true"), 16 make_option("--dimspy", action="store_true"),
14 make_option("--sim", action="store_true"), 17 make_option("--sim", action="store_true"),
15 make_option("--remove_nas", action="store_true"), 18 make_option("--remove_nas", action="store_true"),
23 opt<- parse_args(OptionParser(option_list=option_list)) 26 opt<- parse_args(OptionParser(option_list=option_list))
24 27
25 print(sessionInfo()) 28 print(sessionInfo())
26 print(opt) 29 print(opt)
27 30
28 if (is.null(opt$dimspy)){ 31 print(opt$mzML_files)
32 print(opt$mzML_galaxy_names)
29 33
30 df <- read.table(opt$peaks_file, header = TRUE, sep='\t') 34 str_to_vec <- function(x){
31 filename = NA 35 print(x)
32 mzml_file <- opt$mzML_file 36 x <- trimws(strsplit(x, ',')[[1]])
33 }else{ 37 return(x[x != ""])
34 indf <- read.table(opt$peaks_file, 38 }
35 header = TRUE, sep='\t', stringsAsFactors = FALSE)
36
37 39
38 if (file.exists(opt$mzML_file)){ 40 find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){
39 mzml_file <- opt$mzML_file 41 mzML_filename <- trimws(mzML_filename)
40 }else{ 42 mzML_files <- str_to_vec(mzML_files)
41 43 galaxy_names <- str_to_vec(galaxy_names)
42 filename = colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] 44 if (mzML_filename %in% galaxy_names){
43 print(filename) 45 return(mzML_files[galaxy_names==mzML_filename])
44 # check if the data file is mzML or RAW (can only use mzML currently) so 46 }else{
45 # we expect an mzML file of the same name in the same folder 47 stop(paste("mzML file not found - ", mzML_filename))
46 indf$i <- indf[,colnames(indf)==filename] 48 }
47 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) 49 }
48
49 filename = sub("raw", "mzML", filename, ignore.case = TRUE)
50 print(filename)
51
52 mzml_file <- file.path(opt$mzML_file, filename)
53
54 }
55
56 df <- indf[4:nrow(indf),]
57 if ('blank_flag' %in% colnames(df)){
58 df <- df[df$blank_flag==1,]
59 }
60 colnames(df)[colnames(df)=='m.z'] <- 'mz'
61
62 if ('nan' %in% df$mz){
63 df[df$mz=='nan',]$mz <- NA
64 }
65 df$mz <- as.numeric(df$mz)
66
67 50
68 51
69 52 if (is.null(opt$dimspy)){
53 df <- read.table(opt$peaks_file, header = TRUE, sep='\t')
54 if (file.exists(opt$mzML_file)){
55 mzML_file <- opt$mzML_file
56 }else if (!is.null(opt$mzML_files)){
57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names,
58 opt$mzML_filename)
59 }else{
60 mzML_file <- file.path(opt$mzML_file, filename)
61 }
62 }else{
63 indf <- read.table(opt$peaks_file,
64 header = TRUE, sep='\t', stringsAsFactors = FALSE)
65
66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy]
67 print(filename)
68 # check if the data file is mzML or RAW (can only use mzML currently) so
69 # we expect an mzML file of the same name in the same folder
70 indf$i <- indf[,colnames(indf)==filename]
71 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename])
72
73 filename = sub("raw", "mzML", filename, ignore.case = TRUE)
74 print(filename)
75
76
77 if (file.exists(opt$mzML_file)){
78 mzML_file <- opt$mzML_file
79 }else if (!is.null(opt$mzML_files)){
80 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename)
81 }else{
82 mzML_file <- file.path(opt$mzML_file, filename)
83 }
84
85 # Update the dimspy output with the correct information
86 df <- indf[4:nrow(indf),]
87 if ('blank_flag' %in% colnames(df)){
88 df <- df[df$blank_flag==1,]
89 }
90 colnames(df)[colnames(df)=='m.z'] <- 'mz'
91
92 if ('nan' %in% df$mz){
93 df[df$mz=='nan',]$mz <- NA
94 }
95 df$mz <- as.numeric(df$mz)
70 } 96 }
71 97
72 if (!is.null(opt$remove_nas)){ 98 if (!is.null(opt$remove_nas)){
73 df <- df[!is.na(df$mz),] 99 df <- df[!is.na(df$mz),]
74 } 100 }
84 isotopes <- FALSE 110 isotopes <- FALSE
85 }else{ 111 }else{
86 isotopes <- TRUE 112 isotopes <- TRUE
87 } 113 }
88 114
89
90
91 if (is.null(opt$sim)){ 115 if (is.null(opt$sim)){
92 sim=FALSE 116 sim=FALSE
93 }else{ 117 }else{
94 sim=TRUE 118 sim=TRUE
95 } 119 }
96 120
97 minOffset = as.numeric(opt$minOffset) 121 minOffset = as.numeric(opt$minoffset)
98 maxOffset = as.numeric(opt$maxOffset) 122 maxOffset = as.numeric(opt$maxoffset)
99
100
101 123
102 if (opt$iwNorm=='none'){ 124 if (opt$iwNorm=='none'){
103 iwNorm = FALSE 125 iwNorm = FALSE
104 iwNormFun = NULL 126 iwNormFun = NULL
105 }else if (opt$iwNorm=='gauss'){ 127 }else if (opt$iwNorm=='gauss'){
113 iwNormFun = msPurity::iwNormQE.5() 135 iwNormFun = msPurity::iwNormQE.5()
114 } 136 }
115 137
116 print('FIRST ROWS OF PEAK FILE') 138 print('FIRST ROWS OF PEAK FILE')
117 print(head(df)) 139 print(head(df))
118 print(mzml_file) 140 print(mzML_file)
119 predicted <- msPurity::dimsPredictPuritySingle(df$mz, 141 predicted <- msPurity::dimsPredictPuritySingle(df$mz,
120 filepth=mzml_file, 142 filepth=mzML_file,
121 minOffset=minOffset, 143 minOffset=minOffset,
122 maxOffset=maxOffset, 144 maxOffset=maxOffset,
123 ppm=opt$ppm, 145 ppm=opt$ppm,
124 mzML=TRUE, 146 mzML=TRUE,
125 sim = sim, 147 sim = sim,
131 ) 153 )
132 predicted <- cbind(df, predicted) 154 predicted <- cbind(df, predicted)
133 155
134 print(head(predicted)) 156 print(head(predicted))
135 print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) 157 print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'))
136 write.table(predicted, file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), row.names=FALSE, sep='\t') 158
159 write.table(predicted,
160 file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'),
161 row.names=FALSE, sep='\t')
162
163