Mercurial > repos > tomnl > mspurity_flagremove
comparison dimsPredictPuritySingle.R @ 15:b71677d4f958 draft default tip
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
| author | tomnl |
|---|---|
| date | Wed, 27 Nov 2019 12:31:10 +0000 |
| parents | ca0ac330f1a4 |
| children | |
Comparison legend: equal, deleted, inserted, replaced
| 14:568e12663295 | 15:b71677d4f958 |
|---|---|
| 2 library(optparse) | 2 library(optparse) |
| 3 print(sessionInfo()) | 3 print(sessionInfo()) |
| 4 | 4 |
| 5 option_list <- list( | 5 option_list <- list( |
| 6 make_option(c("--mzML_file"), type="character"), | 6 make_option(c("--mzML_file"), type="character"), |
| 7 make_option(c("--mzML_files"), type="character"), | |
| 8 make_option(c("--mzML_filename"), type="character", default=''), | |
| 9 make_option(c("--mzML_galaxy_names"), type="character", default=''), | |
| 7 make_option(c("--peaks_file"), type="character"), | 10 make_option(c("--peaks_file"), type="character"), |
| 8 make_option(c("-o", "--out_dir"), type="character"), | 11 make_option(c("-o", "--out_dir"), type="character"), |
| 9 make_option("--minOffset", default=0.5), | 12 make_option("--minoffset", default=0.5), |
| 10 make_option("--maxOffset", default=0.5), | 13 make_option("--maxoffset", default=0.5), |
| 11 make_option("--ilim", default=0.05), | 14 make_option("--ilim", default=0.05), |
| 12 make_option("--ppm", default=4), | 15 make_option("--ppm", default=4), |
| 13 make_option("--dimspy", action="store_true"), | 16 make_option("--dimspy", action="store_true"), |
| 14 make_option("--sim", action="store_true"), | 17 make_option("--sim", action="store_true"), |
| 15 make_option("--remove_nas", action="store_true"), | 18 make_option("--remove_nas", action="store_true"), |
| 23 opt<- parse_args(OptionParser(option_list=option_list)) | 26 opt<- parse_args(OptionParser(option_list=option_list)) |
| 24 | 27 |
| 25 print(sessionInfo()) | 28 print(sessionInfo()) |
| 26 print(opt) | 29 print(opt) |
| 27 | 30 |
| 28 if (is.null(opt$dimspy)){ | 31 print(opt$mzML_files) |
| 32 print(opt$mzML_galaxy_names) | |
| 29 | 33 |
| 30 df <- read.table(opt$peaks_file, header = TRUE, sep='\t') | 34 str_to_vec <- function(x){ |
| 31 filename = NA | 35 print(x) |
| 32 mzml_file <- opt$mzML_file | 36 x <- trimws(strsplit(x, ',')[[1]]) |
| 33 }else{ | 37 return(x[x != ""]) |
| 34 indf <- read.table(opt$peaks_file, | 38 } |
| 35 header = TRUE, sep='\t', stringsAsFactors = FALSE) | |
| 36 | |
| 37 | 39 |
| 38 if (file.exists(opt$mzML_file)){ | 40 find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){ |
| 39 mzml_file <- opt$mzML_file | 41 mzML_filename <- trimws(mzML_filename) |
| 40 }else{ | 42 mzML_files <- str_to_vec(mzML_files) |
| 41 | 43 galaxy_names <- str_to_vec(galaxy_names) |
| 42 filename = colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] | 44 if (mzML_filename %in% galaxy_names){ |
| 43 print(filename) | 45 return(mzML_files[galaxy_names==mzML_filename]) |
| 44 # check if the data file is mzML or RAW (can only use mzML currently) so | 46 }else{ |
| 45 # we expect an mzML file of the same name in the same folder | 47 stop(paste("mzML file not found - ", mzML_filename)) |
| 46 indf$i <- indf[,colnames(indf)==filename] | 48 } |
| 47 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) | 49 } |
| 48 | |
| 49 filename = sub("raw", "mzML", filename, ignore.case = TRUE) | |
| 50 print(filename) | |
| 51 | |
| 52 mzml_file <- file.path(opt$mzML_file, filename) | |
| 53 | |
| 54 } | |
| 55 | |
| 56 df <- indf[4:nrow(indf),] | |
| 57 if ('blank_flag' %in% colnames(df)){ | |
| 58 df <- df[df$blank_flag==1,] | |
| 59 } | |
| 60 colnames(df)[colnames(df)=='m.z'] <- 'mz' | |
| 61 | |
| 62 if ('nan' %in% df$mz){ | |
| 63 df[df$mz=='nan',]$mz <- NA | |
| 64 } | |
| 65 df$mz <- as.numeric(df$mz) | |
| 66 | |
| 67 | 50 |
| 68 | 51 |
| 69 | 52 if (is.null(opt$dimspy)){ |
| 53 df <- read.table(opt$peaks_file, header = TRUE, sep='\t') | |
| 54 if (file.exists(opt$mzML_file)){ | |
| 55 mzML_file <- opt$mzML_file | |
| 56 }else if (!is.null(opt$mzML_files)){ | |
| 57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, | |
| 58 opt$mzML_filename) | |
| 59 }else{ | |
| 60 mzML_file <- file.path(opt$mzML_file, filename) | |
| 61 } | |
| 62 }else{ | |
| 63 indf <- read.table(opt$peaks_file, | |
| 64 header = TRUE, sep='\t', stringsAsFactors = FALSE) | |
| 65 | |
| 66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] | |
| 67 print(filename) | |
| 68 # check if the data file is mzML or RAW (can only use mzML currently) so | |
| 69 # we expect an mzML file of the same name in the same folder | |
| 70 indf$i <- indf[,colnames(indf)==filename] | |
| 71 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) | |
| 72 | |
| 73 filename = sub("raw", "mzML", filename, ignore.case = TRUE) | |
| 74 print(filename) | |
| 75 | |
| 76 | |
| 77 if (file.exists(opt$mzML_file)){ | |
| 78 mzML_file <- opt$mzML_file | |
| 79 }else if (!is.null(opt$mzML_files)){ | |
| 80 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) | |
| 81 }else{ | |
| 82 mzML_file <- file.path(opt$mzML_file, filename) | |
| 83 } | |
| 84 | |
| 85 # Update the dimspy output with the correct information | |
| 86 df <- indf[4:nrow(indf),] | |
| 87 if ('blank_flag' %in% colnames(df)){ | |
| 88 df <- df[df$blank_flag==1,] | |
| 89 } | |
| 90 colnames(df)[colnames(df)=='m.z'] <- 'mz' | |
| 91 | |
| 92 if ('nan' %in% df$mz){ | |
| 93 df[df$mz=='nan',]$mz <- NA | |
| 94 } | |
| 95 df$mz <- as.numeric(df$mz) | |
| 70 } | 96 } |
| 71 | 97 |
| 72 if (!is.null(opt$remove_nas)){ | 98 if (!is.null(opt$remove_nas)){ |
| 73 df <- df[!is.na(df$mz),] | 99 df <- df[!is.na(df$mz),] |
| 74 } | 100 } |
| 84 isotopes <- FALSE | 110 isotopes <- FALSE |
| 85 }else{ | 111 }else{ |
| 86 isotopes <- TRUE | 112 isotopes <- TRUE |
| 87 } | 113 } |
| 88 | 114 |
| 89 | |
| 90 | |
| 91 if (is.null(opt$sim)){ | 115 if (is.null(opt$sim)){ |
| 92 sim=FALSE | 116 sim=FALSE |
| 93 }else{ | 117 }else{ |
| 94 sim=TRUE | 118 sim=TRUE |
| 95 } | 119 } |
| 96 | 120 |
| 97 minOffset = as.numeric(opt$minOffset) | 121 minOffset = as.numeric(opt$minoffset) |
| 98 maxOffset = as.numeric(opt$maxOffset) | 122 maxOffset = as.numeric(opt$maxoffset) |
| 99 | |
| 100 | |
| 101 | 123 |
| 102 if (opt$iwNorm=='none'){ | 124 if (opt$iwNorm=='none'){ |
| 103 iwNorm = FALSE | 125 iwNorm = FALSE |
| 104 iwNormFun = NULL | 126 iwNormFun = NULL |
| 105 }else if (opt$iwNorm=='gauss'){ | 127 }else if (opt$iwNorm=='gauss'){ |
| 113 iwNormFun = msPurity::iwNormQE.5() | 135 iwNormFun = msPurity::iwNormQE.5() |
| 114 } | 136 } |
| 115 | 137 |
| 116 print('FIRST ROWS OF PEAK FILE') | 138 print('FIRST ROWS OF PEAK FILE') |
| 117 print(head(df)) | 139 print(head(df)) |
| 118 print(mzml_file) | 140 print(mzML_file) |
| 119 predicted <- msPurity::dimsPredictPuritySingle(df$mz, | 141 predicted <- msPurity::dimsPredictPuritySingle(df$mz, |
| 120 filepth=mzml_file, | 142 filepth=mzML_file, |
| 121 minOffset=minOffset, | 143 minOffset=minOffset, |
| 122 maxOffset=maxOffset, | 144 maxOffset=maxOffset, |
| 123 ppm=opt$ppm, | 145 ppm=opt$ppm, |
| 124 mzML=TRUE, | 146 mzML=TRUE, |
| 125 sim = sim, | 147 sim = sim, |
| 131 ) | 153 ) |
| 132 predicted <- cbind(df, predicted) | 154 predicted <- cbind(df, predicted) |
| 133 | 155 |
| 134 print(head(predicted)) | 156 print(head(predicted)) |
| 135 print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) | 157 print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) |
| 136 write.table(predicted, file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), row.names=FALSE, sep='\t') | 158 |
| 159 write.table(predicted, | |
| 160 file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), | |
| 161 row.names=FALSE, sep='\t') | |
| 162 | |
| 163 |
