Mercurial > repos > tomnl > track_rt_raw
comparison anticipated_purity_lcms.R @ 0:b96fc0da0a32 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f9591483bef88aef71a9f0cc0ffc75cf6eab480c
| author | tomnl |
|---|---|
| date | Mon, 05 Mar 2018 10:05:57 -0500 |
| parents | |
| children | 26fd52ed6d21 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b96fc0da0a32 |
|---|---|
| 1 library(msPurity) | |
| 2 library(optparse) | |
| 3 | |
# Command line interface.
# Options declared with action = "store_true" and no default are tested
# further down with is.null(), i.e. "flag absent" is detected as NULL.
option_list <- list(
  make_option("--xset_path", type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--mzML_path", type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character"),
  make_option("--purityType", default = "purityFWHMmedian"),
  make_option("--singleFile", default = 0),
  make_option("--cores", default = 4),
  make_option("--xgroups", type = "character"),
  make_option("--rdata_name", default = "xset"),
  make_option("--camera_xcms", default = "xset"),
  make_option("--files", type = "character"),
  make_option("--galaxy_files", type = "character"),
  make_option("--choose_class", type = "character"),
  make_option("--ignore_files", type = "character"),
  make_option("--rtraw_columns", action = "store_true")
)

# Parse the command line into a named list of option values.
opt <- parse_args(OptionParser(option_list = option_list))

# Log the R session and the parsed options for troubleshooting.
print(sessionInfo())
print(opt)
| 32 | |
# --xgroups: optional comma-separated list of numeric values, forwarded to
# purityX() as `xgroups`. NULL when the option is not supplied.
if (is.null(opt$xgroups)) {
  xgroups <- NULL
} else {
  xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
}

print(xgroups)

# NOTE: a former `if (!is.null(opt$remove_nas)) df <- df[!is.na(df$mz), ]`
# branch was removed. `--remove_nas` is not declared in option_list, so the
# branch could never run, and `df` is not defined by this script (it would
# resolve to the stats::df function and fail on subsetting).

# --isotope_matrix: optional tab-separated file with a header row, forwarded
# to purityX() as `im`. NULL when the option is not supplied.
if (is.null(opt$isotope_matrix)) {
  im <- NULL
} else {
  im <- read.table(opt$isotope_matrix,
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# Flag options carry no default, so "flag given" is detected as "not NULL".
isotopes <- !is.null(opt$exclude_isotopes)
rtraw_columns <- !is.null(opt$rtraw_columns)
| 65 | |
loadRData <- function(rdata_path, xset_name) {
  # Load an RData file and return the object stored under `xset_name`.
  #
  # Args:
  #   rdata_path: path to the RData file to load.
  #   xset_name:  name (single string) of the object to return.
  #
  # Returns:
  #   The object saved under `xset_name` in the file.
  #
  # Raises:
  #   An error if the file does not contain an object called `xset_name`.
  #
  # The file is loaded into its own environment rather than the function
  # frame, so a saved object that happens to be called `rdata_path` or
  # `xset_name` cannot overwrite the arguments.
  rdata_env <- new.env(parent = emptyenv())
  loaded_names <- load(rdata_path, envir = rdata_env)

  # Log what the file contained (useful when the wrong name was requested).
  print(loaded_names)

  if (!xset_name %in% loaded_names) {
    stop("Object '", xset_name, "' not found in ", rdata_path,
         " (available: ", paste(loaded_names, collapse = ", "), ")",
         call. = FALSE)
  }

  get(xset_name, envir = rdata_env, inherits = FALSE)
}
| 72 | |
# Load the object named by --rdata_name from the RData file at --xset_path.
target_obj <- loadRData(opt$xset_path, opt$rdata_name)

# With --camera_xcms camera the loaded object is expected to carry the
# xcmsSet in its `xcmsSet` slot (presumably a CAMERA annotation object);
# otherwise the loaded object is used directly.
if (opt$camera_xcms == "camera") {
  xset <- target_obj@xcmsSet
} else {
  xset <- target_obj
}

print(xset)

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation-window normalisation: translate the --iwNorm keyword into the
# (iwNorm, iwNormFun) pair that is passed to purityX().
if (opt$iwNorm == "none") {
  iwNorm <- FALSE
  iwNormFun <- NULL
} else if (opt$iwNorm == "gauss") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "rcosine") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "QE5") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormQE.5()
} else {
  # Fail here with a clear message instead of leaving iwNorm/iwNormFun
  # undefined and failing later with "object not found".
  stop("Unsupported --iwNorm value '", opt$iwNorm,
       "'; expected one of: none, gauss, rcosine, QE5",
       call. = FALSE)
}

print(xset@filepaths)
| 102 | |
| 103 | |
| 104 | |
# Optionally replace the file paths stored in the xcmsSet.
#   --files         comma-separated paths whose basenames identify the files
#   --galaxy_files  optional comma-separated replacement paths, paired by
#                   position with --files; when absent the --files paths
#                   themselves are used as replacements
if (!is.null(opt$files)) {
  updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]])
  updated_filepaths <- updated_filepaths[updated_filepaths != ""]
  print(updated_filepaths)
  updated_filenames <- basename(updated_filepaths)
  original_filenames <- basename(xset@filepaths)

  if (!is.null(opt$galaxy_files)) {
    galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
    galaxy_files <- galaxy_files[galaxy_files != ""]
    if (length(galaxy_files) != length(updated_filepaths)) {
      stop("--galaxy_files and --files must list the same number of files (",
           length(galaxy_files), " vs ", length(updated_filepaths), ")",
           call. = FALSE)
    }
    replacement_paths <- galaxy_files
  } else {
    replacement_paths <- updated_filepaths
  }

  # For every file already in the xcmsSet, find the position of the supplied
  # file with the same basename. Indexing the replacements by
  # match(updated, original) instead would apply the inverse permutation and
  # assign wrong paths whenever the two orders differ.
  update_idx <- match(original_filenames, updated_filenames)
  has_replacement <- !is.na(update_idx)

  if (!all(has_replacement)) {
    warning("No replacement path supplied for: ",
            paste(original_filenames[!has_replacement], collapse = ", "),
            "; keeping the original path(s)",
            call. = FALSE)
  }

  # Keep the original path where no replacement was supplied, so the number
  # and order of file paths always matches the xcmsSet.
  new_filepaths <- xset@filepaths
  new_filepaths[has_replacement] <- replacement_paths[update_idx[has_replacement]]
  xset@filepaths <- new_filepaths
}
| 121 | |
# Build the indices of files to ignore, passed to purityX() as `fileignore`.

# --choose_class: comma-separated class names to keep; every file whose
# phenoData class is NOT listed is ignored. NULL means "no class filter".
if (!is.null(opt$choose_class)) {
  classes <- trimws(strsplit(opt$choose_class, ",")[[1]])
  ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)

  print("choose class")
  print(ignore_files_class)
} else {
  ignore_files_class <- NULL
}

# --ignore_files: comma-separated names matched against the phenoData row
# names.
if (!is.null(opt$ignore_files)) {
  ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]])
  filenames <- rownames(xset@phenoData)
  ignore_files <- which(filenames %in% ignore_files_string)
} else {
  ignore_files <- NULL
}

# Combine both selections. A NULL sentinel (rather than NA) is used for "no
# class filter" so that no NA index can end up in the combined vector.
ignore_files <- unique(c(ignore_files, ignore_files_class))

# Nothing selected: pass NULL rather than a zero-length index vector.
if (length(ignore_files) == 0) {
  ignore_files <- NULL
}

print("ignore_files")
print(ignore_files)
| 152 | |
| 153 | |
# Run the purity calculation with the settings assembled above.
ppLCMS <- msPurity::purityX(
  xset = xset,
  offsets = c(minOffset, maxOffset),
  cores = opt$cores,
  xgroups = xgroups,
  purityType = opt$purityType,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun,
  singleFile = opt$singleFile,
  fileignore = ignore_files,
  rtraw_columns = rtraw_columns
)

# Post-process the predictions table. Done inside local() so the helper
# variables are not added to the workspace written by save.image() below.
dfp <- local({
  predictions <- ppLCMS@predictions

  # Column names made compatible with deconrank (old name = new name),
  # applied in this order.
  renames <- c(
    grpid = "peakID",
    median = "medianPurity",
    mean = "meanPurity",
    sd = "sdPurity",
    stde = "sdePurity",
    RSD = "cvPurity",
    pknm = "pknmPurity"
  )
  for (old_name in names(renames)) {
    colnames(predictions)[colnames(predictions) == old_name] <- renames[[old_name]]
  }

  # Missing median purities are reported as 0.
  median_missing <- is.na(predictions$medianPurity)
  if (any(median_missing)) {
    predictions$medianPurity[median_missing] <- 0
  }

  predictions
})

# Write the table as tab-separated text, then save the whole workspace.
print("saving tsv")
print(head(dfp))
write.table(dfp,
            file.path(opt$out_dir, "anticipated_purity_lcms.tsv"),
            row.names = FALSE,
            sep = "\t")
print("saving RData")
save.image(file.path(opt$out_dir, "anticipated_purity_lcms.RData"))
