Mercurial > repos > artbio > ngsplot
diff lib/parse.args.r @ 0:3ca58369469c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'e9fcc157a7f2f2fa9d6ac9a58d425ff17c975f5c\n'
author | artbio |
---|---|
date | Wed, 06 Dec 2017 19:01:53 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/parse.args.r Wed Dec 06 19:01:53 2017 -0500 @@ -0,0 +1,521 @@ +# Parse command line arguments and extract them into an associate array. +# Check if the required arguments are all satisfied. +parseArgs <- function(args, manditories) { + if(length(args) %% 2 == 1 || length(args) == 0) { + cat('Unpaired argument and value.\n') + return(NULL) + } + n.i <- seq(1, length(args), by=2) + v.i <- seq(2, length(args), by=2) + args.name <- args[n.i] + args.value <- args[v.i] + + # Check if required argument values are supplied. + miss_tag <- F + man.bool <- manditories %in% args.name + if(!all(man.bool)){ + cat(paste('Missing argument: ', paste(manditories[!man.bool], + collapse=','), '.', sep='') + ) + miss_tag <- T + } + if(miss_tag){ + res <- NULL + }else{ + res <- args.value + names(res) <- args.name + } + res +} + +ConfigTbl <- function(args.tbl, fraglen) { +# Create configuration table from "-C" argument. +# Args: +# args.tbl: named vector of program arguments. +# fraglen: fragment length. +# Returns: dataframe of configuration. + + covfile <- args.tbl['-C'] + + suppressWarnings( + ctg.tbl <- tryCatch( + read.table(covfile, colClasses='character', comment.char='#'), + error=function(e) { + if('-E' %in% names(args.tbl)) { + glist <- args.tbl['-E'] + } else { + glist <- '-1' + } + if('-T' %in% names(args.tbl)) { + title <- args.tbl['-T'] + } else { + title <- 'Noname' + } + data.frame(cov=covfile, glist=glist, title=title, + fraglen=as.character(fraglen), + color=NA, stringsAsFactors=F) + } + ) + ) + + # Read a config file. + if(ncol(ctg.tbl) < 3) { + stop("Configuration file must contain at least 3 columns! +Insufficient information provided.\n") + } + colnames(ctg.tbl)[1:3] <- c('cov', 'glist', 'title') + if(ncol(ctg.tbl) >= 4) { + colnames(ctg.tbl)[4] <- 'fraglen' + fraglen.sp <- strsplit(ctg.tbl$fraglen, ":") + if(!all(sapply(fraglen.sp, function(x) { + length(x) == 1 || length(x) == 2}))) { + stop("Fragment length format must be X or X:Y; X and Y are +integers.\n") + } + if(!all(as.integer(unlist(fraglen.sp)) > 0)) { + stop("Fragment length must be positive integers! Check your +configuration file.\n") + } + } else { + ctg.tbl <- data.frame(ctg.tbl, fraglen=as.character(fraglen), + stringsAsFactors=F) + } + if(ncol(ctg.tbl) >= 5) { + colnames(ctg.tbl)[5] <- 'color' + # Validate color specifications. + col.validated <- col2rgb(ctg.tbl$color) + } else { + ctg.tbl <- data.frame(ctg.tbl, color=NA) + } + ctg.tbl +} + +CheckRegionAllowed <- function(reg2plot, anno.tbl) { +# Check if region to plot is an allowed value. + + region.allowed <- c(as.vector(unique(anno.tbl$Region)), "bed") + if(!reg2plot %in% region.allowed) { + stop(paste(c("Unknown region specified. Must be one of:", + region.allowed, "\n"), collapse=" ")) + } +} + +CoverageVars <- function(args.tbl, reg2plot) { +# Setup variables from program arguments. +# Args: +# args.tbl: named vector of program arguments. +# reg2plot: string describing region to plot. +# Returns: list of variables. + + vl <- list() # vl: configured variable list + + #### Switch for debug #### + if('-Debug' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-Debug']) >= 0) + vl$debug <- as.integer(args.tbl['-Debug']) + } else { + vl$debug <- as.integer(0) + } + + #### Switch for Galaxy usage #### + if('-Galaxy' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-Galaxy']) >= 0) + vl$galaxy <- as.integer(args.tbl['-Galaxy']) + vl$avgname <- args.tbl['-O2'] + vl$heatmapname <- args.tbl['-O3'] + } else { + vl$galaxy <- as.integer(0) + } + + ######## Coverage-generation parameters ######## + #### Flanking region size. #### + if('-L' %in% names(args.tbl)){ + vl$flanksize <- as.integer(args.tbl['-L']) + stopifnot(vl$flanksize >= 0) + } else { + flank.tbl <- setNames( + c(2000, 2000, 2000, 500, 500, 1500, 1000, 1000), + c('tss','tes','genebody','exon','cgi', 'enhancer', 'dhs','bed')) + vl$flanksize <- as.integer(flank.tbl[reg2plot]) + } + + #### Flanking size factor. #### + if('-N' %in% names(args.tbl) && !('-L' %in% names(args.tbl))) { + stopifnot(as.numeric(args.tbl['-N']) >= 0) + vl$flankfactor <- as.numeric(args.tbl['-N']) + } else { + vl$flankfactor <- 0.0 + } + + #### Robust statistics. #### + if('-RB' %in% names(args.tbl)){ + stopifnot(as.numeric(args.tbl['-RB']) >= 0) + vl$robust <- as.numeric(args.tbl['-RB']) + }else{ + vl$robust <- .0 # percentage to be trimmed on both ends. + } + + #### Random sampling rate. #### + if('-S' %in% names(args.tbl)){ + vl$samprate <- as.numeric(args.tbl['-S']) + stopifnot(vl$samprate > 0 && vl$samprate <= 1) + }else{ + vl$samprate <- 1.0 + } + + ##### Set cores number. #### + if('-P' %in% names(args.tbl)){ + stopifnot(as.integer(args.tbl['-P']) >= 0) + vl$cores.number <- as.integer(args.tbl['-P']) + }else{ + vl$cores.number <- as.integer(0) + } + + #### Algorithm for coverage vector normalization #### + if('-AL' %in% names(args.tbl)) { + vl$cov.algo <- args.tbl['-AL'] + al.allowed <- c('spline', 'bin') + stopifnot(vl$cov.algo %in% al.allowed) + } else { + vl$cov.algo <- 'spline' + } + + #### Gene chunk size #### + if('-CS' %in% names(args.tbl)) { + vl$gcs <- as.integer(args.tbl['-CS']) + stopifnot(vl$gcs > 0) + } else { + vl$gcs <- 100 + } + + #### Mapping quality cutoff #### + if('-MQ' %in% names(args.tbl)) { + vl$map.qual <- as.integer(args.tbl['-MQ']) + stopifnot(vl$map.qual >= 0) + } else { + vl$map.qual <- 20 + } + + #### Fragment length #### + if('-FL' %in% names(args.tbl)) { + vl$fraglen <- as.integer(args.tbl['-FL']) + stopifnot(vl$fraglen > 0) + } else { + vl$fraglen <- 150 + } + vl$bufsize <- vl$fraglen # buffer added to both ends of the coverage vec. + + #### Strand-specific coverage #### + if('-SS' %in% names(args.tbl)) { + spec.allowed <- c('both', 'same', 'opposite') + stopifnot(args.tbl['-SS'] %in% spec.allowed) + vl$strand.spec <- args.tbl['-SS'] + } else { + vl$strand.spec <- 'both' + } + + #### Shall interval size be larger than flanking size? #### + if('-IN' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-IN']) >= 0) + vl$inttag <- as.integer(args.tbl['-IN']) + } else { + vl$inttag <- NA + } + + #### Image output forbidden tag. #### + if('-FI' %in% names(args.tbl)){ + stopifnot(as.integer(args.tbl['-FI']) >= 0) + vl$fi_tag <- as.integer(args.tbl['-FI']) + }else{ + vl$fi_tag <- as.integer(0) + } + + vl +} + +PlotVars <- function(args.tbl, existing.vl=vector('character'), + prof.misc=list(), low.count=NULL, go.paras=list()) { +# Setup replot variables. +# Args: +# args.tbl: argument table. +# existing.vl: existing variable name character list. +# prof.misc: misc. avg prof variable list. +# go.paras: gene ordering parameters. +# Returns: list of updated variables. + + ## Plotting-related parameters: + updated.vl <- list() + ### Misc. parameters: + #### Font size. #### + if('-FS' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-FS']) > 0) + updated.vl$font.size <- as.integer(args.tbl['-FS']) + } else if(!'font.size' %in% existing.vl) { + updated.vl$font.size <- 12 + } + + ### Avg. profiles parameters: + #### Plot width. #### + if('-WD' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-WD']) > 0) + updated.vl$plot.width <- as.integer(args.tbl['-WD']) + } else if(!'plot.width' %in% existing.vl) { + updated.vl$plot.width <- 8 + } + + #### Plot height. #### + if('-HG' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-HG']) > 0) + updated.vl$plot.height <- as.integer(args.tbl['-HG']) + } else if(!'plot.height' %in% existing.vl) { + updated.vl$plot.height <- 7 + } + + #### Shall standard errors be plotted around average profiles? #### + if('-SE' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-SE']) >= 0) + updated.vl$se <- as.integer(args.tbl['-SE']) + } else if(!'se' %in% existing.vl) { + updated.vl$se <- 1 + } + + #### Smooth function radius. #### + if('-MW' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-MW']) >= 1) + updated.vl$mw <- as.integer(args.tbl['-MW']) + } else if(!'mw' %in% existing.vl) { + updated.vl$mw <- 1 + } + + #### Shaded area alpha. #### + if('-H' %in% names(args.tbl)) { + stopifnot(as.numeric(args.tbl['-H']) >= 0 && + as.numeric(args.tbl['-H']) < 1) + updated.vl$shade.alp <- as.numeric(args.tbl['-H']) + } else if(!'shade.alp' %in% existing.vl) { + updated.vl$shade.alp <- 0 + } + + #### Misc. options for avg. profiles. #### + updated.vl$prof.misc <- prof.misc + if('-YAS' %in% names(args.tbl)) { + ystr <- args.tbl['-YAS'] + if(ystr != 'auto') { + yp <- unlist(strsplit(ystr, ',')) + if(length(yp) == 2) { + y.min <- as.numeric(yp[1]) + y.max <- as.numeric(yp[2]) + stopifnot(y.min < y.max) + } else { + stop("-YAS must be 'auto' or a pair of numerics separated +by ','\n") + } + updated.vl$prof.misc$yscale <- c(y.min, y.max) + } else { + updated.vl$prof.misc$yscale <- 'auto' + } + } else if(!'yscale' %in% names(prof.misc)) { + updated.vl$prof.misc$yscale <- 'auto' + } + if('-LEG' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-LEG']) >= 0) + updated.vl$prof.misc$legend <- as.integer(args.tbl['-LEG']) + } else if(!'legend' %in% names(prof.misc)) { + updated.vl$prof.misc$legend <- T + } + if('-BOX' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-BOX']) >= 0) + updated.vl$prof.misc$box <- as.integer(args.tbl['-BOX']) + } else if(!'box' %in% names(prof.misc)) { + updated.vl$prof.misc$box <- T + } + if('-VLN' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-VLN']) >= 0) + updated.vl$prof.misc$vline <- as.integer(args.tbl['-VLN']) + } else if(!'vline' %in% names(prof.misc)) { + updated.vl$prof.misc$vline <- T + } + if('-XYL' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-XYL']) >= 0) + updated.vl$prof.misc$xylab <- as.integer(args.tbl['-XYL']) + } else if(!'xylab' %in% names(prof.misc)) { + updated.vl$prof.misc$xylab <- T + } + if('-LWD' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-LWD']) > 0) + updated.vl$prof.misc$line.wd <- as.integer(args.tbl['-LWD']) + } else if(!'line.wd' %in% names(prof.misc)) { + updated.vl$prof.misc$line.wd <- 3 + } + + ### Heatmap parameters: + #### Gene order algorithm #### + if('-GO' %in% names(args.tbl)){ + go.allowed <- c('total', 'max', 'prod', 'diff', 'hc', 'none', 'km') + stopifnot(args.tbl['-GO'] %in% go.allowed) + updated.vl$go.algo <- args.tbl['-GO'] + } else if(!'go.algo' %in% existing.vl){ + updated.vl$go.algo <- 'total' # hierarchical clustering. + } + + #### Reduce ratio to control a heatmap height #### + if('-RR' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-RR']) > 0) + updated.vl$rr <- as.integer(args.tbl['-RR']) + } else if(!'rr' %in% existing.vl) { + updated.vl$rr <- 30 + } + + #### Color scale string. #### + if('-SC' %in% names(args.tbl)) { + if(!args.tbl['-SC'] %in% c('local', 'region', 'global')) { + scale.pair <- unlist(strsplit(args.tbl['-SC'], ",")) + if(length(scale.pair) != 2 || is.na(as.numeric(scale.pair[1])) || + is.na(as.numeric(scale.pair[2]))) { + stop("Color scale format error: must be local, region, global +or a pair of numerics separated by ','\n") + } + } + updated.vl$color.scale <- args.tbl['-SC'] + } else if(!'color.scale' %in% existing.vl) { + updated.vl$color.scale <- 'local' + } + + #### Flooding fraction. #### + if('-FC' %in% names(args.tbl)) { + stopifnot(as.numeric(args.tbl['-FC']) >= 0 && + as.numeric(args.tbl['-FC']) < 1) + updated.vl$flood.frac <- as.numeric(args.tbl['-FC']) + } else if(!'flood.frac' %in% existing.vl) { + updated.vl$flood.frac <- .02 + } + + #### Heatmap color. #### + if('-CO' %in% names(args.tbl)) { + updated.vl$hm.color <- as.character(args.tbl['-CO']) + } else if(!'hm.color' %in% existing.vl){ + updated.vl$hm.color <- "default" + } + + #### Color distribution. #### + if('-CD' %in% names(args.tbl)) { + stopifnot(as.numeric(args.tbl['-CD']) > 0) + updated.vl$color.distr <- as.numeric(args.tbl['-CD']) + } else if(!'color.distr' %in% existing.vl) { + updated.vl$color.distr <- .6 + } + + #### Low count cutoff for rank-based normalization #### + if('-LOW' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-LOW']) >= 0) + updated.vl$low.count <- as.integer(args.tbl['-LOW']) + } else if(!'low.count' %in% existing.vl) { + updated.vl$low.count <- 10 + } else { # ensure low.count is not empty. + updated.vl$low.count <- low.count + } + if(!is.null(low.count)) { + updated.vl$low.count.ratio <- updated.vl$low.count / low.count + } + + #### Misc. options for heatmap. #### + updated.vl$go.paras <- go.paras + if('-KNC' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-KNC']) > 0) + updated.vl$go.paras$knc <- as.integer(args.tbl['-KNC']) + } else if(!'knc' %in% names(go.paras)) { + updated.vl$go.paras$knc <- 5 + } + if('-MIT' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-MIT']) > 0) + updated.vl$go.paras$max.iter <- as.integer(args.tbl['-MIT']) + } else if(!'max.iter' %in% names(go.paras)) { + updated.vl$go.paras$max.iter <- 20 + } + if('-NRS' %in% names(args.tbl)) { + stopifnot(as.integer(args.tbl['-NRS']) > 0) + updated.vl$go.paras$nrs <- as.integer(args.tbl['-NRS']) + } else if(!'nrs' %in% names(go.paras)) { + updated.vl$go.paras$nrs <- 30 + } + + + updated.vl +} + +CheckHMColorConfig <- function(hm.color, bam.pair) { + if(hm.color != 'default') { + v.colors <- unlist(strsplit(hm.color, ":")) + if(bam.pair && length(v.colors) != 2 && length(v.colors) != 3 || + !bam.pair && length(v.colors) != 1) { + stop("Heatmap color specifications must correspond to bam-pair!\n") + } + } +} + +EchoPlotArgs <- function() { + cat("## Plotting-related parameters:\n") + cat("### Misc. parameters:\n") + cat(" -FS Font size(default=12)\n") + cat("### Avg. profiles parameters:\n") + cat(" -WD Image width(default=8)\n") + cat(" -HG Image height(default=7)\n") + cat(" -SE Shall standard errors be plotted?(0 or 1)\n") + cat(" -MW Moving window width to smooth avg. profiles, must be integer\n") + cat(" 1=no(default); 3=slightly; 5=somewhat; 9=quite; 13=super.\n") + cat(" -H Opacity of shaded area, suggested value:[0, 0.5]\n") + cat(" default=0, i.e. no shading, just lines\n") + cat(" -YAS Y-axis scale: auto(default) or min_val,max_val(custom scale)\n") + cat(" -LEG Draw legend? 1(default) or 0\n") + cat(" -BOX Draw box around plot? 1(default) or 0\n") + cat(" -VLN Draw vertical lines? 1(default) or 0\n") + cat(" -XYL Draw X- and Y-axis labels? 1(default) or 0\n") + cat(" -LWD Line width(default=3)\n") + cat("### Heatmap parameters:\n") + cat(" -GO Gene order algorithm used in heatmaps: total(default), hc, max,\n") + cat(" prod, diff, km and none(according to gene list supplied)\n") + cat(" -LOW Low count cutoff(default=10) in rank-based normalization\n") + cat(" -KNC K-means or HC number of clusters(default=5)\n") + cat(" -MIT Maximum number of iterations(default=20) for K-means\n") + cat(" -NRS Number of random starts(default=30) in K-means\n") + cat(" -RR Reduce ratio(default=30). The parameter controls the heatmap height\n") + cat(" The smaller the value, the taller the heatmap\n") + cat(" -SC Color scale used to map values to colors in a heatmap\n") + cat(" local(default): base on each individual heatmap\n") + cat(" region: base on all heatmaps belong to the same region\n") + cat(" global: base on all heatmaps together\n") + cat(" min_val,max_val: custom scale using a pair of numerics\n") + cat(" -FC Flooding fraction:[0, 1), default=0.02\n") + cat(" -CO Color for heatmap. For bam-pair, use color-tri(neg_color:[neu_color]:pos_color)\n") + cat(" Hint: must use R colors, such as darkgreen, yellow and blue2\n") + cat(" The neutral color is optional(default=black)\n") + cat(" -CD Color distribution for heatmap(default=0.6). Must be a positive number\n") + cat(" Hint: lower values give more widely spaced colors at the negative end\n") + cat(" In other words, they shift the neutral color to positive values\n") + cat(" If set to 1, the neutral color represents 0(i.e. no bias)\n") + +} + +EchoCoverageArgs <- function() { + cat("## Coverage-generation parameters:\n") + cat(" -F Further information provided to select database table or plottype:\n") + cat(" This is a string of description separated by comma.\n") + cat(" E.g. protein_coding,K562,rnaseq(order of descriptors does not matter)\n") + cat(" means coding genes in K562 cell line drawn in rnaseq mode.\n") + cat(" -D Gene database: ensembl(default), refseq\n") + cat(" -L Flanking region size(will override flanking factor)\n") + cat(" -N Flanking region factor\n") + cat(" -RB The fraction of extreme values to be trimmed on both ends\n") + cat(" default=0, 0.05 means 5% of extreme values will be trimmed\n") + cat(" -S Randomly sample the regions for plot, must be:(0, 1]\n") + cat(" -P #CPUs to use. Set 0(default) for auto detection\n") + cat(" -AL Algorithm used to normalize coverage vectors: spline(default), bin\n") + cat(" -CS Chunk size for loading genes in batch(default=100)\n") + cat(" -MQ Mapping quality cutoff to filter reads(default=20)\n") + cat(" -FL Fragment length used to calculate physical coverage(default=150)\n") + cat(" -SS Strand-specific coverage calculation: both(default), same, opposite\n") + cat(" -IN Shall interval be larger than flanking in plot?(0 or 1, default=automatic)\n") + cat(" -FI Forbid image output if set to 1(default=0)\n") +} +############### End arguments configuration ##################### +#################################################################