# HG changeset patch # User iuc # Date 1720463497 0 # Node ID 6b235ac52fafff342379196c3bd9471752365e96 # Parent 3aa2c26cc99052ca0f7e6f7ffc321ab6cea6ae3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c diff -r 3aa2c26cc990 -r 6b235ac52faf diffbind.R --- a/diffbind.R Tue Aug 31 08:02:07 2021 +0000 +++ b/diffbind.R Mon Jul 08 18:31:37 2024 +0000 @@ -1,6 +1,7 @@ ## Setup R error handling to go to stderr options(show.error.messages = F, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) + cat(geterrmessage(), file = stderr()) + q("no", 1, F) }) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -14,15 +15,18 @@ options(stringAsfactors = FALSE, useFancyQuotes = FALSE) args <- commandArgs(trailingOnly = TRUE) -#get options, using the spec as defined by the enclosed list. -#we read the options from the default: commandArgs(TRUE). +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). spec <- matrix(c( "infile", "i", 1, "character", "outfile", "o", 1, "character", + "method", "m", 1, "character", "scorecol", "n", 1, "integer", "lowerbetter", "l", 1, "logical", "summits", "s", 1, "integer", "th", "t", 1, "double", + "minoverlap", "O", 1, "integer", + "use_blacklist", "B", 0, "logical", "format", "f", 1, "character", "plots", "p", 2, "character", "bmatrix", "b", 0, "logical", @@ -30,15 +34,14 @@ "infoOpt", "a", 0, "logical", "verbose", "v", 2, "integer", "help", "h", 0, "logical" -), byrow = TRUE, ncol = 4); +), byrow = TRUE, ncol = 4) -opt <- getopt(spec); - +opt <- getopt(spec) # if help was asked for print a friendly message # and exit with a non-zero error code if (!is.null(opt$help)) { - cat(getopt(spec, usage = TRUE)); - q(status = 1); + cat(getopt(spec, usage = TRUE)) + q(status = 1) } parser <- newJSONParser() @@ -54,23 +57,30 @@ samples <- sapply(strsplit(peaks, "-"), `[`, 2) if (length(ctrls) != 0) { - sample_table <- data.frame(SampleID = samples, - Condition = groups, - bamReads = bams, - bamControl = ctrls, - Peaks = peaks, - Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot + sample_table <- data.frame( + SampleID = samples, + Condition = groups, + bamReads = bams, + bamControl = ctrls, + Peaks = peaks, + Tissue = samples + ) # using "Tissue" column to display ids as labels in PCA plot } else { - - sample_table <- data.frame(SampleID = samples, - Replicate = samples, - Condition = groups, - bamReads = bams, - Peaks = peaks, - Tissue = samples) + sample_table <- data.frame( + SampleID = samples, + Replicate = samples, + Condition = groups, + bamReads = bams, + Peaks = peaks, + Tissue = samples + ) } -sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) +sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = opt$minoverlap) + +if (!is.null(opt$use_blacklist)) { + sample <- dba.blacklist(sample, blacklist = TRUE) +} if (!is.null(opt$summits)) { sample_count <- dba.count(sample, summits = opt$summits) @@ -79,17 +89,25 @@ } sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) -sample_analyze <- dba.analyze(sample_contrast) -diff_bind <- dba.report(sample_analyze, th = opt$th) + +if (opt$method == "DBA_DESEQ2") { + method <- DBA_DESEQ2 +} else if (opt$method == "DBA_EDGER") { + method <- DBA_EDGER +} + +sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE) + +diff_bind <- dba.report(sample_analyze, th = opt$th, method = method) # Generate plots if (!is.null(opt$plots)) { pdf(opt$plots) - orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) - dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) - dba.plotMA(sample_analyze, th = opt$th) - dba.plotVolcano(sample_analyze, th = opt$th) - dba.plotBox(sample_analyze, th = opt$th) + orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method) + dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method) + dba.plotMA(sample_analyze, th = opt$th, method = method) + dba.plotVolcano(sample_analyze, th = opt$th, method = method) + dba.plotBox(sample_analyze, th = opt$th, method = method) dev.off() } @@ -97,46 +115,54 @@ res_sorted <- diff_bind[order(diff_bind$FDR), ] # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) if (opt$format == "bed") { - res_sorted <- data.frame(Chrom = seqnames(res_sorted), + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), Start = start(res_sorted) - 1, End = end(res_sorted), Name = rep("DiffBind", length(res_sorted)), Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted))) + Strand = gsub("\\*", ".", strand(res_sorted)) + ) } else if (opt$format == "interval") { - # Output as interval + # Output as interval df <- as.data.frame(res_sorted) extrainfo <- NULL for (i in seq_len(nrow(df))) { extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") } - res_sorted <- data.frame(Chrom = seqnames(res_sorted), + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), Start = start(res_sorted) - 1, End = end(res_sorted), Name = rep("DiffBind", length(res_sorted)), Score = rep("0", length(res_sorted)), Strand = gsub("\\*", ".", strand(res_sorted)), - Comment = extrainfo) + Comment = extrainfo + ) } else { # Output as 0-based tabular - res_sorted <- data.frame(Chrom = seqnames(res_sorted), + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), Start = start(res_sorted) - 1, End = end(res_sorted), Name = rep("DiffBind", length(res_sorted)), Score = rep("0", length(res_sorted)), Strand = gsub("\\*", ".", strand(res_sorted)), - mcols(res_sorted)) + mcols(res_sorted) + ) } write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) # Output binding affinity scores if (!is.null(opt$bmatrix)) { - bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) + bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap) # Output as 0-based tabular - bmat <- data.frame(Chrom = bmat[, 1], + bmat <- data.frame( + Chrom = bmat[, 1], Start = bmat[, 2] - 1, End = bmat[, 3], - bmat[, 4:ncol(bmat)]) + bmat[, 4:ncol(bmat)] + ) write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) } diff -r 3aa2c26cc990 -r 6b235ac52faf diffbind.xml --- a/diffbind.xml Tue Aug 31 08:02:07 2021 +0000 +++ b/diffbind.xml Mon Jul 08 18:31:37 2024 +0000 @@ -1,13 +1,18 @@ - + differential binding analysis of ChIP-Seq peak data + + 3.12.0 + 0 + 22.05 + diffbind + diffbind - bioconductor-diffbind - r-base - r-getopt - r-rjson + bioconductor-diffbind + r-getopt + bioconductor-edger &2 echo "Group $g.groupName: same number of Peak and Bam files needs to be given" && exit 1 && + #end if + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($peak_files.items())]} ) + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_files.items())]} ) #if str( $g.bamcontrol ) != 'None': #for $ctrl in $g.bamcontrol: #set $ctrl_name = re.sub('[^\w\-]', '_', str($ctrl.element_identifier)) #set $ctrl_file = $ctrl_name + "-bamcontrol.bam" - #set ctrl_index = $ctrl_name + "-bamcontrol.bai" + #set $ctrl_index = $ctrl_name + "-bamcontrol.bai" #if $ctrl_file not in json.dumps($temp_factor): ln -s '${ctrl}' '${ctrl_file}' && ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' && #end if - $bam_controls.append($ctrl_file) + #set $bam_controls[str($ctrl.element_identifier)] = str($ctrl_file) #end for - $temp_factor.append( {str($g.groupName): $bam_controls} ) + #if len($peak_files.keys()) != len($bam_files.keys()) + >&2 echo "Group $g.groupName: same number of Peak and Bam control files needs to be given" && exit 1 && + #end if + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_controls.items())]} ) #end if #end for @@ -82,13 +93,15 @@ -i '#echo json.dumps(temp_factor_names)#' -o '$outfile' + -m '$method' -t $th -f $out.format -p '$plots' - - #if $scorecol: - -n "$scorecol" + -O $minoverlap + #if $use_blacklist: + -B #end if + -n $scorecol #if $lowerbetter: -l "$lowerbetter" #end if @@ -126,8 +139,17 @@ - - + + + + + + + + + + + @@ -214,6 +236,28 @@ + + + + + + + + + + + + + + + + + + + + + + @@ -264,6 +308,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +