Mercurial > repos > rnateam > chipseeker
comparison chipseeker.R @ 7:e23396bf7bdb draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit d30c91c3b4f71ec45b72976f7c2f08ea7df1e376-dirty"
| author | rnateam |
|---|---|
| date | Fri, 27 Aug 2021 10:50:18 +0000 |
| parents | 7da70e9119b2 |
| children |
comparison
equal
deleted
inserted
replaced
| 6:7da70e9119b2 | 7:e23396bf7bdb |
|---|---|
| 1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 1 options( |
| 2 show.error.messages = F, | |
| 3 error = function() { | |
| 4 cat(geterrmessage(), file = stderr()) | |
| 5 q("no", 1, F) | |
| 6 } | |
| 7 ) | |
| 2 | 8 |
| 3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 9 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 5 | 11 |
| 6 suppressPackageStartupMessages({ | 12 suppressPackageStartupMessages({ |
| 7 library(ChIPseeker) | 13 library(ChIPseeker) |
| 8 library(GenomicFeatures) | 14 library(GenomicFeatures) |
| 9 library(rtracklayer) | 15 library(rtracklayer) |
| 10 library(optparse) | 16 library(optparse) |
| 17 library(ggupset) | |
| 11 }) | 18 }) |
| 12 | 19 |
| 13 option_list <- list( | 20 option_list <- list( |
| 14 make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"), | 21 make_option(c("-i", "--infile"), type = "character", help = "Peaks file to be annotated"), |
| 15 make_option(c("-H","--header"), type="logical", help="Peaks file contains header row"), | 22 make_option(c("-H", "--header"), type = "logical", help = "Peaks file contains header row"), |
| 16 make_option(c("-G","--gtf"), type="character", help="GTF to create TxDb."), | 23 make_option(c("-G", "--gtf"), type = "character", help = "GTF to create TxDb."), |
| 17 make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"), | 24 make_option(c("-u", "--upstream"), type = "integer", help = "TSS upstream region"), |
| 18 make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"), | 25 make_option(c("-d", "--downstream"), type = "integer", help = "TSS downstream region"), |
| 19 make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"), | 26 make_option(c("-F", "--flankgeneinfo"), type = "logical", help = "Add flanking gene info"), |
| 20 make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"), | 27 make_option(c("-D", "--flankgenedist"), type = "integer", help = "Flanking gene distance"), |
| 21 make_option(c("-j","--ignoreUpstream"), type="logical", help="Ignore upstream"), | 28 make_option(c("-j", "--ignore_upstream"), type = "logical", help = "Ignore upstream"), |
| 22 make_option(c("-k","--ignoreDownstream"), type="logical", help="Ignore downstream"), | 29 make_option( |
| 23 make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."), | 30 c("-k", "--ignore_downstream"), |
| 24 make_option(c("-p","--plots"), type="logical", help="PDF of plots."), | 31 type = "logical", |
| 25 make_option(c("-r","--rdata"), type="logical", help="Output RData file.") | 32 help = "Ignore downstream" |
| 26 ) | 33 ), |
| 34 make_option(c("-f", "--format"), type = "character", help = "Output format (interval or tabular)."), | |
| 35 make_option(c("-p", "--plots"), type = "logical", help = "PDF of plots."), | |
| 36 make_option(c("-r", "--rdata"), type = "logical", help = "Output RData file.") | |
| 37 ) | |
| 27 | 38 |
| 28 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 39 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
| 29 args = parse_args(parser) | 40 args <- parse_args(parser) |
| 30 | 41 |
| 31 peaks = args$infile | 42 peaks <- args$infile |
| 32 gtf = args$gtf | 43 gtf <- args$gtf |
| 33 up = args$upstream | 44 up <- args$upstream |
| 34 down = args$downstream | 45 down <- args$downstream |
| 35 format = args$format | 46 format <- args$format |
| 36 | 47 |
| 37 if (!is.null(args$flankgeneinfo)) { | 48 if (!is.null(args$flankgeneinfo)) { |
| 38 flankgeneinfo <- TRUE | 49 flankgeneinfo <- TRUE |
| 39 } else { | 50 } else { |
| 40 flankgeneinfo <- FALSE | 51 flankgeneinfo <- FALSE |
| 41 } | 52 } |
| 42 | 53 |
| 43 if (!is.null(args$ignoreUpstream)) { | 54 if (!is.null(args$ignore_upstream)) { |
| 44 ignoreUpstream <- TRUE | 55 ignore_upstream <- TRUE |
| 45 } else { | 56 } else { |
| 46 ignoreUpstream <- FALSE | 57 ignore_upstream <- FALSE |
| 47 } | 58 } |
| 48 | 59 |
| 49 if (!is.null(args$ignoreDownstream)) { | 60 if (!is.null(args$ignore_downstream)) { |
| 50 ignoreDownstream <- TRUE | 61 ignore_downstream <- TRUE |
| 51 } else { | 62 } else { |
| 52 ignoreDownstream <- FALSE | 63 ignore_downstream <- FALSE |
| 53 } | 64 } |
| 54 | 65 |
| 55 if (!is.null(args$header)) { | 66 if (!is.null(args$header)) { |
| 56 header <- TRUE | 67 header <- TRUE |
| 57 } else { | 68 } else { |
| 58 header <- FALSE | 69 header <- FALSE |
| 59 } | 70 } |
| 60 | 71 |
| 61 peaks <- readPeakFile(peaks, header=header) | 72 peaks <- readPeakFile(peaks, header = header) |
| 62 | 73 |
| 63 # Make TxDb from GTF | 74 # Make TxDb from GTF |
| 64 txdb <- makeTxDbFromGFF(gtf, format="gtf") | 75 txdb <- makeTxDbFromGFF(gtf, format = "gtf") |
| 65 | 76 |
| 66 # Annotate peaks | 77 # Annotate peaks |
| 67 peakAnno <- annotatePeak(peaks, TxDb=txdb, | 78 peak_anno <- annotatePeak( |
| 68 tssRegion=c(-up, down), | 79 peaks, |
| 69 addFlankGeneInfo=flankgeneinfo, | 80 TxDb = txdb, |
| 70 flankDistance=args$flankgenedist, | 81 tssRegion = c(-up, down), |
| 71 ignoreUpstream=ignoreUpstream, | 82 addFlankGeneInfo = flankgeneinfo, |
| 72 ignoreDownstream=ignoreDownstream) | 83 flankDistance = args$flankgenedist, |
| 84 ignoreUpstream = ignore_upstream, | |
| 85 ignoreDownstream = ignore_downstream | |
| 86 ) | |
| 73 | 87 |
| 74 # Add gene name | 88 # Add gene name |
| 75 features <- import(gtf, format="gtf") | 89 features <- import(gtf, format = "gtf") |
| 76 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) | 90 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) |
| 77 res <- as.data.frame(peakAnno) | 91 res <- as.data.frame(peak_anno) |
| 78 res <- merge(res, ann, by.x="geneId", by.y="gene_id") | 92 res <- merge(res, ann, by.x = "geneId", by.y = "gene_id") |
| 79 names(res)[names(res) == "gene_name"] <- "geneName" | 93 names(res)[names(res) == "gene_name"] <- "geneName" |
| 80 | 94 |
| 81 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end | 95 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end |
| 82 metacols <- res[, c(7:ncol(res), 1)] | 96 metacols <- res[, c(7:ncol(res), 1)] |
| 83 # Convert from 1-based to 0-based format | 97 # Convert from 1-based to 0-based format |
| 84 if (format == "interval") { | 98 if (format == "interval") { |
| 85 metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|")) | 99 metacols <- |
| 86 resout <- data.frame(res$seqnames, | 100 apply(as.data.frame(metacols), 1, function(col) |
| 87 res$start - 1, | 101 paste(col, collapse = "|")) |
| 88 res$end, | 102 resout <- data.frame(res$seqnames, |
| 89 metacols) | 103 res$start - 1, |
| 90 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | 104 res$end, |
| 105 metacols) | |
| 106 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | |
| 91 } else { | 107 } else { |
| 92 resout <- data.frame(res$seqnames, | 108 resout <- data.frame(res$seqnames, |
| 93 res$start - 1, | 109 res$start - 1, |
| 94 res$end, | 110 res$end, |
| 95 metacols) | 111 metacols) |
| 96 colnames(resout)[1:3] <- c("Chrom", "Start", "End") | 112 colnames(resout)[1:3] <- c("Chrom", "Start", "End") |
| 97 } | 113 } |
| 98 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE) | 114 write.table( |
| 115 resout, | |
| 116 file = "out.tab", | |
| 117 sep = "\t", | |
| 118 row.names = FALSE, | |
| 119 quote = FALSE | |
| 120 ) | |
| 99 | 121 |
| 100 if (!is.null(args$plots)) { | 122 if (!is.null(args$plots)) { |
| 101 pdf("out.pdf", width=14) | 123 pdf("out.pdf", width = 14) |
| 102 plotAnnoPie(peakAnno) | 124 plotAnnoPie(peak_anno) |
| 103 p1 <- plotAnnoBar(peakAnno) | 125 p1 <- plotAnnoBar(peak_anno) |
| 104 print(p1) | 126 print(p1) |
| 105 vennpie(peakAnno) | 127 vennpie(peak_anno) |
| 106 upsetplot(peakAnno) | 128 upsetplot(peak_anno) |
| 107 p2 <- plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") | 129 p2 <- |
| 108 print(p2) | 130 plotDistToTSS(peak_anno, title = "Distribution of transcription factor-binding loci\nrelative to TSS") |
| 109 dev.off() | 131 print(p2) |
| 110 rm(p1, p2) | 132 dev.off() |
| 133 rm(p1, p2) | |
| 111 } | 134 } |
| 112 | 135 |
| 113 ## Output RData file | 136 ## Output RData file |
| 114 | 137 |
| 115 if (!is.null(args$rdata)) { | 138 if (!is.null(args$rdata)) { |
