Mercurial > repos > rnateam > chipseeker
comparison chipseeker.R @ 7:e23396bf7bdb draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit d30c91c3b4f71ec45b72976f7c2f08ea7df1e376-dirty"
| author | rnateam | 
|---|---|
| date | Fri, 27 Aug 2021 10:50:18 +0000 | 
| parents | 7da70e9119b2 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 6:7da70e9119b2 | 7:e23396bf7bdb | 
|---|---|
| 1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 1 options( | 
| 2 show.error.messages = F, | |
| 3 error = function() { | |
| 4 cat(geterrmessage(), file = stderr()) | |
| 5 q("no", 1, F) | |
| 6 } | |
| 7 ) | |
| 2 | 8 | 
| 3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 9 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 
| 5 | 11 | 
| 6 suppressPackageStartupMessages({ | 12 suppressPackageStartupMessages({ | 
| 7 library(ChIPseeker) | 13 library(ChIPseeker) | 
| 8 library(GenomicFeatures) | 14 library(GenomicFeatures) | 
| 9 library(rtracklayer) | 15 library(rtracklayer) | 
| 10 library(optparse) | 16 library(optparse) | 
| 17 library(ggupset) | |
| 11 }) | 18 }) | 
| 12 | 19 | 
| 13 option_list <- list( | 20 option_list <- list( | 
| 14 make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"), | 21 make_option(c("-i", "--infile"), type = "character", help = "Peaks file to be annotated"), | 
| 15 make_option(c("-H","--header"), type="logical", help="Peaks file contains header row"), | 22 make_option(c("-H", "--header"), type = "logical", help = "Peaks file contains header row"), | 
| 16 make_option(c("-G","--gtf"), type="character", help="GTF to create TxDb."), | 23 make_option(c("-G", "--gtf"), type = "character", help = "GTF to create TxDb."), | 
| 17 make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"), | 24 make_option(c("-u", "--upstream"), type = "integer", help = "TSS upstream region"), | 
| 18 make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"), | 25 make_option(c("-d", "--downstream"), type = "integer", help = "TSS downstream region"), | 
| 19 make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"), | 26 make_option(c("-F", "--flankgeneinfo"), type = "logical", help = "Add flanking gene info"), | 
| 20 make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"), | 27 make_option(c("-D", "--flankgenedist"), type = "integer", help = "Flanking gene distance"), | 
| 21 make_option(c("-j","--ignoreUpstream"), type="logical", help="Ignore upstream"), | 28 make_option(c("-j", "--ignore_upstream"), type = "logical", help = "Ignore upstream"), | 
| 22 make_option(c("-k","--ignoreDownstream"), type="logical", help="Ignore downstream"), | 29 make_option( | 
| 23 make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."), | 30 c("-k", "--ignore_downstream"), | 
| 24 make_option(c("-p","--plots"), type="logical", help="PDF of plots."), | 31 type = "logical", | 
| 25 make_option(c("-r","--rdata"), type="logical", help="Output RData file.") | 32 help = "Ignore downstream" | 
| 26 ) | 33 ), | 
| 34 make_option(c("-f", "--format"), type = "character", help = "Output format (interval or tabular)."), | |
| 35 make_option(c("-p", "--plots"), type = "logical", help = "PDF of plots."), | |
| 36 make_option(c("-r", "--rdata"), type = "logical", help = "Output RData file.") | |
| 37 ) | |
| 27 | 38 | 
| 28 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 39 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | 
| 29 args = parse_args(parser) | 40 args <- parse_args(parser) | 
| 30 | 41 | 
| 31 peaks = args$infile | 42 peaks <- args$infile | 
| 32 gtf = args$gtf | 43 gtf <- args$gtf | 
| 33 up = args$upstream | 44 up <- args$upstream | 
| 34 down = args$downstream | 45 down <- args$downstream | 
| 35 format = args$format | 46 format <- args$format | 
| 36 | 47 | 
| 37 if (!is.null(args$flankgeneinfo)) { | 48 if (!is.null(args$flankgeneinfo)) { | 
| 38 flankgeneinfo <- TRUE | 49 flankgeneinfo <- TRUE | 
| 39 } else { | 50 } else { | 
| 40 flankgeneinfo <- FALSE | 51 flankgeneinfo <- FALSE | 
| 41 } | 52 } | 
| 42 | 53 | 
| 43 if (!is.null(args$ignoreUpstream)) { | 54 if (!is.null(args$ignore_upstream)) { | 
| 44 ignoreUpstream <- TRUE | 55 ignore_upstream <- TRUE | 
| 45 } else { | 56 } else { | 
| 46 ignoreUpstream <- FALSE | 57 ignore_upstream <- FALSE | 
| 47 } | 58 } | 
| 48 | 59 | 
| 49 if (!is.null(args$ignoreDownstream)) { | 60 if (!is.null(args$ignore_downstream)) { | 
| 50 ignoreDownstream <- TRUE | 61 ignore_downstream <- TRUE | 
| 51 } else { | 62 } else { | 
| 52 ignoreDownstream <- FALSE | 63 ignore_downstream <- FALSE | 
| 53 } | 64 } | 
| 54 | 65 | 
| 55 if (!is.null(args$header)) { | 66 if (!is.null(args$header)) { | 
| 56 header <- TRUE | 67 header <- TRUE | 
| 57 } else { | 68 } else { | 
| 58 header <- FALSE | 69 header <- FALSE | 
| 59 } | 70 } | 
| 60 | 71 | 
| 61 peaks <- readPeakFile(peaks, header=header) | 72 peaks <- readPeakFile(peaks, header = header) | 
| 62 | 73 | 
| 63 # Make TxDb from GTF | 74 # Make TxDb from GTF | 
| 64 txdb <- makeTxDbFromGFF(gtf, format="gtf") | 75 txdb <- makeTxDbFromGFF(gtf, format = "gtf") | 
| 65 | 76 | 
| 66 # Annotate peaks | 77 # Annotate peaks | 
| 67 peakAnno <- annotatePeak(peaks, TxDb=txdb, | 78 peak_anno <- annotatePeak( | 
| 68 tssRegion=c(-up, down), | 79 peaks, | 
| 69 addFlankGeneInfo=flankgeneinfo, | 80 TxDb = txdb, | 
| 70 flankDistance=args$flankgenedist, | 81 tssRegion = c(-up, down), | 
| 71 ignoreUpstream=ignoreUpstream, | 82 addFlankGeneInfo = flankgeneinfo, | 
| 72 ignoreDownstream=ignoreDownstream) | 83 flankDistance = args$flankgenedist, | 
| 84 ignoreUpstream = ignore_upstream, | |
| 85 ignoreDownstream = ignore_downstream | |
| 86 ) | |
| 73 | 87 | 
| 74 # Add gene name | 88 # Add gene name | 
| 75 features <- import(gtf, format="gtf") | 89 features <- import(gtf, format = "gtf") | 
| 76 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) | 90 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) | 
| 77 res <- as.data.frame(peakAnno) | 91 res <- as.data.frame(peak_anno) | 
| 78 res <- merge(res, ann, by.x="geneId", by.y="gene_id") | 92 res <- merge(res, ann, by.x = "geneId", by.y = "gene_id") | 
| 79 names(res)[names(res) == "gene_name"] <- "geneName" | 93 names(res)[names(res) == "gene_name"] <- "geneName" | 
| 80 | 94 | 
| 81 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end | 95 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end | 
| 82 metacols <- res[, c(7:ncol(res), 1)] | 96 metacols <- res[, c(7:ncol(res), 1)] | 
| 83 # Convert from 1-based to 0-based format | 97 # Convert from 1-based to 0-based format | 
| 84 if (format == "interval") { | 98 if (format == "interval") { | 
| 85 metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|")) | 99 metacols <- | 
| 86 resout <- data.frame(res$seqnames, | 100 apply(as.data.frame(metacols), 1, function(col) | 
| 87 res$start - 1, | 101 paste(col, collapse = "|")) | 
| 88 res$end, | 102 resout <- data.frame(res$seqnames, | 
| 89 metacols) | 103 res$start - 1, | 
| 90 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | 104 res$end, | 
| 105 metacols) | |
| 106 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | |
| 91 } else { | 107 } else { | 
| 92 resout <- data.frame(res$seqnames, | 108 resout <- data.frame(res$seqnames, | 
| 93 res$start - 1, | 109 res$start - 1, | 
| 94 res$end, | 110 res$end, | 
| 95 metacols) | 111 metacols) | 
| 96 colnames(resout)[1:3] <- c("Chrom", "Start", "End") | 112 colnames(resout)[1:3] <- c("Chrom", "Start", "End") | 
| 97 } | 113 } | 
| 98 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE) | 114 write.table( | 
| 115 resout, | |
| 116 file = "out.tab", | |
| 117 sep = "\t", | |
| 118 row.names = FALSE, | |
| 119 quote = FALSE | |
| 120 ) | |
| 99 | 121 | 
| 100 if (!is.null(args$plots)) { | 122 if (!is.null(args$plots)) { | 
| 101 pdf("out.pdf", width=14) | 123 pdf("out.pdf", width = 14) | 
| 102 plotAnnoPie(peakAnno) | 124 plotAnnoPie(peak_anno) | 
| 103 p1 <- plotAnnoBar(peakAnno) | 125 p1 <- plotAnnoBar(peak_anno) | 
| 104 print(p1) | 126 print(p1) | 
| 105 vennpie(peakAnno) | 127 vennpie(peak_anno) | 
| 106 upsetplot(peakAnno) | 128 upsetplot(peak_anno) | 
| 107 p2 <- plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") | 129 p2 <- | 
| 108 print(p2) | 130 plotDistToTSS(peak_anno, title = "Distribution of transcription factor-binding loci\nrelative to TSS") | 
| 109 dev.off() | 131 print(p2) | 
| 110 rm(p1, p2) | 132 dev.off() | 
| 133 rm(p1, p2) | |
| 111 } | 134 } | 
| 112 | 135 | 
| 113 ## Output RData file | 136 ## Output RData file | 
| 114 | 137 | 
| 115 if (!is.null(args$rdata)) { | 138 if (!is.null(args$rdata)) { | 
