Mercurial > repos > galaxyp > custom_pro_db_annotation_data_manager
diff data_manager/customProDB_annotation.R @ 0:663ee21a8609 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/data_manager_customProDB commit 141369f97aa2804d2bbfd9ed620ea2a5574994c2-dirty
author | galaxyp |
---|---|
date | Thu, 21 Jan 2016 18:19:52 -0500 |
parents | |
children | 20a1b026b798 |
line wrap: on
line diff
#!/usr/bin/env Rscript

# Galaxy data manager script: downloads RefSeq coding and protein FASTA files
# from the UCSC Table Browser for one genome build (--dbkey), runs
# customProDB::PrepareAnnotationRefseq() on them, and writes a "customProDB"
# data-table entry as JSON to the path given by --outputFile.

initial.options <- commandArgs(trailingOnly = FALSE)
# Directory containing this script, taken from Rscript's --file= argument.
# It is not referenced again in this script.
script_parent_dir <- dirname(
  sub("--file=", "", initial.options[grep("--file=", initial.options)])
)

## begin warning handler
withCallingHandlers({

  library(methods) # Because Rscript does not always do this

  options("useFancyQuotes" = FALSE)

  suppressPackageStartupMessages(library("optparse"))
  suppressPackageStartupMessages(library("RGalaxy"))

  option_list <- list()
  option_list$dbkey <- make_option("--dbkey", type = "character")
  option_list$outputFile <- make_option("--outputFile", type = "character")

  opt <- parse_args(OptionParser(option_list = option_list))

  # TRUE only for a single, non-missing, non-empty character value.
  is_single_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Announce and download `url` to `dest`; abort through gstop() when
  # download.file() reports a non-zero status or the result is missing/empty.
  download_fasta <- function(label, url, dest) {
    cat(paste("Downloading", label, "FASTA from:", url, "\n"))
    status <- download.file(url, dest, quiet = TRUE, mode = "wb")
    if (!identical(as.integer(status), 0L)) {
      gstop(paste0("failed to download ", label, " FASTA (status ", status, ")"))
    }
    size <- file.info(dest)$size
    if (is.na(size) || size == 0) {
      gstop(paste0("downloaded ", label, " FASTA is missing or empty"))
    }
    invisible(dest)
  }

  #' Build customProDB RefSeq annotation for one UCSC genome build
  #'
  #' Reads the JSON parameter file at `outputFile`, creates the directory named
  #' by its first output's `extra_files_path`, downloads `<dbkey>.cds.fa` and
  #' `<dbkey>.protein.fa` into it from the UCSC Table Browser (refGene track),
  #' calls `customProDB::PrepareAnnotationRefseq()` with that directory as
  #' `annotation_path`, and finally overwrites `outputFile` with a JSON
  #' document describing the new `customProDB` data-table entry.
  #'
  #' @param dbkey UCSC genome build identifier (single non-empty string). The
  #'   default is an RGalaxy parameter declaration, not a usable value.
  #' @param outputFile Path to the JSON parameter file; it must exist and is
  #'   overwritten with the result. The default is an RGalaxy declaration.
  #' @return Called for its side effects (files written, `outputFile`
  #'   rewritten). Errors are raised through `gstop()`.
  customProDB_annotation <- function(
      dbkey = GalaxyCharacterParam(required = TRUE),
      outputFile = GalaxyOutput("output", "json")) {
    # When an option is missing on the command line the RGalaxy declaration
    # defaults arrive here; reject them up front with a clear message.
    if (!is_single_string(dbkey)) {
      gstop("dbkey must be a single non-empty string")
    }
    if (!is_single_string(outputFile) || !file.exists(outputFile)) {
      gstop("json params file does not exist")
    }

    suppressPackageStartupMessages(library(rjson))
    job_params <- fromJSON(file = outputFile)

    output_data <- job_params$output_data
    if (length(output_data) < 1L) {
      gstop("json params file has no output_data entry")
    }
    target_directory <- output_data[[1]]$extra_files_path
    if (!is_single_string(target_directory)) {
      gstop("json params file does not provide extra_files_path")
    }

    # recursive = TRUE creates missing parents; an already existing directory
    # is fine, so only the final existence check decides success.
    dir.create(target_directory, recursive = TRUE, showWarnings = FALSE)
    if (!dir.exists(target_directory)) {
      gstop(paste0("failed to create target directory: ", target_directory))
    }

    # The params file is not removed after reading: the same path is
    # overwritten with the data-table JSON at the end of this function.

    # Percent-encode dbkey for use as a query-string value; ordinary
    # alphanumeric build names are left unchanged by this.
    db_query <- utils::URLencode(dbkey, reserved = TRUE)
    hg_tables <- "http://genome.ucsc.edu/cgi-bin/hgTables?db="
    coding_fasta_url <- paste0(
      hg_tables, db_query,
      "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon",
      "&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence",
      "&hgta_group=genes&hgta_track=refGene&hgta_table=refGene",
      "&hgta_regionType=genome"
    )
    protein_fasta_url <- paste0(
      hg_tables, db_query,
      "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit",
      "&hgta_track=refGene&hgta_table=refGene"
    )
    coding_fasta_path <- file.path(target_directory, paste0(dbkey, ".cds.fa"))
    protein_fasta_path <- file.path(
      target_directory, paste0(dbkey, ".protein.fa")
    )

    suppressPackageStartupMessages(library(customProDB))

    # Raise the download timeout for the duration of this call only.
    old_options <- options(timeout = 3600)
    on.exit(options(old_options), add = TRUE)

    download_fasta("coding", coding_fasta_url, coding_fasta_path)
    download_fasta("protein", protein_fasta_url, protein_fasta_path)

    customProDB::PrepareAnnotationRefseq(
      genome = dbkey,
      CDSfasta = coding_fasta_path,
      pepfasta = protein_fasta_path,
      annotation_path = target_directory
    )

    output <- list(
      data_tables = list(
        customProDB = c(
          path = paste0("customProDB/", dbkey),
          name = dbkey,
          value = dbkey
        )
      )
    )
    write(toJSON(output), file = outputFile)
  }

  # Forward every parsed option except optparse's built-in "help" flag.
  params <- opt[setdiff(names(opt), "help")]

  setClass("GalaxyRemoteError", contains = "character")
  # Calls `f` with the parsed options and converts an error into a
  # GalaxyRemoteError object. Defined here but not invoked by this script.
  wrappedFunction <- function(f) {
    tryCatch(
      do.call(f, params),
      error = function(e) new("GalaxyRemoteError", conditionMessage(e))
    )
  }

  do.call(customProDB_annotation, params)

## end warning handler
}, warning = function(w) {
  # Report warnings on stdout and continue instead of letting them propagate.
  cat(paste("Warning:", conditionMessage(w), "\n"))
  invokeRestart("muffleWarning")
})