changeset 0:663ee21a8609 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/data_manager_customProDB commit 141369f97aa2804d2bbfd9ed620ea2a5574994c2-dirty
author galaxyp
date Thu, 21 Jan 2016 18:19:52 -0500
parents
children 4c77cf5a2977
files data_manager/customProDB_annotation.R data_manager/customProDB_annotation.xml data_manager_conf.xml
diffstat 3 files changed, 142 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/customProDB_annotation.R	Thu Jan 21 18:19:52 2016 -0500
@@ -0,0 +1,89 @@
+#!/usr/bin/env Rscript
+
+initial.options <- commandArgs(trailingOnly = FALSE) # complete command line, not only the trailing arguments
+script_parent_dir <- dirname(sub("--file=", "", grep("--file=", initial.options, value = TRUE))) # directory part of the --file= argument
+
+## begin warning handler
+withCallingHandlers({
+
+# Attach 'methods' explicitly because Rscript does not always do this.
+library(methods)
+options(useFancyQuotes = FALSE)
+# Load optparse (command-line parsing) and RGalaxy without their startup messages.
+suppressPackageStartupMessages({
+    library("optparse")
+    library("RGalaxy")
+})
+
+# Options accepted on the command line; both take a character value.
+option_list <- list(dbkey = make_option('--dbkey', type='character'),
+                    outputFile = make_option('--outputFile', type='character'))
+opt <- parse_args(OptionParser(option_list=option_list))
+
+
+## Galaxy data manager entry point: fetch the RefSeq coding and protein FASTA
+## for `dbkey` from UCSC (hgTables), build the customProDB annotation in the
+## output dataset's extra files directory and write the data table JSON.
+##   dbkey      - UCSC genome database name, used in the URLs and file names
+##   outputFile - existing JSON params file; overwritten with the result JSON
+## Stops via gstop() when the params file does not exist.
+customProDB_annotation <- function(
+    dbkey = GalaxyCharacterParam(required=TRUE),
+    outputFile = GalaxyOutput("output","json"))
+{
+    if (!file.exists(outputFile))
+    {
+        gstop("json params file does not exist")
+    }
+
+    suppressPackageStartupMessages(library(rjson))
+    params <- fromJSON(file=outputFile)
+    target_directory <- params$output_data[[1]]$extra_files_path
+    # Create missing parent directories too, and do not warn if it already exists.
+    dir.create(target_directory, recursive=TRUE, showWarnings=FALSE)
+
+    # The params file is deliberately left in place: it is overwritten at the end.
+    ucscTableCodingFastaURL <- paste0("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome")
+    ucscTableProteinFastaURL <- paste0("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene")
+    codingFastaFilepath <- file.path(target_directory, paste0(dbkey, ".cds.fa"))
+    proteinFastaFilepath <- file.path(target_directory, paste0(dbkey, ".protein.fa"))
+    suppressPackageStartupMessages(library(customProDB))
+    options(timeout=3600) # raise R's download timeout to 3600 seconds
+    cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n"))
+    download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=TRUE, mode='wb')
+    cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n"))
+    download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=TRUE, mode='wb')
+    customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory)
+
+    # data_manager_conf.xml declares a dbkey column and uses ${dbkey} in its
+    # move target, so the row must carry dbkey alongside path, name and value.
+    outputPath <- paste0("customProDB/", dbkey)
+    output <- list(data_tables = list())
+    output[["data_tables"]][["customProDB"]] <- c(path=outputPath, name=dbkey, dbkey=dbkey, value=dbkey)
+    write(toJSON(output), file=outputFile)
+}
+
+
+# Gather the parsed options that are passed on as named arguments to the
+# tool function; the "help" entry (presumably optparse's automatic flag) is
+# dropped because the function has no such parameter.
+params <- list()
+forwarded <- setdiff(names(opt), "help")
+params[forwarded] <- opt[forwarded]
+
+# S4 class that carries an error message as a character value.
+setClass("GalaxyRemoteError", contains="character")
+# Call `f` with the global `params`; an error is returned as a GalaxyRemoteError
+# holding its message instead of being raised. Not called elsewhere in this script.
+wrappedFunction <- function(f)
+    tryCatch(do.call(f, params), error=function(cond) new("GalaxyRemoteError", conditionMessage(cond)))
+
+
+suppressPackageStartupMessages(library(RGalaxy)) # RGalaxy is already attached near the top of the script; this repeats it
+do.call(customProDB_annotation, params) # run the tool with the parsed command-line options as named arguments
+
+## end warning handler: every warning raised in the block above is printed and then muffled
+}, warning = function(w) {
+    cat(paste("Warning:", conditionMessage(w), "\n")) # written with cat(), i.e. to standard output
+    invokeRestart("muffleWarning") # continue execution from the point where the warning was raised
+})
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/customProDB_annotation.xml	Thu Jan 21 18:19:52 2016 -0500
@@ -0,0 +1,31 @@
+<tool id="custom_pro_db_annotation_data_manager" name="CustomProDB Annotation" tool_type="manage_data" version="0.0.1">
+    <description>builder</description>
+    <command interpreter="Rscript --vanilla">customProDB_annotation.R
+      --outputFile "${out_file}"
+      --dbkey "${dbkey}"
+      2&gt;&amp;1
+    </command>
+    <stdio> <!-- any non-zero exit code falls into one of the two ranges below -->
+        <exit_code range=":-1" />
+        <exit_code range="1:" />
+    </stdio>
+    <inputs>
+        <!--<param name="all_fasta_source" type="select" label="Source FASTA Sequence">
+            <options from_data_table="all_fasta"/>
+        </param>
+        <param type="text" name="sequence_name" value="" label="Name of sequence" />
+        <param type="text" name="sequence_id" value="" label="ID for sequence" />-->
+        <param type="text" name="dbkey" value="" label="UCSC dbKey for reference genome">
+            <validator type="empty_field" message="Enter the UCSC dbKey of the reference genome" />
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/> <!-- handed to the script as outputFile -->
+    </outputs>
+    <help>
+.. class:: infomark
+
+**Notice:** Enter the UCSC dbKey of the reference genome (for example, hg19). The RefSeq coding and protein sequences for that genome are downloaded from UCSC to build the annotation.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Jan 21 18:19:52 2016 -0500
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- Data manager definition for data_manager/customProDB_annotation.xml; its output fills the "customProDB" data table. -->
+    <data_manager tool_file="data_manager/customProDB_annotation.xml" id="custom_pro_db_annotation_builder" version="0.0.1">
+        <data_table name="customProDB">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <!-- <source>${path}</source>--> <!-- No source is given: out_file.extra_files_path is used as the base by default and, e.g. for type="directory", the move refers to that base. -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/customProDB/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/customProDB/${value}/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>