changeset 2:3088377510dc draft

"planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328"
author galaxyp
date Wed, 12 Aug 2020 21:36:03 +0000
parents a1775ba76f0a
children faab3160cfcf
files MT2MQ.R MT2MQ.xml
diffstat 2 files changed, 70 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/MT2MQ.R	Fri Jun 26 15:13:32 2020 +0000
+++ b/MT2MQ.R	Wed Aug 12 21:36:03 2020 +0000
@@ -2,10 +2,10 @@
 
 # Load libraries
 suppressPackageStartupMessages(library(tidyverse))
-#default_locale()
+suppressPackageStartupMessages(library(taxize))
 
 # Set parameters from arguments
-args = commandArgs(trailingOnly = TRUE)
+args <- commandArgs(trailingOnly = TRUE)
 data <- args[1]
   # data: full path to file or directory:
   #   - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM.
@@ -18,49 +18,67 @@
 ontology <- unlist(strsplit(args[3], split = ","))
   # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas.
   #   ex: to include all: "molecular_function,biological_process,cellular_component"
-outfile <- args[4]
-  # outfile: full path with pathname and extension for output
+
+int_file <- args[4]
+  # int_file: full path (including file name and extension) of the intensity file to write
+
+func_file <- args[5]
+  # func_file: full path (including file name and extension) of the function file to write
+
+tax_file <- args[6]
+  # tax_file: full path (including file name and extension) of the taxonomy file to write
+
 
 # Functional mode
-if (mode == "f"){
-  out <- read.delim(file=data, header=TRUE, sep='\t') %>% 
-    filter(!grepl(".+g__.+",X..Gene.Family)) %>% 
-    separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% 
-    separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% 
-    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% 
-    filter(namespace %in% ontology) %>% 
+if (mode == "f") {
+  int <- read.delim(file = data, header = TRUE, sep = "\t") %>%
+    filter(!grepl(".+g__.+", X..Gene.Family)) %>%
+    separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
+    separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
+    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+    filter(namespace %in% ontology) %>%
     select(id, name, namespace, 4:ncol(.))
+  func <- int %>%
+    select(id) %>%
+    mutate(gos = id)
+  write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+  write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 
 # Taxonomic mode
-if (mode == "t"){
+if (mode == "t") {
   files <- dir(path = data)
-  out <- tibble(filename = files) %>% 
-    mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>% 
-    unnest(cols = c(file_contents)) %>% 
-    rename(sample = filename) %>% 
-    separate(col = sample, into = c("sample",NA), sep=".tsv") %>% 
-    pivot_wider(names_from = sample, values_from = abundance) %>% 
-    mutate(rank = "genus") %>% 
-    rename(name = genus) %>% 
-    mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database
+  int <- tibble(filename = files) %>%
+    mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>%
+    unnest(cols = c(file_contents)) %>%
+    rename(sample = filename) %>%
+    separate(col = sample, into = c("sample", NA), sep = ".tsv") %>%
+    pivot_wider(names_from = sample, values_from = abundance) %>%
+    mutate(rank = "genus") %>%
+    rename(name = genus) %>%
+    mutate(name = as.character(name)) %>%
+    mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>%
     select(id, name, rank, 2:ncol(.))
+  tax <- int %>%
+    select(id) %>%
+    mutate(tax = id)
+  write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+  write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 
 # Function-taxonomy mode
-if (mode == "ft"){
-  out <- read.delim(file=data, header=TRUE, sep='\t') %>% 
-    filter(grepl(".+g__.+",X..Gene.Family)) %>% 
-    separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% 
-    separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% 
-    separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>%
-    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>%
-    mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% 
-    mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% 
-    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% 
-    filter(namespace %in% ontology) %>% 
+if (mode == "ft") {
+  ft <- read.delim(file = data, header = TRUE, sep = "\t") %>%
+    filter(grepl(".+g__.+", X..Gene.Family)) %>%
+    separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
+    separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
+    separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>%
+    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>%
+    select(-"Extra") %>%
+    mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>%
+    mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>%
+    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+    filter(namespace %in% ontology) %>%
     select(id, name, namespace, 4:ncol(.))
+  write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
-
-# Write file
-write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE)
--- a/MT2MQ.xml	Fri Jun 26 15:13:32 2020 +0000
+++ b/MT2MQ.xml	Wed Aug 12 21:36:03 2020 +0000
@@ -1,18 +1,19 @@
-<tool id="mt2mq" name="MT2MQ" version="1.0">
+<tool id="mt2mq" name="MT2MQ" version="1.1.0">
     <description>Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome</description>
     <requirements>
         <requirement type="package" version="1.2.1">r-tidyverse</requirement>
+        <requirement type="package" version="0.9.97">r-taxize</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
 
 #if $options.mode == "f" or $options.mode == "ft":
-    Rscript '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' $options.ontology '$mq_output'
+    Rscript --vanilla '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' '$options.ontology' '$int_output' '$func_output' '$tax_output'
 #elif $options.mode == "t":
     mkdir in_dir
     #for $input in $options.input_files:
         && cp '$input' 'in_dir/${input.name.rsplit('.',1)[0]}'
     #end for
-    && Rscript '$__tool_directory__/MT2MQ.R' in_dir t NA '$mq_output'
+    && Rscript --vanilla '$__tool_directory__/MT2MQ.R' in_dir t NA '$int_output' '$func_output' '$tax_output'
 #end if
 
     ]]>
@@ -49,7 +50,14 @@
     </inputs>
     
     <outputs>
-        <data name="mq_output" format="tabular" label="${options.mode}_output.tabular"/>
+        <data name="int_output" format="tabular" label="${options.mode}_int.tabular"/>
+        <data name="func_output" format="tabular" label="func.tabular">
+            <filter>options['mode'] == "f"</filter>
+        </data>
+        <data name="tax_output" format="tabular" label="tax.tabular">
+            <filter>options['mode'] == "t"</filter>
+        </data>
+
     </outputs>
     
     
@@ -60,7 +68,7 @@
                 <param name="input_files" value="T4A.tsv,T4B.tsv,T4C.tsv,T7A.tsv,T7B.tsv,T7C.tsv" ftype="tsv"/>
                 <param name="ontology" value="NA"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="rank"/>
                     <has_text text="genus"/>
@@ -74,7 +82,7 @@
                 <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
                 <param name="ontology" value="molecular_function"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="namespace"/>
                     <has_text text="molecular_function"/>
@@ -88,7 +96,7 @@
                 <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
                 <param name="ontology" value="biological_process"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="namespace"/>
                     <has_text text="genus"/>
@@ -111,21 +119,16 @@
 
 - **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. 
 
-	- Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode.
+	- Output: a taxonomy file and an intensity file to use in Metaquantome's taxonomy mode. In both files the "peptide" column is named "id"; in the taxonomy file the taxon column is named "tax".
 
 - **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include.
 
-	- Output: a single tabular file formatted for use as input for Metaquantome's functional mode.
+	- Output: a function file and an intensity file to use in Metaquantome's functional mode. In both files the "peptide" column is named "id"; in the function file the functional column is named "gos".
 
 - **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include.
 
 	- Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome.
 
-**Outputs**:
-------------
-
-MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis.
-
     ]]></help>
     
     <citations>