# HG changeset patch # User galaxyp # Date 1597268163 0 # Node ID 3088377510dc1f72af244700ddbd21eba3d2363e # Parent a1775ba76f0ac40cb2c5854fc2226fc3fc7c90bb "planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328" diff -r a1775ba76f0a -r 3088377510dc MT2MQ.R --- a/MT2MQ.R Fri Jun 26 15:13:32 2020 +0000 +++ b/MT2MQ.R Wed Aug 12 21:36:03 2020 +0000 @@ -2,10 +2,10 @@ # Load libraries suppressPackageStartupMessages(library(tidyverse)) -#default_locale() +suppressPackageStartupMessages(library(taxize)) # Set parameters from arguments -args = commandArgs(trailingOnly = TRUE) +args <- commandArgs(trailingOnly = TRUE) data <- args[1] # data: full path to file or directory: # - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM. @@ -18,49 +18,67 @@ ontology <- unlist(strsplit(args[3], split = ",")) # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas. # ex: to include all: "molecular_function,biological_process,cellular_component" -outfile <- args[4] - # outfile: full path with pathname and extension for output + +int_file <- args[4] + # int_file: full path and file name and extension to write intensity file + +func_file <- args[5] + # func_file: full path and file name and extension to write func file + +tax_file <- args[6] + # tax_file: full path and file name and extension to write tax file + # Functional mode -if (mode == "f"){ - out <- read.delim(file=data, header=TRUE, sep='\t') %>% - filter(!grepl(".+g__.+",X..Gene.Family)) %>% - separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% - separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% - mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% - filter(namespace %in% ontology) %>% +if (mode == "f") { + int <- read.delim(file = data, header = TRUE, sep = "\t") %>% + filter(!grepl(".+g__.+", X..Gene.Family)) %>% + separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% + separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% + mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% + filter(namespace %in% ontology) %>% select(id, name, namespace, 4:ncol(.)) + func <- int %>% + select(id) %>% + mutate(gos = id) + write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) + write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE) } # Taxonomic mode -if (mode == "t"){ +if (mode == "t") { files <- dir(path = data) - out <- tibble(filename = files) %>% - mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>% - unnest(cols = c(file_contents)) %>% - rename(sample = filename) %>% - separate(col = sample, into = c("sample",NA), sep=".tsv") %>% - pivot_wider(names_from = sample, values_from = abundance) %>% - mutate(rank = "genus") %>% - rename(name = genus) %>% - mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database + int <- tibble(filename = files) %>% + mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>% + unnest(cols = c(file_contents)) %>% + rename(sample = filename) %>% + separate(col = sample, into = c("sample", NA), sep = ".tsv") %>% + pivot_wider(names_from = sample, values_from = abundance) %>% + mutate(rank = "genus") %>% + rename(name = genus) %>% + mutate(name = as.character(name)) %>% + mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>% select(id, name, rank, 2:ncol(.)) + tax <- int %>% + select(id) %>% + mutate(tax = id) + write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) + write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE) } # Function-taxonomy mode -if (mode == "ft"){ - out <- read.delim(file=data, header=TRUE, sep='\t') %>% - filter(grepl(".+g__.+",X..Gene.Family)) %>% - separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% - separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% - separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>% - separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>% - mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% - mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% - mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% - filter(namespace %in% ontology) %>% +if (mode == "ft") { + ft <- read.delim(file = data, header = TRUE, sep = "\t") %>% + filter(grepl(".+g__.+", X..Gene.Family)) %>% + separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% + separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% + separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>% + separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% + select(-"Extra") %>% + mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% + mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% + mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% + filter(namespace %in% ontology) %>% select(id, name, namespace, 4:ncol(.)) + write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) } - -# Write file -write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE) diff -r a1775ba76f0a -r 3088377510dc MT2MQ.xml --- a/MT2MQ.xml Fri Jun 26 15:13:32 2020 +0000 +++ b/MT2MQ.xml Wed Aug 12 21:36:03 2020 +0000 @@ -1,18 +1,19 @@ - + Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome r-tidyverse + r-taxize @@ -49,7 +50,14 @@ - + + + options['mode'] == "f" + + + options['mode'] == "t" + + @@ -60,7 +68,7 @@ - + @@ -74,7 +82,7 @@ - + @@ -88,7 +96,7 @@ - + @@ -111,21 +119,16 @@ - **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. - - Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode. + - Output: a taxonomy file and an intensity file to use in Metaquantome's taxonomy mode. The "peptide" column name is "id" and the taxon column name is "tax". - **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include. - - Output: a single tabular file formatted for use as input for Metaquantome's functional mode. + - Output: a function file and an intensity file to use in Metaquantome's functional mode. The "peptide" column name is "id" and the functional column name is "gos". - **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include. - Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome. -**Outputs**: ------------- - -MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis. - ]]>