# HG changeset patch # User artbio # Date 1657057263 0 # Node ID 2823044430c13c95f88bd60033815cfc3b36fb91 # Parent 134b19acf8767547b93dd445ffe7186ceaf617aa planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d diff -r 134b19acf876 -r 2823044430c1 mutational_patterns.R --- a/mutational_patterns.R Wed Oct 27 00:46:07 2021 +0000 +++ b/mutational_patterns.R Tue Jul 05 21:41:03 2022 +0000 @@ -1,7 +1,8 @@ # load packages that are provided in the conda env -options(show.error.messages = F, +options(show.error.messages = FALSE, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -202,16 +203,19 @@ # (For larger datasets it is wise to perform more iterations by changing the nrun parameter # to achieve stability and avoid local minima) nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) - # Assign signature names - colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum) - rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum) + # Assign signature COSMICv3.2 names + cosmic_signatures <- get_known_signatures() + nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) + sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) + plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) + grid.arrange(plot_cosine_sim) # Plot the 96-profile of the signatures: p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) - newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T), + newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), "[", new_sig_matrix$substitution, "]", - gsub("^.\\.", "", new_sig_matrix$context, perl = T)) + gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") grid.arrange(p5) @@ -329,8 +333,8 @@ fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) # Plot contribution barplots - pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") - pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") + pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") + pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs pc3_data <- pc3$data pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + @@ -397,7 +401,7 @@ level = rep("nolabels", length(fit_res_contrib[, 1])), fit_res_contrib, sum = rowSums(fit_res_contrib)) - worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] + worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] worklist <- worklist[1:opt$signum, ] worklist <- worklist[, -length(worklist[1, ])] worklist <- melt(worklist) @@ -405,10 +409,10 @@ } else { worklist <- list() for (i in levels(factor(levels_table$level))) { - fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] + worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] sum <- rowSums(as.data.frame(worklist[[i]])) worklist[[i]] <- cbind(worklist[[i]], sum) - worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] + worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] worklist[[i]] <- worklist[[i]][1:opt$signum, ] worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] } @@ -424,7 +428,7 @@ p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + geom_bar(width = 1, stat = "identity") + geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + - coord_polar("y", start = 0) + facet_wrap(.~sample) + + coord_polar("y", start = 0) + facet_wrap(. ~ sample) + labs(x = "", y = "Samples", fill = tag) + scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), values = signature_colors[levels(worklist$signature)], @@ -452,7 +456,7 @@ output_table <- data.frame(sample = rownames(output_table), output_table) colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) } - write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) + write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) } # calculate all pairwise cosine similarities diff -r 134b19acf876 -r 2823044430c1 mutational_patterns.xml --- a/mutational_patterns.xml Wed Oct 27 00:46:07 2021 +0000 +++ b/mutational_patterns.xml Tue Jul 05 21:41:03 2022 +0000 @@ -1,15 +1,15 @@ - + from genomic variations in vcf files - bioconductor-mutationalpatterns - openssl - r-ggplot2 - r-optparse - r-rjson + bioconductor-mutationalpatterns + + r-ggplot2 + r-optparse + r-rjson r-nmf r-gridextra - bioconductor-bsgenome.hsapiens.ucsc.hg19 - bioconductor-bsgenome.hsapiens.ucsc.hg38 + bioconductor-bsgenome.hsapiens.ucsc.hg19 + bioconductor-bsgenome.hsapiens.ucsc.hg38 + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -315,34 +343,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 134b19acf876 -r 2823044430c1 test-data/cosmic_output1.pdf Binary file test-data/cosmic_output1.pdf has changed diff -r 134b19acf876 -r 2823044430c1 test-data/cosmic_output_v3.pdf Binary file test-data/cosmic_output_v3.pdf has changed diff -r 134b19acf876 -r 2823044430c1 test-data/denovo_1.RData Binary file test-data/denovo_1.RData has changed diff -r 134b19acf876 -r 2823044430c1 test-data/denovo_output1.pdf Binary file test-data/denovo_output1.pdf has changed diff -r 134b19acf876 -r 2823044430c1 test-data/sig_contrib_table.tsv --- a/test-data/sig_contrib_table.tsv Wed Oct 27 00:46:07 2021 +0000 +++ b/test-data/sig_contrib_table.tsv Tue Jul 05 21:41:03 2022 +0000 @@ -1,5 +1,5 @@ sample SBS1 SBS2 SBS3 SBS4 SBS5 SBS6 SBS7 SBS8 SBS9 SBS10 SBS11 SBS12 SBS13 SBS14 SBS15 SBS16 SBS17 SBS18 SBS19 SBS20 SBS21 SBS22 SBS23 SBS24 SBS25 SBS26 SBS27 SBS28 SBS29 SBS30 -ovary-I.vcf 0.107676737465178 0.0107477401085281 0.263118370815298 0 0.0975192598491968 0.00654300960428024 0 0.121325654310372 0.0675812234604709 0 0.0475946682228466 0.0483240788134274 0.00116766365611527 0 0.0195773794486755 0.128367970358363 0 0 0 0 0 0 0 0 0.0564468988265929 0 0.000297382010539655 0.0237119630501146 0 0 -ovary-H.vcf 0.105483533021363 0.0160722050763363 0.220294683323486 0 0.270646531625838 0 0 0.0632630764001629 0.0651960109780565 0 0.0265849338024135 0 0.00937728828228106 0 0 0.169077458531394 0 0 0 0 0 0 0 0 0.0141372418969508 0 0.0064233897483352 0.0334436473133834 0 0 -lung-G.vcf 0.210828794798222 0.0160042182355152 0.174216713183146 0 0.248848735805233 0 0 0.0079146795135654 0.122852241624819 0 0.0316731744534412 0.0400882001887818 0 0 0 0.0415901687752403 0 0 0.0349534584507835 0 0 0 0 0 0.0686079325155911 0 0.0013880107407027 0.00103367171495887 0 0 -lung-F.vcf 0.272200051777229 0.00523782090972898 0.176429226414213 0 0.107322684968036 0 0 0.0922420552015618 0.0176321903825532 0 0.0308049189266778 0.00421945577891649 0 0 0 0.195343895329217 0 0 0 0 0 0 0 0 0.026099693090191 0 0.0050497865751646 0.0410158172026456 0 0.0264024034438657 +6 0.272200051777229 0.00523782090972898 0.176429226414213 0 0.107322684968036 0 0 0.0922420552015618 0.0176321903825532 0 0.0308049189266778 0.00421945577891649 0 0 0 0.195343895329217 0 0 0 0 0 0 0 0 0.026099693090191 0 0.0050497865751646 0.0410158172026456 0 0.0264024034438657 +7 0.210828794798222 0.0160042182355152 0.174216713183146 0 0.248848735805233 0 0 0.0079146795135654 0.122852241624819 0 0.0316731744534412 0.0400882001887818 0 0 0 0.0415901687752403 0 0 0.0349534584507835 0 0 0 0 0 0.0686079325155911 0 0.0013880107407027 0.00103367171495887 0 0 +8 0.105483533021363 0.0160722050763363 0.220294683323486 0 0.270646531625838 0 0 0.0632630764001629 0.0651960109780565 0 0.0265849338024135 0 0.00937728828228106 0 0 0.169077458531394 0 0 0 0 0 0 0 0 0.0141372418969508 0 0.0064233897483352 0.0334436473133834 0 0 +9 0.107676737465178 0.0107477401085281 0.263118370815298 0 0.0975192598491968 0.00654300960428024 0 0.121325654310372 0.0675812234604709 0 0.0475946682228466 0.0483240788134274 0.00116766365611527 0 0.0195773794486755 0.128367970358363 0 0 0 0 0 0 0 0 0.0564468988265929 0 0.000297382010539655 0.0237119630501146 0 0 diff -r 134b19acf876 -r 2823044430c1 test-data/sig_contrib_table_v3.tsv --- a/test-data/sig_contrib_table_v3.tsv Wed Oct 27 00:46:07 2021 +0000 +++ b/test-data/sig_contrib_table_v3.tsv Tue Jul 05 21:41:03 2022 +0000 @@ -1,5 +1,5 @@ sample SBS1 SBS2 SBS3 SBS4 SBS5 SBS6 SBS7a SBS7b SBS7c SBS7d SBS8 SBS9 SBS10a SBS10b SBS10c SBS10d SBS11 SBS12 SBS13 SBS14 SBS15 SBS16 SBS17a SBS17b SBS18 SBS19 SBS20 SBS21 SBS22 SBS23 SBS24 SBS25 SBS26 SBS27 SBS28 SBS29 SBS30 SBS31 SBS32 SBS33 SBS34 SBS35 SBS36 SBS37 SBS38 SBS39 SBS40 SBS41 SBS42 SBS43 SBS44 SBS45 SBS46 SBS47 SBS48 SBS49 SBS50 SBS51 SBS52 SBS53 SBS54 SBS55 SBS56 SBS57 SBS58 SBS59 SBS60 SBS84 SBS85 SBS86 SBS87 SBS88 SBS89 SBS90 SBS91 SBS92 SBS93 SBS94 -ovary-I.vcf 0.0248743382983054 0 0 0 0 0.0382804313784858 0.000279867877099847 0 0.0523368831013261 0 0.0882193437709409 0 0.00105713908429325 0 0 0 0 0 0.00790452113929918 0 0.0228952649764221 0.035824437322864 0 0 0.0157694785042366 0.00112106011995225 0 0.00901729432205061 0 0 0.0067314503274015 0.0424173427597118 0.00897709595233141 0 0.0134288190900345 0 0.0315118740824124 0.0274274413781644 0.0459722043929115 0.00919119609371442 0.0135875438956959 0 0 0.0813737079036436 0 0.134441510514793 0 0 0 0.00968535679523162 0.0232988957406608 0 0 0 0 0 0.00901138983347702 0.0232219272273402 0 0 0.0140535953847184 0 0 0.0588372989742489 0.126837942673735 0.000414033475201199 0 0 0 0 0.0123975208041955 0 0 0 0 0.00960179280510004 0 0 -ovary-H.vcf 0.0526112623195139 0 0 0.00259659019141931 0 0 0.00441492436160073 0 0.028442412784458 0.00371282908990969 0.0771150268514295 0 0.00469080414948085 0 0 0 0 0 0.0131075005010316 0 0 0.0564087542136608 0 0.00228775187171679 0 0.0197458393231762 0 0.00177238312166065 0 0 0.00886883514891954 0 0.00387092295756819 0.0034150231168899 0.0174808677883371 0 0.0435652656329046 0.001821180738257 0.0264091265962629 0.0107100101651646 0.0174453430547186 0 0 0.0961708355601418 0 0.121116601538177 0 0 0 0 0.0571796516660221 0 0 0 0 0.000214057521674493 0.0111211317159625 0.00829256700318903 0 0.00164798457109997 0.0258483341879308 0.00644458809280953 0 0.0846638702721653 0.122478370005574 0.00222471578608652 0 0.000420172000753205 0 0.00163043517541805 0.000141322538953654 0 0.0574873294316273 0.00242537895433406 0 0 0 0 -lung-G.vcf 0.0823617355865553 0 0 0 0 0.0525713539199432 0.00861488625601096 0 0.0339595873379564 0 0.0456255244916971 0.0145585252714382 0 0.000421381081383047 0 0 0 0 0.00522670784278192 0 0 0.0293382321445568 0.00139649570607906 0 0.0194196611974925 0.0386127956083362 0 0.013789732383022 0 0 0.0100733150661604 0.0349706130241875 0 0 0 0 0.0434093655847984 0.00181825891236839 0.0303474622606777 0 0.0187029901709085 0 0 0.120376065787024 0 0.0599280433448042 0 0 0.00132966985611013 0 0 0 0 0.0039271065334873 0 0 0 0.0280674592931216 0 0 0.035137879879521 0 0 0.0812463129866582 0.0917684488653737 0 0 0.0124734516843239 0 0.00901316909994899 0.00681265646517624 0 0.0647011123580966 0 0 0 0 0 -lung-F.vcf 0.125106083697333 0 0.0894527459328343 0 0 0 0 0 0.0324668080771622 0 0.0672874649502956 0 0.00268401866175131 0.00917023093306937 0 0 0 0 0 0 0 0.0451906045983062 0 0 0.00910592930802089 0.0167722110675364 0 0.0038298515888168 0 0 0.009531234514015 0 0 0 0.0232363343868912 0 0.062330351090351 0.00493364473237903 0.0324520312030243 0.00486738928923164 0.0100557162196348 0 0 0.0888476521405422 0 0.0556313157931215 0 0 0 0 0.020753584501296 0 0 0.0183541664469706 0 0.000330058861573153 0.00181335415290947 0.0123934694096205 0 0.00196180738612452 0.0392499133494657 0 0 0.0247940538178981 0.120319900452309 0.00358101555748017 0 0.0122742608109216 0.000420655172516775 0 0.0145849808127487 0 0.0316331756761206 0.00458398540772828 0 0 0 0 +6 0.125106083697333 0 0.0894527459328343 0 0 0 0 0 0.0324668080771622 0 0.0672874649502956 0 0.00268401866175131 0.00917023093306937 0 0 0 0 0 0 0 0.0451906045983062 0 0 0.00910592930802089 0.0167722110675364 0 0.0038298515888168 0 0 0.009531234514015 0 0 0 0.0232363343868912 0 0.062330351090351 0.00493364473237903 0.0324520312030243 0.00486738928923164 0.0100557162196348 0 0 0.0888476521405422 0 0.0556313157931215 0 0 0 0 0.020753584501296 0 0 0.0183541664469706 0 0.000330058861573153 0.00181335415290947 0.0123934694096205 0 0.00196180738612452 0.0392499133494657 0 0 0.0247940538178981 0.120319900452309 0.00358101555748017 0 0.0122742608109216 0.000420655172516775 0 0.0145849808127487 0 0.0316331756761206 0.00458398540772828 0 0 0 0 +7 0.0823617355865553 0 0 0 0 0.0525713539199432 0.00861488625601096 0 0.0339595873379564 0 0.0456255244916971 0.0145585252714382 0 0.000421381081383047 0 0 0 0 0.00522670784278192 0 0 0.0293382321445568 0.00139649570607906 0 0.0194196611974925 0.0386127956083362 0 0.013789732383022 0 0 0.0100733150661604 0.0349706130241875 0 0 0 0 0.0434093655847984 0.00181825891236839 0.0303474622606777 0 0.0187029901709085 0 0 0.120376065787024 0 0.0599280433448042 0 0 0.00132966985611013 0 0 0 0 0.0039271065334873 0 0 0 0.0280674592931216 0 0 0.035137879879521 0 0 0.0812463129866582 0.0917684488653737 0 0 0.0124734516843239 0 0.00901316909994899 0.00681265646517624 0 0.0647011123580966 0 0 0 0 0 +8 0.0526112623195139 0 0 0.00259659019141931 0 0 0.00441492436160073 0 0.028442412784458 0.00371282908990969 0.0771150268514295 0 0.00469080414948085 0 0 0 0 0 0.0131075005010316 0 0 0.0564087542136608 0 0.00228775187171679 0 0.0197458393231762 0 0.00177238312166065 0 0 0.00886883514891954 0 0.00387092295756819 0.0034150231168899 0.0174808677883371 0 0.0435652656329046 0.001821180738257 0.0264091265962629 0.0107100101651646 0.0174453430547186 0 0 0.0961708355601418 0 0.121116601538177 0 0 0 0 0.0571796516660221 0 0 0 0 0.000214057521674493 0.0111211317159625 0.00829256700318903 0 0.00164798457109997 0.0258483341879308 0.00644458809280953 0 0.0846638702721653 0.122478370005574 0.00222471578608652 0 0.000420172000753205 0 0.00163043517541805 0.000141322538953654 0 0.0574873294316273 0.00242537895433406 0 0 0 0 +9 0.0248743382983054 0 0 0 0 0.0382804313784858 0.000279867877099847 0 0.0523368831013261 0 0.0882193437709409 0 0.00105713908429325 0 0 0 0 0 0.00790452113929918 0 0.0228952649764221 0.035824437322864 0 0 0.0157694785042366 0.00112106011995225 0 0.00901729432205061 0 0 0.0067314503274015 0.0424173427597118 0.00897709595233141 0 0.0134288190900345 0 0.0315118740824124 0.0274274413781644 0.0459722043929115 0.00919119609371442 0.0135875438956959 0 0 0.0813737079036436 0 0.134441510514793 0 0 0 0.00968535679523162 0.0232988957406608 0 0 0 0 0 0.00901138983347702 0.0232219272273402 0 0 0.0140535953847184 0 0 0.0588372989742489 0.126837942673735 0.000414033475201199 0 0 0 0 0.0123975208041955 0 0 0 0 0.00960179280510004 0 0 diff -r 134b19acf876 -r 2823044430c1 test-data/sigmatrix.tab --- a/test-data/sigmatrix.tab Wed Oct 27 00:46:07 2021 +0000 +++ b/test-data/sigmatrix.tab Tue Jul 05 21:41:03 2022 +0000 @@ -1,4 +1,4 @@ -Type SBS1 SBS2 SBS3 SBS4 +Type SBSA SBSB SBS5-like SBSC A[C>A]A 2.537216e-02 2.002096e-02 1.710480e-02 2.141184e-02 A[C>A]C 9.450873e-03 1.458332e-02 1.433290e-02 8.146530e-15 A[C>A]G 2.154737e-06 2.245761e-03 7.951470e-03 3.165761e-03 diff -r 134b19acf876 -r 2823044430c1 test-data/spectrum_output1.pdf Binary file test-data/spectrum_output1.pdf has changed diff -r 134b19acf876 -r 2823044430c1 test-data/user_output.pdf Binary file test-data/user_output.pdf has changed