# HG changeset patch
# User lecorguille
# Date 1458681648 14400
# Node ID 924ff5a24122b9c629464b221902d188e5c5fa31
planemo upload commit 0921bc80e825d98964cf7a72c57facdb593ae355-dirty
diff -r 000000000000 -r 924ff5a24122 Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,23 @@
+# USAGE: make [install|clean]
+
+# -------- VARIABLE --------
+
+OBJ=anova.tgz
+DEP=abims_anova.r abims_anova.xml static test-data
+
+
+# ------------------------
+
+# Targets that do not name a file they create.
+.PHONY: all install clean
+all: $(OBJ)
+
+# Archive the tool files; tar -h follows symlinks, .svn entries are excluded.
+$(OBJ): $(DEP)
+	tar --exclude=".svn" -zchf $@ $^
+
+install: $(OBJ)
+	mv *.tgz ~
+
+clean:
+	rm -f *.tgz
diff -r 000000000000 -r 924ff5a24122 abims_anova.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.r Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,132 @@
+#!/usr/local/public/bin/Rscript
+# version="1.1"
+
+# date: 06-06-2012
+# update: 18-02-2014
+# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+# abims_anova.r version 20140218
+
+library(batch)
+
+
+# anova: N-way ANOVA of each variable of a data matrix against sample-metadata factors.
+# Writes two tables (data + adjusted p-values, selected variables), then quits the R session.
+#   file, sampleinfo       : paths of the data matrix and of the sample metadata
+#   mode                   : "row" when samples are the columns of `file` (it is then transposed)
+#   condition, interaction : metadata column(s) used as factors; joined with "*" if TRUE, else "+"
+#   method, threshold      : p.adjust() method and cut-off applied to the adjusted p-values
+#   selection_method       : "intersection" = significant for every term, else for at least one
+#   sep, dec               : field and decimal separators of the input files
+#   outputdatapvalue, outputdatafiltered : paths of the two output tables
+anova <- function(file, sampleinfo, mode="column", condition=1, interaction=FALSE, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") {
+  # separator keywords are translated to the actual character
+  if (sep == "tabulation") sep <- "\t"
+  if (sep == "semicolon") sep <- ";"
+  if (sep == "comma") sep <- ","
+
+  if (length(condition) < 1) stop("at least one condition is required", call. = FALSE)
+  anova_formula_operator <- if (interaction) "*" else "+"
+
+  # -- import --
+  data <- read.table(file, header = TRUE, row.names = 1, sep = sep, quote = "\"", dec = dec, fill = TRUE, comment.char = "", na.strings = "NA")
+
+  # Samples must be rows from here on. as.matrix() gives "column" mode the matrix
+  # response that manova() needs and that t() already yields in "row" mode.
+  data <- if (mode == "row") t(data) else as.matrix(data)
+
+  sampleinfoTab <- read.table(sampleinfo, header = TRUE, row.names = 1, sep = sep, quote = "\"")
+  rownames(sampleinfoTab) <- make.names(rownames(sampleinfoTab))
+
+  # -- group --
+  match_data_sampleinfoTab <- match(rownames(data), rownames(sampleinfoTab))
+  if (any(is.na(match_data_sampleinfoTab))) {
+    write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
+    write("You may need to use change the mode (column/row)", stderr())
+    write("10 first sample names in the data matrix:", stderr())
+    # the match above uses rownames(data), so these are the names to show
+    write(head(rownames(data), 10), stderr())
+    write("10 first sample names in the sample info:", stderr())
+    write(head(rownames(sampleinfoTab), 10), stderr())
+    quit("no", status = 10)
+  }
+
+  # -- anova --
+  # formula: one factor per condition, referenced as grps[[i]] in the formula text
+  grps <- list()
+  cat("\ncontrasts:\n")
+  for (i in seq_along(condition)) {
+    grps[[i]] <- factor(sampleinfoTab[, condition[i]][match_data_sampleinfoTab])
+    cat(condition[i], "\t", levels(grps[[i]]), "\n")
+  }
+  anova_formula_s <- paste0("data ~ ", paste0("grps[[", seq_along(condition), "]]", collapse = anova_formula_operator))
+  anova_formula <- as.formula(anova_formula_s)
+
+  # anova
+  manovaObjectList <- manova(anova_formula)
+  manovaList <- summary.aov(manovaObjectList)
+
+  # condition renaming: the last row of the table is left out, grps[[i]] becomes condition[i]
+  manovaRownames <- gsub(" ", "", rownames(manovaList[[1]]))
+  manovaNbrPvalue <- length(manovaRownames) - 1
+  manovaRownames <- manovaRownames[-(manovaNbrPvalue + 1)]
+  for (i in seq_along(condition)) {
+    grpPattern <- paste0("grps\\[\\[", i, "\\]\\]")
+    manovaRownames <- sub(grpPattern, condition[i], manovaRownames)
+    anova_formula_s <- sub(grpPattern, condition[i], anova_formula_s)
+  }
+
+  # log
+  cat("\nanova_formula", anova_formula_s, "\n")
+
+  # p-value: 5th column of each table, last row excluded
+  aovPValue <- vapply(manovaList, function(x) x[seq_len(manovaNbrPvalue), 5], numeric(manovaNbrPvalue))
+  if (length(condition) == 1) aovPValue <- t(aovPValue)
+  rownames(aovPValue) <- paste0("pvalue_", manovaRownames)
+
+  # p-value adjusted
+  # NOTE(review): with several conditions the correction runs per variable across the model
+  # terms (MARGIN = 2), not across variables; kept as is, the expected test outputs rely on it.
+  if (length(condition) == 1) {
+    aovAdjPValue <- t(p.adjust(aovPValue, method = method))
+  } else {
+    aovAdjPValue <- apply(aovPValue, 2, p.adjust, method = method)
+  }
+  rownames(aovAdjPValue) <- paste0("pvalueadjusted.", method, ".", manovaRownames)
+
+  # selection: which() leaves NA out; drop = FALSE keeps a single variable as a named column
+  colSumThreshold <- colSums(aovAdjPValue <= threshold)
+  if (selection_method == "intersection") {
+    datafiltered <- data[, which(colSumThreshold == nrow(aovAdjPValue)), drop = FALSE]
+  } else {
+    datafiltered <- data[, which(colSumThreshold != 0), drop = FALSE]
+  }
+  nbVariables <- ncol(data)
+  nbSelected <- ncol(datafiltered)
+  data <- rbind(data, aovAdjPValue)
+  if (mode == "row") {
+    data <- t(data)
+    datafiltered <- t(datafiltered)
+  }
+
+  # -- output / return --
+  write.table(data, outputdatapvalue, sep = sep, quote = FALSE, col.names = NA)
+  write.table(datafiltered, outputdatafiltered, sep = sep, quote = FALSE, col.names = NA)
+
+  # log: variables are counted before the final transposition, so both modes report variables
+  cat("\nthreshold:", threshold, "\n")
+  cat("result:", nbSelected, "/", nbVariables, "\n")
+  quit("no", status = 0)
+}
+
+# log: banner, then the raw command-line arguments
+cat("ANOVA\n\n", "Arguments\n", sep = "")
+args <- commandArgs(trailingOnly = TRUE)
+print(args)
+
+# run anova() with the list returned by parseCommandArgs() as its arguments
+listArguments <- parseCommandArgs(evaluate = FALSE)
+do.call(anova, listArguments)
+
+
+
diff -r 000000000000 -r 924ff5a24122 abims_anova.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.xml Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,257 @@
+
+
+ N-way anova. With or without interactions
+
+
+ R
+ Rscript
+
+
+
+
+
+
+
+abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode"
+condition "c('$condition_1'
+#for $i, $s in enumerate( $conditions )
+,'${s.condition}'
+#end for
+)"
+interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+---------------------------------------------------
+
+=====
+Anova
+=====
+
+-----------
+Description
+-----------
+
+Analysis of variance (ANOVA) is a collection of statistical models, and their associated procedures, used to analyze the differences between group means.
+The observed variance in a particular variable is partitioned into components attributable to different sources of variation.
+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label | Format |
++===========================+============+
+| 1 : Data Matrix file | Tabular |
++---------------------------+------------+
+| 2 : Sample Metadata file | Tabular |
++---------------------------+------------+
+
+
+
+------------
+Output files
+------------
+
+
+
+***.anova_pvalue.tabular**
+
+ | A tabular file which gives, for each metabolite (row), the intensity in each sample (column), plus one column of adjusted p-values per model term (named pvalueadjusted.METHOD.TERM, e.g. pvalueadjusted.BH.gender).
+
+***.anova_filtered.tabular**
+
+ | The input data matrix restricted to the metabolites whose adjusted p-values pass the threshold (the p-value columns themselves are not included).
+
+
+------
+
+.. class:: infomark
+
+The outputs ***.anova_filtered.tabular** and ***.anova_pvalue.tabular** are tabular files. You can continue your analysis by using them in the following tools:
+ | PCA
+ | Hierarchical Clustering
+
+
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+
+Input files
+-----------
+
+**>A part of an example of Data Matrix file input**
+
+
++--------+------------------+----------------+
+| Name | Bur-eH_FSP_12 | Bur-eH_FSP_24 |
++========+==================+================+
+|M202T601| 91206595.7559783 |106808979.08546 |
++--------+------------------+----------------+
+|M234T851| 27249137.275504 |28824971.3177926|
++--------+------------------+----------------+
+
+**>A part of an example of Sample Metadata file input**
+
+
++---------------------------+------------+------------+------------+
+| Sample name | class | time | batch |
++===========================+============+============+============+
+| Bur-eH_FSP_12 | Bur-eH | 12 | 1 |
++---------------------------+------------+------------+------------+
+| Bur-eH_FSP_24 | Bur-eH | 24 | 1 |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_12 | Bur-NI | 12 | 2 |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_24 | Bur-NI | 24 | 2 |
++---------------------------+------------+------------+------------+
+
+Parameters
+----------
+
+ | Mode -> **row**
+ | column name of condition -> **class**
+ | Separator of columns: -> **tabulation**
+ | Decimal separator -> **.**
+ | PValue adjusted method -> **BH**
+ | Threshold -> **0.001**
+
+
+
+Output files
+------------
+
+**Part of an example of xset.anova_filtered.tabular:**
+
+.. image:: anova_filtered.png
+
+**Part of an example of xset.anova_pvalue.tabular:**
+
+.. image:: anova_pvalue.png
+
+
+---------------------------------------------------
+
+
+---------------------------
+Position in workflow4metabo
+---------------------------
+
+
+**Upstream tools**
+
++---------------------------+----------------------------------------+--------+------------------------+
+| Name | Output file | Format | parameter |
++===========================+========================================+========+========================+
+|xcms.diffreport |xset.diffreport.data_matrix.tsv | Tabular| Data table file |
++---------------------------+----------------------------------------+--------+------------------------+
+|CAMERA.annotateDiffreport |xset.annotatediffreport.data_matrix.tsv | Tabular| Data table file |
++---------------------------+----------------------------------------+--------+------------------------+
+|xcms.xcmsSet |sample_info.tab | Tabular| sample info table file |
++---------------------------+----------------------------------------+--------+------------------------+
+
+
+**Downstream tools**
+
++---------------------------+---------------------------------------------------------+--------+
+| Name | Output file | Format |
++===========================+=========================================================+========+
+|PCA |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
++---------------------------+---------------------------------------------------------+--------+
+|Hierarchical Clustering |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
++---------------------------+---------------------------------------------------------+--------+
+
+
+**General schema of the metabolomic workflow**
+
+.. image:: anova_workflow.png
+
+
+
+
+ Ronald A. Fisher in the 1930s
+
+
+
diff -r 000000000000 -r 924ff5a24122 static/images/anova_filtered.png
Binary file static/images/anova_filtered.png has changed
diff -r 000000000000 -r 924ff5a24122 static/images/anova_pvalue.png
Binary file static/images/anova_pvalue.png has changed
diff -r 000000000000 -r 924ff5a24122 static/images/anova_workflow.png
Binary file static/images/anova_workflow.png has changed
diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dataMatrix.tsv Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,16 @@
+dataMatrix HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208
+HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907
+HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742
+HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315
+HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030
+HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010
+HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661
+HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775
+HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898
+HMDB00518 34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831
+HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 7517724 1341900
+HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276
+HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044
+HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811
+HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554
+HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061
diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv_anova_filtered.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dataMatrix.tsv_anova_filtered.tabular Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,3 @@
+ HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208
+HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044
+HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554
diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv_anova_pvalue.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dataMatrix.tsv_anova_pvalue.tabular Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,16 @@
+ HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 pvalueadjusted.BH.age pvalueadjusted.BH.gender
+HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907 0.740856975457218 0.740856975457218
+HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742 0.347623051099051 0.234554902330543
+HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315 0.720303175717481 0.720303175717481
+HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030 0.252979062487671 0.517822514350462
+HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010 0.808532959173048 0.808532959173048
+HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661 0.329174238111018 0.329174238111018
+HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775 0.995859884733937 0.995859884733937
+HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898 0.15341384456659 0.15341384456659
+HMDB00518 34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831 0.439012867631325 0.596200901535843
+HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 7517724 1341900 0.814406453193777 0.814406453193777
+HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276 0.288354536353544 0.288354536353544
+HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 0.659785284053633 0.00376251274734483
+HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811 0.646714644805001 0.646714644805001
+HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 0.13976111393526 0.0491480895853803
+HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061 0.565910223149305 0.565910223149305
diff -r 000000000000 -r 924ff5a24122 test-data/sampleMetadata.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleMetadata.tsv Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,21 @@
+sampleMetadata injectionOrder mode age bmi gender
+HU_017 2 pos 40-50 23.03 M
+HU_028 7 pos 40-50 23.92 F
+HU_034 9 pos 50-60 23.37 M
+HU_051 20 pos 20-30 23.23 F
+HU_060 24 pos 50-60 28.72 F
+HU_078 34 pos 40-50 25.18 M
+HU_091 42 pos 60-70 26.12 M
+HU_093 43 pos 50-60 21.71 M
+HU_099 46 pos 20-30 21.3 M
+HU_110 53 pos 50-60 20.9 F
+HU_130 63 pos 30-40 26.06 M
+HU_134 67 pos 40-50 22.89 M
+HU_138 68 pos 40-50 21.88 M
+HU_149 72 pos 30-40 19.49 F
+HU_152 75 pos 20-30 17.58 F
+HU_175 87 pos 30-40 21.26 F
+HU_178 88 pos 60-70 32.87 F
+HU_185 95 pos 40-50 21.09 F
+HU_204 104 pos 30-40 29.06 M
+HU_208 106 pos 20-30 18.61 F
diff -r 000000000000 -r 924ff5a24122 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+