# HG changeset patch
# User lecorguille
# Date 1458681648 14400
# Node ID 924ff5a24122b9c629464b221902d188e5c5fa31
planemo upload commit 0921bc80e825d98964cf7a72c57facdb593ae355-dirty

diff -r 000000000000 -r 924ff5a24122 Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,23 @@
+# USAGE: make [install|clean]
+
+# -------- VARIABLE --------
+
+OBJ=anova.tgz
+DEP=abims_anova.r abims_anova.xml static test-data
+
+
+# ------------------------
+
+all: $(OBJ)
+
+$(OBJ): $(DEP)
+	tar --exclude=".svn" -zchf $@ $^
+
+# ------------------------
+
+install: $(OBJ)
+	mv *.tgz ~
+
+clean:
+	rm *.tgz
+
diff -r 000000000000 -r 924ff5a24122 abims_anova.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.r	Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,180 @@
+#!/usr/local/public/bin/Rscript
+# version="1.1"
+
+# date: 06-06-2012
+# update: 18-02-2014
+# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+# abims_anova.r version 20140218
+
+library(batch)
+
+
+# N-way ANOVA of every variable of a data matrix against sample factors.
+#
+# Reads the data matrix and the sample metadata, fits one model per variable
+# through manova()/summary.aov(), adjusts the p-values with p.adjust() and
+# writes two tables: the data matrix with the adjusted p-values appended, and
+# the data matrix reduced to the variables that pass the threshold.
+#
+# Arguments:
+#   file                data matrix file; its first column holds the row names.
+#   sampleinfo          sample metadata file; its first column holds the
+#                       sample names (passed through make.names()).
+#   mode                "row": the matrix is transposed after reading, so that
+#                       samples end up as rows; any other value: used as read.
+#   condition           column(s) of the sample metadata used as factors.
+#   interaction         TRUE joins the factors with "*", FALSE with "+".
+#   method              correction method handed to p.adjust().
+#   threshold           a term is significant for a variable when its adjusted
+#                       p-value is <= threshold.
+#   selection_method    "intersection" keeps the variables significant for
+#                       every term; any other value keeps those significant
+#                       for at least one term.
+#   sep                 column separator; "tabulation", "semicolon" and
+#                       "comma" are translated, anything else is used as is.
+#   dec                 decimal mark of the data matrix.
+#   outputdatapvalue    output file: data matrix plus adjusted p-values.
+#   outputdatafiltered  output file: selected variables only.
+#
+# Does not return: quit() ends the R session, with status 10 when the sample
+# names of the two inputs cannot be matched and status 0 otherwise.
+anova <- function(file, sampleinfo, mode = "column", condition = 1,
+                  interaction = FALSE, method = "BH", threshold = 0.01,
+                  selection_method = "intersection", sep = ";", dec = ".",
+                  outputdatapvalue = "anova.data.output",
+                  outputdatafiltered = "anova.datafiltered.output") {
+
+  # separator keywords are translated, any other value is used as is
+  sep <- switch(sep, tabulation = "\t", semicolon = ";", comma = ",", sep)
+
+  anova_formula_operator <- if (interaction) "*" else "+"
+
+  # -- import --
+  data <- read.table(file, header = TRUE, row.names = 1, sep = sep,
+                     quote = "\"", dec = dec, fill = TRUE, comment.char = "",
+                     na.strings = "NA")
+  # The model response must be a matrix: a data frame is rejected when the
+  # model frame is built, which broke every mode other than "row".
+  data <- as.matrix(data)
+  if (mode == "row") data <- t(data)
+
+  sampleinfoTab <- read.table(sampleinfo, header = TRUE, row.names = 1,
+                              sep = sep, quote = "\"")
+  rownames(sampleinfoTab) <- make.names(rownames(sampleinfoTab))
+
+
+  # -- group --
+  match_data_sampleinfoTab <- match(rownames(data), rownames(sampleinfoTab))
+  if (any(is.na(match_data_sampleinfoTab))) {
+    write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
+    write("You may need to use change the mode (column/row)", stderr())
+    write("10 first sample names in the data matrix:", stderr())
+    # the rows of `data` are what was matched, and 10 names are announced
+    write(head(rownames(data), 10), stderr())
+    write("10 first sample names in the sample info:", stderr())
+    write(head(rownames(sampleinfoTab), 10), stderr())
+    quit("no", status = 10)
+  }
+
+
+  # -- anova --
+
+  # formula: "data ~ grps[[1]]<operator>grps[[2]]..."
+  grps <- vector("list", length(condition))
+  cat("\ncontrasts:\n")
+  for (i in seq_along(condition)) {
+    grps[[i]] <- factor(sampleinfoTab[, condition[i]][match_data_sampleinfoTab])
+    cat(condition[i], "\t", levels(grps[[i]]), "\n")
+  }
+  anova_formula_s <- paste0(
+    "data ~ ",
+    paste0("grps[[", seq_along(condition), "]]",
+           collapse = anova_formula_operator)
+  )
+  anova_formula <- as.formula(anova_formula_s)
+
+  # anova
+  manovaObjectList <- manova(anova_formula)
+  manovaList <- summary.aov(manovaObjectList)
+
+  # condition renaming (the last row of a summary table is dropped)
+  manovaRownames <- gsub(" ", "", rownames(manovaList[[1]]))
+  manovaNbrPvalue <- length(manovaRownames) - 1
+  manovaRownames <- manovaRownames[seq_len(manovaNbrPvalue)]
+
+  for (i in seq_along(condition)) {
+    grp_pattern <- paste0("grps\\[\\[", i, "\\]\\]")
+    manovaRownames <- sub(grp_pattern, condition[i], manovaRownames)
+    anova_formula_s <- sub(grp_pattern, condition[i], anova_formula_s)
+  }
+
+  # log
+  cat("\nanova_formula", anova_formula_s, "\n")
+
+  # p-value: column 5 of every summary table, arranged as one row per term
+  # and one column per variable whatever the number of terms
+  aovPValue <- vapply(manovaList,
+                      function(x) x[seq_len(manovaNbrPvalue), 5],
+                      numeric(manovaNbrPvalue))
+  aovPValue <- matrix(aovPValue, nrow = manovaNbrPvalue)
+  rownames(aovPValue) <- paste0("pvalue_", manovaRownames)
+
+  # p-value adjusted
+  if (length(condition) == 1) {
+    aovAdjPValue <- t(p.adjust(aovPValue, method = method))
+  } else {
+    # NOTE(review): this corrects across the terms of each variable, whereas
+    # the single-condition branch corrects across the variables; a row-wise
+    # correction is probably what is wanted. Left as is because it changes
+    # every reported value - confirm before switching.
+    aovAdjPValue <- apply(aovPValue, 2, p.adjust, method = method)
+  }
+  rownames(aovAdjPValue) <- paste0("pvalueadjusted.", method, ".",
+                                   manovaRownames)
+
+  # selection: an NA adjusted p-value counts as not significant
+  colSumThreshold <- colSums(aovAdjPValue <= threshold, na.rm = TRUE)
+  if (selection_method == "intersection") {
+    selected <- colSumThreshold == nrow(aovAdjPValue)
+  } else {
+    selected <- colSumThreshold != 0
+  }
+  # drop = FALSE keeps a matrix (and its names) when one variable is selected
+  datafiltered <- data[, selected, drop = FALSE]
+
+  # variable counts for the log, taken before binding and transposing
+  nbSelected <- ncol(datafiltered)
+  nbVariables <- ncol(data)
+
+  #data=rbind(data, aovPValue, aovAdjPValue)
+  data <- rbind(data, aovAdjPValue)
+
+  if (mode == "row") {
+    data <- t(data)
+    datafiltered <- t(datafiltered)
+  }
+
+  # -- output / return --
+  write.table(data, outputdatapvalue, sep = sep, quote = FALSE, col.names = NA)
+  write.table(datafiltered, outputdatafiltered, sep = sep, quote = FALSE,
+              col.names = NA)
+
+  # log
+  cat("\nthreshold:", threshold, "\n")
+  cat("result:", nbSelected, "/", nbVariables, "\n")
+
+  quit("no", status = 0)
+}
+
+# log
+cat("ANOVA\n\n")
+cat("Arguments\n")
+args <- commandArgs(trailingOnly = TRUE)
+print(args)
+
+listArguments <- parseCommandArgs(evaluate = FALSE)
+do.call(anova, listArguments)
+
+
+
diff -r 000000000000 -r 924ff5a24122 abims_anova.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.xml	Tue Mar 22 17:20:48 2016 -0400
@@ -0,0 +1,257 @@
+
+
+ N-way anova. With ou Without interactions
+
+
+ R
+ Rscript
+
+
+
+
+
+
+
+abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode"
+condition "c('$condition_1'
+#for $i, $s in enumerate( $conditions )
+,'${s.condition}'
+#end for
+)"
+interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. 
class:: infomark
+
+**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+---------------------------------------------------
+
+=====
+Anova
+=====
+
+-----------
+Description
+-----------
+
+Analysis of variance (ANOVA) is a collection of statistical models, and their associated procedures, used to analyze the differences between group means:
+the observed variance in a particular variable is partitioned into components attributable to different sources of variation.
+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   | Format     |
++===========================+============+
+| 1 : Data Matrix file      | Tabular    |
++---------------------------+------------+
+| 2 : Sample Metadata file  | Tabular    |
++---------------------------+------------+
+
+
+
+------------
+Output files
+------------
+
+
+
+***.anova_pvalue.tabular**
+
+    | A tabular file which gives, for each metabolite (row), the intensity in each sample (column) + one column of adjusted p-values per term of the model (named pvalueadjusted.<method>.<term>).
+
+***.anova_filtered.tabular**
+
+    | The input data matrix restricted to the metabolites whose adjusted p-values pass the threshold (no p-value column is added to this file).
+
+
+------
+
+.. class:: infomark
+
+The outputs ***.anova_filtered.tabular** or ***.anova_pvalue.tabular** are tabular files. You can continue your analysis using them in the following tools:
+    | PCA
+    | Hierarchical Clustering
+
+
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+
+Input files
+-----------
+
+**>A part of an example of Data Matrix file input**
+
+
++--------+------------------+----------------+
+| Name   | Bur-eH_FSP_12    | Bur-eH_FSP_24  |
++========+==================+================+
+|M202T601| 91206595.7559783 |106808979.08546 |
++--------+------------------+----------------+
+|M234T851| 27249137.275504  |28824971.3177926|
++--------+------------------+----------------+
+
+**>A part of an example of Sample Metadata file input**
+
+
++---------------------------+------------+------------+------------+
+| Sample name               | class      | time       | batch      |
++===========================+============+============+============+
+| Bur-eH_FSP_12             | Bur-eH     | 12         | 1          |
++---------------------------+------------+------------+------------+
+| Bur-eH_FSP_24             | Bur-eH     | 24         | 1          |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_12             | Bur-NI     | 12         | 2          |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_24             | Bur-NI     | 24         | 2          |
++---------------------------+------------+------------+------------+
+
+Parameters
+----------
+
+    | Mode -> **row**
+    | column name of condition -> **class**
+    | Separator of columns: -> **tabulation**
+    | Decimal separator -> **.**
+    | PValue adjusted method -> **BH**
+    | Threshold -> **0.001**
+
+
+
+Output files
+------------
+
+**Part of an example of xset.anova_filtered.tabular:**
+
+.. image:: anova_filtered.png
+
+**Part of an example of xset.anova_pvalue.tabular:**
+
+.. image:: anova_pvalue.png
+
+
+---------------------------------------------------
+
+
+---------------------------
+Position in workflow4metabo
+---------------------------
+
+
+**Upstream tools**
+
++---------------------------+----------------------------------------+--------+------------------------+
+| Name                      | Output file                            | Format | parameter              |
++===========================+========================================+========+========================+
+|xcms.diffreport            |xset.diffreport.data_matrix.tsv         | Tabular| Data table file        |
++---------------------------+----------------------------------------+--------+------------------------+
+|CAMERA.annotateDiffreport  |xset.annotatediffreport.data_matrix.tsv | Tabular| Data table file        |
++---------------------------+----------------------------------------+--------+------------------------+
+|xcms.xcmsSet               |sample_info.tab                         | Tabular| sample info table file |
++---------------------------+----------------------------------------+--------+------------------------+
+
+
+**Downstream tools**
+
++---------------------------+---------------------------------------------------------+--------+
+| Name                      | Output file                                             | Format |
++===========================+=========================================================+========+
+|PCA                        |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
++---------------------------+---------------------------------------------------------+--------+
+|Hierarchical Clustering    |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
++---------------------------+---------------------------------------------------------+--------+
+
+
+**General schema of the metabolomic workflow**
+
+.. image:: anova_workflow.png
+
+
+
+
+ Ronald A. 
Fisher in the 1930s + + + diff -r 000000000000 -r 924ff5a24122 static/images/anova_filtered.png Binary file static/images/anova_filtered.png has changed diff -r 000000000000 -r 924ff5a24122 static/images/anova_pvalue.png Binary file static/images/anova_pvalue.png has changed diff -r 000000000000 -r 924ff5a24122 static/images/anova_workflow.png Binary file static/images/anova_workflow.png has changed diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,16 @@ +dataMatrix HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 +HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907 +HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742 +HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315 +HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030 +HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010 +HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661 +HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775 +HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898 +HMDB00518 34236 58249 85944 
NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831 +HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 7517724 1341900 +HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 +HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 +HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061 diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv_anova_filtered.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv_anova_filtered.tabular Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,3 @@ + HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 diff -r 000000000000 -r 924ff5a24122 test-data/dataMatrix.tsv_anova_pvalue.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv_anova_pvalue.tabular Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,16 @@ + 
HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 pvalueadjusted.BH.age pvalueadjusted.BH.gender +HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907 0.740856975457218 0.740856975457218 +HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742 0.347623051099051 0.234554902330543 +HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315 0.720303175717481 0.720303175717481 +HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030 0.252979062487671 0.517822514350462 +HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010 0.808532959173048 0.808532959173048 +HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661 0.329174238111018 0.329174238111018 +HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775 0.995859884733937 0.995859884733937 +HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898 0.15341384456659 0.15341384456659 +HMDB00518 34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831 0.439012867631325 0.596200901535843 +HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 
3577833 2295288 625924 7517724 1341900 0.814406453193777 0.814406453193777 +HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276 0.288354536353544 0.288354536353544 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 0.659785284053633 0.00376251274734483 +HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811 0.646714644805001 0.646714644805001 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 0.13976111393526 0.0491480895853803 +HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061 0.565910223149305 0.565910223149305 diff -r 000000000000 -r 924ff5a24122 test-data/sampleMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampleMetadata.tsv Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,21 @@ +sampleMetadata injectionOrder mode age bmi gender +HU_017 2 pos 40-50 23.03 M +HU_028 7 pos 40-50 23.92 F +HU_034 9 pos 50-60 23.37 M +HU_051 20 pos 20-30 23.23 F +HU_060 24 pos 50-60 28.72 F +HU_078 34 pos 40-50 25.18 M +HU_091 42 pos 60-70 26.12 M +HU_093 43 pos 50-60 21.71 M +HU_099 46 pos 20-30 21.3 M +HU_110 53 pos 50-60 20.9 F +HU_130 63 pos 30-40 26.06 M +HU_134 67 pos 40-50 22.89 M +HU_138 68 pos 40-50 21.88 M +HU_149 72 pos 30-40 19.49 F +HU_152 75 pos 20-30 17.58 F +HU_175 87 pos 30-40 21.26 F +HU_178 88 pos 60-70 32.87 F +HU_185 95 pos 40-50 21.09 F +HU_204 104 pos 30-40 29.06 M +HU_208 106 pos 20-30 18.61 F diff -r 000000000000 -r 924ff5a24122 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/tool_dependencies.xml Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,6 @@ + + + + + +