Mercurial > repos > lecorguille > anova
changeset 0:924ff5a24122 draft
planemo upload commit 0921bc80e825d98964cf7a72c57facdb593ae355-dirty
author | lecorguille |
---|---|
date | Tue, 22 Mar 2016 17:20:48 -0400 |
parents | |
children | 11aa01984562 |
files | Makefile abims_anova.r abims_anova.xml static/images/anova_filtered.png static/images/anova_pvalue.png static/images/anova_workflow.png test-data/dataMatrix.tsv test-data/dataMatrix.tsv_anova_filtered.tabular test-data/dataMatrix.tsv_anova_pvalue.tabular test-data/sampleMetadata.tsv tool_dependencies.xml |
diffstat | 11 files changed, 474 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Makefile (added as b/Makefile in changeset 0:924ff5a24122)
#
# USAGE: make [install|clean]
#
# Targets:
#   all (default)  build the tool archive $(OBJ) from $(DEP)
#   install        build the archive, then move every *.tgz to the home directory
#   clean          remove every *.tgz from the current directory

# -------- VARIABLE --------

OBJ=anova.tgz
DEP=abims_anova.r abims_anova.xml static test-data


# ------------------------

# These targets do not produce a file of the same name; declaring them phony
# keeps them working even if a file called "all", "install" or "clean" exists.
.PHONY: all install clean

all: $(OBJ)

# $@ is the archive, $^ the list of prerequisites packed into it.
$(OBJ): $(DEP)
	tar --exclude=".svn" -zchf $@ $^

# ------------------------

install: $(OBJ)
	mv *.tgz ~

# -f: do not fail when there is nothing to remove.
clean:
	rm -f *.tgz
#!/usr/local/public/bin/Rscript
# version="1.1"

# date: 06-06-2012
# update: 18-02-2014
# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr

# abims_anova.r version 20140218
# (file added as b/abims_anova.r in changeset 0:924ff5a24122)

library(batch)


# function anova
#
# Runs an N-way (M)ANOVA of every variable of a data matrix against one or
# more sample conditions, adjusts the resulting p-values and writes two files:
# the data matrix augmented with the adjusted p-values, and the data matrix
# restricted to the variables that pass the threshold.
#
# Arguments:
#   file               path of the data matrix (first column = row names).
#   sampleinfo         path of the sample metadata table (first column =
#                      sample names); read with the same separator as `file`.
#   mode               "row": the matrix is transposed after reading so that
#                      samples become rows; any other value: samples are
#                      already the rows of the file.
#   condition          column(s) of the sample metadata used as factors
#                      (names or indices).
#   interaction        TRUE joins the factors with "*", FALSE with "+".
#   method             correction method passed to p.adjust().
#   threshold          maximum adjusted p-value accepted.
#   selection_method   "intersection": every term must pass the threshold;
#                      anything else: at least one term must pass.
#   sep                column separator, or one of the keywords "tabulation",
#                      "semicolon", "comma".
#   dec                decimal separator of the data matrix.
#   outputdatapvalue   output path of the matrix with adjusted p-values.
#   outputdatafiltered output path of the filtered matrix.
#
# Side effects: prints a log on stdout, writes both output files and then
# terminates the R session (status 0 on success, 10 when sample names of the
# two inputs cannot be matched).
anova <- function(file, sampleinfo, mode = "column", condition = 1,
                  interaction = FALSE, method = "BH", threshold = 0.01,
                  selection_method = "intersection", sep = ";", dec = ".",
                  outputdatapvalue = "anova.data.output",
                  outputdatafiltered = "anova.datafiltered.output") {

  # -- argument normalisation --
  # NOTE(review): values coming from the command line may reach this function
  # as character strings, depending on how batch::parseCommandArgs converts
  # them. Coerce them so that "T"/"F" and "0.05" behave as a logical and a
  # number; a character threshold would otherwise be compared lexically.
  interaction <- isTRUE(as.logical(interaction))
  threshold <- as.numeric(threshold)
  if (length(threshold) != 1 || is.na(threshold)) {
    stop("threshold must be a single numeric value", call. = FALSE)
  }
  if (length(condition) == 0) {
    stop("at least one condition is required", call. = FALSE)
  }

  # Separator keywords; any other value is used as given.
  sep <- switch(sep, tabulation = "\t", semicolon = ";", comma = ",", sep)

  anova_formula_operator <- if (interaction) "*" else "+"

  # -- import --
  data <- read.table(file, header = TRUE, row.names = 1, sep = sep,
                     quote = "\"", dec = dec, fill = TRUE,
                     comment.char = "", na.strings = "NA")

  # From here on `data` is a matrix with one row per sample; manova() needs a
  # matrix (not a data.frame) on the left-hand side of the formula.
  data <- if (mode == "row") t(data) else as.matrix(data)

  sampleinfoTab <- read.table(sampleinfo, header = TRUE, row.names = 1,
                              sep = sep, quote = "\"")
  rownames(sampleinfoTab) <- make.names(rownames(sampleinfoTab))

  if (is.character(condition)) {
    unknown <- setdiff(condition, colnames(sampleinfoTab))
    if (length(unknown) > 0) {
      stop("Unknown condition column(s) in the sample info: ",
           paste(unknown, collapse = ", "), call. = FALSE)
    }
  }

  # -- group --
  # Sample-info names went through make.names(); apply the same normalisation
  # to the data sample names for the lookup only (output names are untouched).
  match_data_sampleinfoTab <- match(make.names(rownames(data)),
                                    rownames(sampleinfoTab))
  if (anyNA(match_data_sampleinfoTab)) {
    write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
    write("You may need to use change the mode (column/row)", stderr())
    write("10 first sample names in the data matrix:", stderr())
    # The match is done on the row names, so those are the ones to show.
    write(head(rownames(data), 10), stderr())
    write("10 first sample names in the sample info:", stderr())
    write(head(rownames(sampleinfoTab), 10), stderr())
    quit("no", status = 10)
  }


  # -- anova --

  # formula: one factor per condition, aligned on the rows of `data`.
  # `data` and `grps` are looked up in this function's environment when the
  # formula is evaluated, so both names must stay as they are.
  grps <- lapply(condition, function(cond) {
    factor(sampleinfoTab[, cond][match_data_sampleinfoTab])
  })

  cat("\ncontrasts:\n")
  for (i in seq_along(condition)) {
    cat(condition[i], "\t", levels(grps[[i]]), "\n")
  }

  grp_terms <- paste0("grps[[", seq_along(condition), "]]")
  anova_formula_s <- paste("data ~",
                           paste(grp_terms, collapse = anova_formula_operator))
  anova_formula <- as.formula(anova_formula_s)

  # anova: one univariate table per variable
  manovaObjectList <- manova(anova_formula)
  manovaList <- summary.aov(manovaObjectList)

  # condition renaming: the last row of each table is dropped, the remaining
  # rows are the model terms
  manovaRownames <- gsub(" ", "", rownames(manovaList[[1]]))
  manovaNbrPvalue <- length(manovaRownames) - 1
  manovaRownames <- head(manovaRownames, manovaNbrPvalue)

  for (i in seq_along(condition)) {
    manovaRownames <- gsub(grp_terms[i], as.character(condition[i]),
                           manovaRownames, fixed = TRUE)
    anova_formula_s <- gsub(grp_terms[i], as.character(condition[i]),
                            anova_formula_s, fixed = TRUE)
  }

  # log
  cat("\nanova_formula", anova_formula_s, "\n")

  # p-value: column 5 of each table, kept as a (terms x variables) matrix
  # whatever the number of terms
  aovPValue <- vapply(manovaList,
                      function(x) x[seq_len(manovaNbrPvalue), 5],
                      numeric(manovaNbrPvalue))
  aovPValue <- matrix(aovPValue, nrow = manovaNbrPvalue)
  rownames(aovPValue) <- paste0("pvalue_", manovaRownames)

  # p-value adjusted
  # NOTE(review): with several terms the correction is applied within each
  # variable, across its terms (MARGIN = 2), whereas with a single term it is
  # applied across variables. Kept as is because the expected test outputs
  # depend on it; confirm that this is the intended multiple-testing scheme.
  if (manovaNbrPvalue == 1) {
    aovAdjPValue <- matrix(p.adjust(aovPValue, method = method), nrow = 1)
  } else {
    aovAdjPValue <- apply(aovPValue, 2, p.adjust, method = method)
  }
  rownames(aovAdjPValue) <- paste0("pvalueadjusted.", method, ".",
                                   manovaRownames)

  # selection: number of terms under the threshold for each variable
  # (a missing p-value counts as "not under the threshold")
  colSumThreshold <- colSums(aovAdjPValue <= threshold, na.rm = TRUE)
  if (selection_method == "intersection") {
    selected <- colSumThreshold == nrow(aovAdjPValue)
  } else {
    selected <- colSumThreshold != 0
  }
  # drop = FALSE keeps a matrix (and the variable name) when exactly one
  # variable is selected
  datafiltered <- data[, selected, drop = FALSE]

  #data=rbind(data, aovPValue, aovAdjPValue)
  data <- rbind(data, aovAdjPValue)

  # restore the orientation of the input file
  if (mode == "row") {
    data <- t(data)
    datafiltered <- t(datafiltered)
  }

  # -- output / return --
  write.table(data, outputdatapvalue, sep = sep, quote = FALSE,
              col.names = NA)
  write.table(datafiltered, outputdatafiltered, sep = sep, quote = FALSE,
              col.names = NA)

  # log: selected variables / total variables, independent of the orientation
  cat("\nthreshold:", threshold, "\n")
  cat("result:", sum(selected), "/", length(selected), "\n")

  quit("no", status = 0)
}

# log
cat("ANOVA\n\n")
cat("Arguments\n")
args <- commandArgs(trailingOnly = TRUE)
print(args)

# The command line is a sequence of "name value" pairs whose names are the
# parameters of anova().
listArguments <- parseCommandArgs(evaluate = FALSE)
do.call(anova, listArguments)
<!-- abims_anova.xml (added as b/abims_anova.xml in changeset 0:924ff5a24122).
     Galaxy wrapper around abims_anova.r: every parameter below is forwarded
     to the script as a "name value" pair on the command line. -->
<tool id="abims_anova" name="Anova" version="1.1.3">

    <description>N-way anova. With or without interactions</description>

    <requirements>
        <requirement type="package" version="3.1.2">R</requirement>
        <requirement type="binary">Rscript</requirement>
    </requirements>

    <!-- Any non-zero exit status of the script is reported as a failure. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <command interpreter="Rscript">
abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode"
condition "c('$condition_1'
#for $i, $s in enumerate( $conditions )
,'${s.condition}'
#end for
)"
interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered
    </command>

    <inputs>
        <param name="input" type="data" label="Data Matrix file" format="tabular" help="Matrix of numeric data with headers." />
        <param name="sampleinfo" type="data" label="Sample Metadata file" format="tabular" help="Tabular file with the data metadata : one sample per line and at least two columns : ids and one condition" />

        <param name="mode" type="select" help="Perform the anova tests on column/row" format="text" optional="true">
            <label>Mode</label>
            <option value="row">row</option>
            <option value="column">column</option>
        </param>

        <!-- First condition is mandatory; the repeat adds the extra factors of an N-way anova. -->
        <param name="condition_1" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" optional="false" />
        <repeat name="conditions" title="Conditions for N-ways anova">
            <param name="condition" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" />
        </repeat>

        <param name="interaction" type="boolean" label="Enable interaction response p-values" truevalue="T" falsevalue="F" help="Used if more than 1 condition. The anova will produce p-value according to the interaction between your condition (ex: condition1:conditions2, condition1:conditions3, condition2:conditions3 and condition1:condition2:conditions3)" />

        <param name="method" type="select" help="Method used to apply a correction on the pvalue because of the number of test" format="text" optional="true">
            <label>PValue adjusted method</label>
            <option value="BH">BH</option>
            <option value="holm">holm</option>
            <option value="bonferroni">bonferroni</option>
            <option value="hochberg">hochberg</option>
            <option value="hommel">hommel</option>
            <option value="BY">BY</option>
            <option value="fdr">fdr</option>
            <option value="none">none</option>
        </param>

        <param name="threshold" type="float" label="Threshold" value="0.01" help="max adjusted p.value accepted" />

        <param name="selection_method" type="select" format="text" help="Intersection: all condition p-value must be under the threshold. Union: at least one condition p-value must be under the threshold. ">
            <label>Selection method</label>
            <option value="intersection" selected="true">intersection / strong</option>
            <option value="union">union / weak</option>
        </param>

        <!-- The values are keywords translated to the real separator by abims_anova.r. -->
        <param name="sep" type="select" format="text">
            <label>Separator of columns</label>
            <option value="tabulation">tabulation</option>
            <option value="semicolon">;</option>
            <option value="comma">,</option>
        </param>

        <param name="dec" type="text" label="Decimal separator" value="." help="" />

    </inputs>

    <outputs>
        <data name="dataMatrixPValue" format_source="input" label="${input.name}_anova_pvalue.${input.ext}"/>
        <data name="dataMatrixFiltered" format_source="input" label="${input.name}_anova_filtered.${input.ext}"/>
    </outputs>

    <tests>
        <test>
            <param name="input" value="dataMatrix.tsv"/>
            <param name="sampleinfo" value="sampleMetadata.tsv"/>
            <param name="mode" value="row"/>
            <param name="condition_1" value="age"/>
            <param name="conditions_0|condition" value="gender"/>
            <param name="interaction" value="F"/>
            <param name="method" value="BH"/>
            <param name="threshold" value="0.05"/>
            <param name="selection_method" value="union"/>
            <param name="sep" value="tabulation"/>
            <!-- The parameter is declared as "dec" in the inputs section. -->
            <param name="dec" value="."/>
            <output name="dataMatrixPValue" file="dataMatrix.tsv_anova_pvalue.tabular" />
            <output name="dataMatrixFiltered" file="dataMatrix.tsv_anova_filtered.tabular" />
        </test>
    </tests>

    <help>

.. class:: infomark

**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr

---------------------------------------------------

=====
Anova
=====

-----------
Description
-----------

Analysis of variance (ANOVA) is used to analyze the differences between group means and their associated procedures,
in which the observed variance in a particular variable is partitioned into components attributable to different sources of variation.



-----------
Input files
-----------

+---------------------------+------------+
| Parameter : num + label   |   Format   |
+===========================+============+
| 1 : Data Matrix file      |   Tabular  |
+---------------------------+------------+
| 2 : Sample Metadata file  |   Tabular  |
+---------------------------+------------+



------------
Output files
------------



***.anova_pvalue.tabular**

    | A tabular file which represents for each metabolite (row), the value of the intensity in each sample (column) + one adjusted p-value column per condition or interaction (named pvalueadjusted.METHOD.CONDITION).

***.anova_filtered.tabular**

    | The data matrix containing only the metabolites that have been selected according to their adjusted p-values and the threshold.


------

.. class:: infomark

The outputs ***.anova_filtered.tabular** or ***.anova_pvalue.tabular** are tabular files. You can continue your analysis using it in the following tools:
    | PCA
    | Hierarchical Clustering



---------------------------------------------------

---------------
Working example
---------------


Input files
-----------

**>A part of an example of Data Matrix file input**


+--------+------------------+----------------+
|  Name  | Bur-eH_FSP_12    | Bur-eH_FSP_24  |
+========+==================+================+
|M202T601| 91206595.7559783 |106808979.08546 |
+--------+------------------+----------------+
|M234T851| 27249137.275504  |28824971.3177926|
+--------+------------------+----------------+

**>A part of an example of Sample Metadata file input**


+---------------------------+------------+------------+------------+
| Sample name               |    class   |    time    |    batch   |
+===========================+============+============+============+
| Bur-eH_FSP_12             |   Bur-eH   |    12      |     1      |
+---------------------------+------------+------------+------------+
| Bur-eH_FSP_24             |   Bur-eH   |    24      |     1      |
+---------------------------+------------+------------+------------+
| Bur-NI_FSP_12             |   Bur-NI   |    12      |     2      |
+---------------------------+------------+------------+------------+
| Bur-NI_FSP_24             |   Bur-NI   |    24      |     2      |
+---------------------------+------------+------------+------------+

Parameters
----------

    | Mode -> **row**
    | column name of condition -> **class**
    | Separator of columns: -> **tabulation**
    | Decimal separator -> **.**
    | PValue adjusted method -> **BH**
    | Threshold -> **0.001**



Output files
------------

**Part of an example of xset.anova_filtered.tabular:**

.. image:: anova_filtered.png

**Part of an example of xset.anova_pvalue.tabular:**

.. image:: anova_pvalue.png


---------------------------------------------------


---------------------------
Position in workflow4metabo
---------------------------


**Upstream tools**

+---------------------------+----------------------------------------+--------+------------------------+
| Name                      | Output file                            | Format | parameter              |
+===========================+========================================+========+========================+
|xcms.diffreport            |xset.diffreport.data_matrix.tsv         | Tabular| Data table file        |
+---------------------------+----------------------------------------+--------+------------------------+
|CAMERA.annotateDiffreport  |xset.annotatediffreport.data_matrix.tsv | Tabular| Data table file        |
+---------------------------+----------------------------------------+--------+------------------------+
|xcms.xcmsSet               |sample_info.tab                         | Tabular| sample info table file |
+---------------------------+----------------------------------------+--------+------------------------+


**Downstream tools**

+---------------------------+---------------------------------------------------------+--------+
| Name                      | Output file                                             | Format |
+===========================+=========================================================+========+
|PCA                        |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
+---------------------------+---------------------------------------------------------+--------+
|Hierarchical Clustering    |xset.anova_pvalue.tabular OR xset.anova_filtered.tabular | Tabular|
+---------------------------+---------------------------------------------------------+--------+


**General schema of the metabolomic workflow**

.. image:: anova_workflow.png

    </help>

    <citations>
        <citation>Ronald A. Fisher in the 1930s</citation>
    </citations>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,16 @@ +dataMatrix HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 +HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907 +HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742 +HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315 +HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030 +HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010 +HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661 +HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775 +HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898 +HMDB00518 34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831 +HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 7517724 1341900 +HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276 +HMDB00208 747080 13420742 
595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 +HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 +HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv_anova_filtered.tabular Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,3 @@ + HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dataMatrix.tsv_anova_pvalue.tabular Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,16 @@ + HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208 pvalueadjusted.BH.age pvalueadjusted.BH.gender +HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907 0.740856975457218 0.740856975457218 +HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 32742 0.347623051099051 0.234554902330543 +HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 1580343 39315 0.720303175717481 0.720303175717481 +HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 994336 239030 0.252979062487671 0.517822514350462 +HMDB00822 483755 579287 1132413 157113 1577570 1469735 1085454 477909 814755 245417 610681 763706 2406336 827531 992508 569605 355321 150259 1334200 271010 0.808532959173048 0.808532959173048 +HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 1517319 841661 0.329174238111018 0.329174238111018 +HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 1992420 284775 0.995859884733937 0.995859884733937 +HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 1061418 622898 0.15341384456659 0.15341384456659 +HMDB00518 34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 867108 73831 0.439012867631325 
0.596200901535843 +HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 7517724 1341900 0.814406453193777 0.814406453193777 +HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 9120781 44276 0.288354536353544 0.288354536353544 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 3359238 2540044 0.659785284053633 0.00376251274734483 +HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 1824553 387811 0.646714644805001 0.646714644805001 +HMDB00512 53304 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 438839 85554 0.13976111393526 0.0491480895853803 +HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 454489 616061 0.565910223149305 0.565910223149305
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampleMetadata.tsv Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,21 @@ +sampleMetadata injectionOrder mode age bmi gender +HU_017 2 pos 40-50 23.03 M +HU_028 7 pos 40-50 23.92 F +HU_034 9 pos 50-60 23.37 M +HU_051 20 pos 20-30 23.23 F +HU_060 24 pos 50-60 28.72 F +HU_078 34 pos 40-50 25.18 M +HU_091 42 pos 60-70 26.12 M +HU_093 43 pos 50-60 21.71 M +HU_099 46 pos 20-30 21.3 M +HU_110 53 pos 50-60 20.9 F +HU_130 63 pos 30-40 26.06 M +HU_134 67 pos 40-50 22.89 M +HU_138 68 pos 40-50 21.88 M +HU_149 72 pos 30-40 19.49 F +HU_152 75 pos 20-30 17.58 F +HU_175 87 pos 30-40 21.26 F +HU_178 88 pos 60-70 32.87 F +HU_185 95 pos 40-50 21.09 F +HU_204 104 pos 30-40 29.06 M +HU_208 106 pos 20-30 18.61 F
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Mar 22 17:20:48 2016 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="R" version="3.1.2"> + <repository changeset_revision="1ca39eb16186" name="package_r_3_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>