Mercurial > repos > eschen42 > mqppep_anova
annotate mqppep_anova_script.Rmd @ 0:c1403d18c189 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
author | eschen42 |
---|---|
date | Mon, 07 Mar 2022 19:05:01 +0000 |
parents | |
children | d728198f1ba5 |
rev | line source |
---|---|
0
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
1 --- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
2 title: "Quant Data Processing Script" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
3 author: "Larry Cheng; Art Eschenlauer" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
4 date: "May 28, 2018; Nov 16, 2021" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
5 output: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
6 html_document: default |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
7 pdf_document: default |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
8 params: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
9 inputFile: "Upstream_Map_pST_outputfile_STEP4.txt" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
10 alphaFile: "alpha_levels.txt" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
11 firstDataColumn: "Intensity" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
12 imputationMethod: !r c("group-median","median","mean","random")[4] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
13 meanPercentile: 1 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
14 sdPercentile: 0.2 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
15 regexSampleNames: "\\.(\\d+)[A-Z]$" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
16 regexSampleGrouping: "(\\d+)" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
17 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
18 --- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
19 ```{r setup, include=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
20 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
21 knitr::opts_chunk$set(echo = FALSE, fig.dim=c(9,10)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
22 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
23 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
24 ## Purpose: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
25 Perform imputation of missing values, quantile normalization, and ANOVA. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
26 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
27 <!-- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
28 ## Variables to change for each input file |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
29 --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
30 ```{r include = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
31 #Input Filename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
32 inputFile <- params$inputFile |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
33 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
34 #First data column - ideally, this could be detected via regexSampleNames, but for now leave it as is. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
35 firstDataColumn <- params$firstDataColumn |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
36 FDC_is_integer <- TRUE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
37 firstDataColumn <- withCallingHandlers( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
38 as.integer(firstDataColumn) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
39 , warning = function(w) FDC_is_integer <<- FALSE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
40 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
41 if (FALSE == FDC_is_integer) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
42 firstDataColumn <- params$firstDataColumn |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
43 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
44 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
45 #False discovery rate adjustment for ANOVA (Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
46 valFDR <- read.table(file = params$alphaFile, sep = "\t", header=F, quote="")[,1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
47 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
48 #Imputed Data filename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
49 imputedDataFilename <- params$imputedDataFilename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
50 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
51 #ANOVA data filename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
52 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
53 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
54 ```{r include = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
55 #Imputation method, should be one of c("random","group-median","median","mean") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
56 imputationMethod <- params$imputationMethod |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
57 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
58 #Percentile of the log-value data used as the mean for random number generation when using random imputation (given as a percentage; converted to a fraction here) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
59 meanPercentile <- params$meanPercentile / 100.0 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
60 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
61 #deviation adjustment-factor for random values; real number. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
62 sdPercentile <- params$sdPercentile |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
63 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
64 #Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
65 regexSampleNames <- params$regexSampleNames |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
66 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
67 #Regular expression to extract Sample Grouping from Sample Name (if error occurs, compare sampleNumbers and tempMatches to see if groupings/pairs line up) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
68 # e.g., "(\\d+)" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
69 regexSampleGrouping <- params$regexSampleGrouping |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
70 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
71 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
72 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
73 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
74 ```{r include = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
75 ### FUNCTIONS |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
76 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
77 #ANOVA filter function |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
78 anovaFunc <- function(x, groupingFactor) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
79 x.aov = aov(as.numeric(x) ~ groupingFactor) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
80 pvalue = summary(x.aov)[[1]][["Pr(>F)"]][1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
81 pvalue |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
82 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
83 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
84 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
85 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
86 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
87 ### Checking that log-transformed sample distributions are similar: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
88 ```{r echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
89 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
90 library(data.table) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
91 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
92 # read.table reads a file in table format and creates a data frame from it. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
93 # - note that `quote=""` means that quotation marks are treated literally. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
94 fullData <- read.table(file = inputFile, sep = "\t", header=T, quote="", check.names=FALSE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
95 print(colnames(fullData)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
96 #head(fullData) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
97 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
98 if (FALSE == FDC_is_integer) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
99 dataColumnIndices <- grep(firstDataColumn, names(fullData), perl=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
100 str(dataColumnIndices) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
101 if (length(dataColumnIndices) > 0) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
102 firstDataColumn <- dataColumnIndices[1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
103 } else { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
104 stop(paste("failed to convert firstDataColumn:", firstDataColumn)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
105 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
106 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
107 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
108 quantData0 <- fullData[firstDataColumn:length(fullData)] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
109 quantData <- fullData[firstDataColumn:length(fullData)] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
110 quantData[quantData==0] <- NA #replace 0 with NA |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
111 quantDataLog <- log10(quantData) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
112 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
113 rownames(quantDataLog) <- fullData$Phosphopeptide |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
114 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
115 summary(quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
116 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
117 #data visualization |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
118 old_par <- par( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
119 mai=par("mai") + c(0.5,0,0,0) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
120 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
121 boxplot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
122 quantDataLog |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
123 , las=2 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
124 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
125 par(old_par) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
126 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
127 quantDataLog_stack <- stack(quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
128 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
129 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
130 ```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
131 library(ggplot2) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
132 ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
133 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
134 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
135 ### Globally, are phosphopeptide intensities approximately unimodal? |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
136 ```{r echo = FALSE,fig.align="left", fig.dim=c(9,5)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
137 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
138 # ref for bquote particularly and plotting math expressions generally: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
139 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
140 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
141 #identify the location of missing values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
142 fin <- is.finite(as.numeric(as.matrix(quantDataLog))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
143 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
144 logvalues <- as.numeric(as.matrix(quantDataLog))[fin] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
145 plot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
146 density(logvalues) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
147 , main = bquote("Smoothed estimated probability density vs." ~ log[10](intensity)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
148 , xlab = bquote(log[10](intensity)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
149 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
150 hist( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
151 x = as.numeric(as.matrix(quantDataLog)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
152 , breaks = 100 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
153 , main = bquote("Frequency vs." ~ log[10](intensity)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
154 , xlab = bquote(log[10](intensity)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
155 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
156 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
157 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
158 <!-- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
159 ## Impute missing values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
160 --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
161 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
162 ### Distribution of standard deviations of phosphopeptides, ignoring missing values: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
163 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
164 ```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
165 #determine quantile |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
166 q1 <- quantile(logvalues, probs = meanPercentile)[1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
167 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
168 #determine standard deviation of quantile to impute |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
169 sd_finite <- function(x) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
170 ok <- is.finite(x) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
171 sd(x[ok]) * sdPercentile |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
172 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
173 sds <- apply(quantDataLog, 1, sd_finite) # 1 = row of matrix (ie, phosphopeptide) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
174 plot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
175 density(sds, na.rm=T) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
176 , main="Smoothed estimated probability density vs. std. deviation" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
177 , sub="(probability estimation made with Gaussian smoothing)" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
178 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
179 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
180 m1 <- median(sds, na.rm=T) #sd to be used is the median sd |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
181 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
182 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
183 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
184 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
185 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
186 <!-- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
187 The number of missing values is: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
188 --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
189 ```{r echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
190 #Determine number of cells to impute |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
191 temp <- quantData[is.na(quantData)] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
192 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
193 #Determine number of values to impute |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
194 NoToImpute <- length(temp) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
195 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
196 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
197 <!-- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
198 % of values that are missing: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
199 --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
200 ```{r echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
201 pct_missing_values <- length(temp)/(length(logvalues)+length(temp)) * 100 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
202 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
203 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
204 <!-- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
205 First few rows of data before imputation: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
206 --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
207 ## Impute missing values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
208 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
209 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
210 #ACE start segment: trt-median based imputation |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
211 # prep for trt-median based imputation |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
212 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
213 # Assuming that regexSampleNames <- "\\.(\\d+)[A-Z]$" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
214 # get factors -> group runs (samples) by ignoring terminal [A-Z] in sample names |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
215 # regexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
216 m <- regexpr(regexSampleNames, names(quantData), perl=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
217 tempMatches <- regmatches(names(quantData), m) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
218 print("Extracted sample names") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
219 print(tempMatches) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
220 m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
221 sampleNumbers <- as.factor(regmatches(tempMatches, m2)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
222 print("Factor levels") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
223 print(sampleNumbers) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
224 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
225 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
226 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
227 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
228 #ACE hack begin |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
229 #Determine number of cells to impute |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
230 cat( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
231 sprintf("Before imputation, there are:\n %d peptides\n %d missing values (%2.0f%s)" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
232 , sum(rep.int(TRUE, nrow(quantData))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
233 , sum(is.na(quantData)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
234 , pct_missing_values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
235 , "%" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
236 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
237 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
238 #ACE hack end |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
239 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
240 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
241 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
242 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
243 #Impute data |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
244 quantDataImputed <- quantData |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
245 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
246 # Identify which values are missing and need to be imputed |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
247 ind <- which(is.na(quantDataImputed), arr.ind=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
248 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
249 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
250 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
251 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
252 # Apply imputation |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
253 switch( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
254 imputationMethod |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
255 , "group-median"={ |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
256 cat("Imputation method: substitute missing value with median peptide-intensity for sample-group\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
257 #goodRows <- rep.int(TRUE, nrow(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
258 sampleLevelIntegers <- as.integer(sampleNumbers) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
259 for (i in seq_along(levels(sampleNumbers))) { # one pass per sample group; seq_along() is safe when there are no levels |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
260 levelCols <- i == sampleLevelIntegers # logical mask of the columns belonging to group i |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
261 ind <- which(is.na(quantDataImputed[,levelCols]), arr.ind=TRUE) # (row, col) positions of NAs, col relative to this group's columns |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
262 if (nrow(ind) > 0) quantDataImputed[,levelCols][ind] <- apply(quantDataImputed[,levelCols], 1, median, na.rm=TRUE)[ind[,1]] # fill only the NA cells with that row's group median; ind must index the group sub-frame as a (row, col) matrix, not be used as a row subscript |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
263 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
264 goodRows <- !is.na(rowMeans(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
265 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
266 , "median"={ |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
267 cat("Imputation method: substitute missing value with median peptide-intensity across all sample classes\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
268 quantDataImputed[ind] <- apply(quantDataImputed, 1, median, na.rm=T)[ind[,1]] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
269 goodRows <- !is.na(rowMeans(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
270 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
271 , "mean"={ |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
272 cat("Imputation method: substitute missing value with mean peptide-intensity across all sample classes\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
273 quantDataImputed[ind] <- apply(quantDataImputed, 1, mean, na.rm=T)[ind[,1]] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
274 goodRows <- !is.na(rowMeans(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
275 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
276 , "random"={ |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
277 cat( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
278 sprintf( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
279 "Imputation method: substitute missing value with random intensity N ~ (%0.2f, %0.2f)\n" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
280 , q1, m1 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
281 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
282 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
283 quantDataImputed[is.na(quantDataImputed)] <- 10^rnorm(NoToImpute, mean= q1, sd = m1) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
284 goodRows <- !is.na(rowMeans(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
285 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
286 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
287 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
288 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
289 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
290 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
291 # Summarize the values that are still missing after imputation |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
292 temp <- quantDataImputed[is.na(quantDataImputed)] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
293 cat( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
294 sprintf( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
295 "After imputation, there are:\n %d missing values\n %d usable peptides\n %d peptides with too many missing values for further analysis" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
296 , sum(is.na(quantDataImputed[goodRows,])) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
297 , sum(goodRows) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
298 , sum(!goodRows) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
299 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
300 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
301 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
302 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
303 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
304 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
305 # Zap rows where imputation was ineffective |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
306 fullData <- fullData [goodRows, ] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
307 quantData <- quantData [goodRows, ] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
308 quantDataImputed <- quantDataImputed[goodRows, ] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
309 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
310 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
311 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
312 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
313 d_combined <- (density(as.numeric(as.matrix(log10(quantDataImputed))))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
314 d_original <- density(as.numeric(as.matrix(log10(quantDataImputed[!is.na(quantData)])))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
315 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
316 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
317 ```{r echo = FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
318 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
319 if (sum(is.na(quantData)) > 0) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
320 # There ARE missing values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
321 d_imputed <- (density(as.numeric(as.matrix(log10(quantDataImputed[is.na(quantData)]))))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
322 } else { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
323 # There are NO missing values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
324 d_imputed <- d_combined |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
325 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
326 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
327 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
328 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
329 <!-- ```{r echo = FALSE, fig.cap = "Blue = Data before imputation; Red = Imputed data"} --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
330 ```{r echo = FALSE, fig.dim=c(9,5)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
331 ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
332 plot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
333 d_combined |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
334 , ylim = ylim |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
335 , sub = "Blue = data before imputation; Red = imputed data" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
336 , main = "Density vs. log10(intensity) before and after imputation" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
337 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
338 lines(d_original, col="blue") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
339 lines(d_imputed, col="red") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
340 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
341 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
342 ## Perform Quantile Normalization |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
343 ```{r echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
344 library(preprocessCore) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
345 # Apply quantile normalization using preprocessCore::normalize.quantiles |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
346 # --- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
347 # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
348 # except this: https://support.bioconductor.org/p/122925/#9135989 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
349 # says to install it like this: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
350 # ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
351 # BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE,lib=.libPaths()[1]) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
352 # ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
353 # conda installation (necessary because of a bug in recent openblas): |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
354 # conda install bioconductor-preprocesscore openblas=0.3.3 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
355 # ... |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
356 # --- |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
357 # normalize.quantiles {preprocessCore} -- Quantile Normalization |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
358 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
359 # Description: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
360 # Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
361 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
362 # Usage: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
363 # normalize.quantiles(x,copy=TRUE, keep.names=FALSE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
364 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
365 # Arguments: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
366 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
367 # - x: A matrix of intensities where each column corresponds to a chip and each row is a probe. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
368 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
369 # - copy: Make a copy of matrix before normalizing. Usually safer to work with a copy, |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
370 # but in certain situations not making a copy of the matrix, but instead normalizing |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
371 # it in place will be more memory friendly. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
372 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
373 # - keep.names: Boolean option to preserve matrix row and column names in output. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
374 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
375 # Details: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
376 # This method is based upon the concept of a quantile-quantile plot extended to n dimensions. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
377 # No special allowances are made for outliers. If you make use of quantile normalization |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
378 # please cite Bolstad et al, Bioinformatics (2003). |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
379 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
380 # This function will handle missing data (i.e., NA values), based on |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
381 # the assumption that the data is missing at random. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
382 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
383 # Note that the current implementation optimizes for better memory usage |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
384 # at the cost of some additional run-time. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
385 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
386 # Value: A normalized matrix. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
387 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
388 # Author: Ben Bolstad, bmbolstad.com |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
389 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
390 # References: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
391 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
392 # - Bolstad, B (2001) Probe Level Quantile Normalization of High Density Oligonucleotide |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
393 # Array Data. Unpublished manuscript http://bmbolstad.com/stuff/qnorm.pdf |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
394 # |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
395 # - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
396 # Normalization Methods for High Density Oligonucleotide Array Data Based on Bias |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
397 # and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
398 # http://bmbolstad.com/misc/normalize/normalize.html |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
399 # ... |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
400 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
401 if (TRUE) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
402 quantDataImputed.qn <- normalize.quantiles(as.matrix(quantDataImputed)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
403 } else { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
404 quantDataImputed.qn <- as.matrix(quantDataImputed) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
405 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
406 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
407 quantDataImputed.qn = as.data.frame(quantDataImputed.qn) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
408 names(quantDataImputed.qn) = names(quantDataImputed) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
409 quantDataImputed_QN_log <- log10(quantDataImputed.qn) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
410 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
411 rownames(quantDataImputed_QN_log) <- fullData[,1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
412 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
413 quantDataImputed.qn.LS = t(scale(t(log10(quantDataImputed.qn)))) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
414 anyNaN <- function (x) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
415 !any(x == "NaN") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
416 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
417 sel = apply(quantDataImputed.qn.LS, 1, anyNaN) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
418 quantDataImputed.qn.LS2 <- quantDataImputed.qn.LS[which(sel),] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
419 quantDataImputed.qn.LS2 = as.data.frame(quantDataImputed.qn.LS2) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
420 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
421 #output quantile normalized data |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
422 dataTableImputed_QN_LT <- cbind(fullData[1:9], quantDataImputed_QN_log) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
423 write.table(dataTableImputed_QN_LT, file = paste(paste(strsplit(imputedDataFilename, ".txt"),"QN_LT",sep="_"),".txt",sep=""), sep = "\t", col.names=TRUE, row.names=FALSE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
424 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
425 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
426 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
427 <!-- ACE insertion begin --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
428 ### Checking that normalized, imputed, log-transformed sample distributions are similar: |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
429 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
430 ```{r echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
431 #library(data.table) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
432 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
433 #Save unimputed quantDataLog for plotting below |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
434 unimputedQuantDataLog <- quantDataLog |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
435 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
436 #Log10 transform (after preparing for zero values, which should never happen...) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
437 quantDataImputed.qn[quantDataImputed.qn == 0] <- .000000001 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
438 quantDataLog <- log10(quantDataImputed.qn) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
439 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
440 summary(quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
441 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
442 #Output quantile-normalized log-transformed dataset with imputed, normalized data |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
443 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
444 dataTableImputed <- cbind(fullData[1:9], quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
445 write.table( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
446 dataTableImputed |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
447 , file=imputedDataFilename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
448 , sep="\t" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
449 , col.names=TRUE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
450 , row.names=FALSE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
451 , quote=FALSE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
452 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
453 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
454 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
455 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
456 #data visualization |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
457 old_par <- par( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
458 mai=par("mai") + c(0.5,0,0,0) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
459 , oma=par("oma") + c(0.5,0,0,0) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
460 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
461 boxplot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
462 quantDataLog |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
463 , las=2 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
464 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
465 par(old_par) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
466 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
467 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
468 ```{r echo=FALSE, fig.dim=c(9,5)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
469 quantDataLog_stack <- stack(quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
470 ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
471 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
472 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
473 ## Perform ANOVA filters |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
474 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
475 ```{r,echo=FALSE} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
476 #Make new data frame (connect_df) holding the Phosphopeptide and Intensity (firstDataColumn) columns to connect preANOVA to ANOVA |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
477 connect_df <- data.frame( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
478 dataTableImputed_QN_LT$Phosphopeptide |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
479 , dataTableImputed_QN_LT[,firstDataColumn] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
480 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
481 colnames(connect_df) <- c("Phosphopeptide","Intensity") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
482 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
483 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
484 ```{r echo=FALSE, fig.dim=c(9,10)} |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
485 # Get factors -> group replicates (as indicated by terminal letter) by the preceding digits |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
486 # For example, group .1A .1B .1C into group 1; .2A .2B .2C into group 2; etc. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
487 m <- regexpr(regexSampleNames, names(quantDataImputed_QN_log), perl=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
488 #ACE str(m) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
489 tempMatches <- regmatches(names(quantDataImputed_QN_log), m) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
490 #ACE str(tempMatches) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
491 numSamples <- length(tempMatches) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
492 #ACE str(numSamples) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
493 m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
494 #ACE str(m2) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
495 #ACE str(regmatches(tempMatches, m2)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
496 sampleNumbers <- as.factor(regmatches(tempMatches, m2)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
497 #ACE str(sampleNumbers) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
498 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
499 if (length(levels(sampleNumbers))<2) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
500 cat("ERROR!!!! Cannot perform ANOVA analysis because it requires two or more factor levels\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
501 cat("Unparsed sample names are:\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
502 print(names(quantDataImputed_QN_log)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
503 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regexSampleNames)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
504 cat("Parsed names are:\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
505 print(tempMatches) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
506 cat(sprintf("Parsing rule for SampleGrouping is '%s'\n", regexSampleGrouping)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
507 cat("Sample group assignments are:\n") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
508 print(regmatches(tempMatches, m2)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
509 } else { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
510 pValueData.anovaPs <- apply(quantDataImputed_QN_log, 1, anovaFunc, groupingFactor=sampleNumbers) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
511 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
512 pValueData.anovaPs.FDR <- p.adjust(pValueData.anovaPs, method="fdr") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
513 pValueData <- data.frame( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
514 phosphopeptide = fullData[,1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
515 , rawANOVAp = pValueData.anovaPs |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
516 , FDRadjustedANOVAp = pValueData.anovaPs.FDR |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
517 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
518 #ACE rownames(pValueData) <- fullData[,1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
519 # output ANOVA file to constructed filename, |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
520 # e.g. "Outputfile_pST_ANOVA_STEP5.txt" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
521 # becomes "Outputfile_pST_ANOVA_STEP5_FDR0.05.txt" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
522 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
523 #Re-output quantile-normalized log-transformed dataset with imputed, normalized data to include p-values |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
524 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
525 dataTableImputed <- cbind(fullData[1:9], pValueData[,2:3], quantDataLog) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
526 write.table( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
527 dataTableImputed |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
528 , file=imputedDataFilename |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
529 , sep="\t" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
530 , col.names=TRUE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
531 , row.names=FALSE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
532 , quote=FALSE |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
533 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
534 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
535 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
536 pValueData <- pValueData[order(pValueData$FDRadjustedANOVAp),] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
537 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
538 cutoff <- valFDR[1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
539 for (cutoff in valFDR){ #loop through FDR cutoffs |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
540 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
541 filtered_p <- pValueData[which(pValueData$FDRadjustedANOVAp < cutoff),, drop = FALSE] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
542 filteredData.filtered <- quantDataImputed_QN_log[rownames(filtered_p),, drop = FALSE] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
543 filteredData.filtered <- filteredData.filtered[order(filtered_p$FDRadjustedANOVAp),, drop = FALSE] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
544 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
545 # <!-- ACE insertion start --> |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
546 old_oma <- par("oma") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
547 old_par <- par( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
548 mai=(par("mai") + c(0.7,0,0,0)) * c(1,1,0.3,1) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
549 , oma=old_oma * c(1,1,0.3,1) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
550 , cex.main=0.9 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
551 , cex.axis=0.7 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
552 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
553 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
554 if (nrow(filteredData.filtered) > 0) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
555 boxplot( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
556 filteredData.filtered |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
557 , main = sprintf("Imputed, normalized intensities where adjusted p-value < %0.2f", cutoff) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
558 # no line plot , main = "" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
559 , las = 2 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
560 # , ylim = c(5.5,10) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
561 , ylab = expression(log[10](intensity)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
562 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
563 } else { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
564 cat(sprintf("No peptides were found to have cutoff adjusted p-value < %0.2f\n", cutoff)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
565 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
566 par(old_par) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
567 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
568 #Add Phosphopeptide column to ANOVA filtered table |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
569 ANOVA.filtered_merge <- merge( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
570 x = connect_df |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
571 , y = filteredData.filtered |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
572 , by.x="Intensity" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
573 , by.y=1 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
574 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
575 ANOVA.filtered_merge.order <- rownames(filtered_p) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
576 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
577 ANOVA.filtered_merge.format <- sapply( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
578 X = filtered_p$FDRadjustedANOVAp |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
579 , FUN = function(x) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
580 if (x > 0.0001) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
581 paste0("(%0.",1+ceiling(-log10(x)),"f) %s") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
582 else |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
583 paste0("(%0.4e) %s") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
584 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
585 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
586 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
587 #ANOVA.filtered_merge.format <- paste0("(%0.",1+ceiling(-log10(filtered_p$FDRadjustedANOVAp)),"f) %s") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
588 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
589 ANOVA.filtered <- data.table( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
590 ANOVA.filtered_merge$Phosphopeptide |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
591 , ANOVA.filtered_merge$Intensity |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
592 , ANOVA.filtered_merge[, 2:numSamples+1] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
593 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
594 colnames(ANOVA.filtered) <- c("Phosphopeptide", colnames(filteredData.filtered)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
595 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
596 # merge qualitative columns into the ANOVA data |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
597 output_table <- data.frame(ANOVA.filtered$Phosphopeptide) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
598 output_table <- merge( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
599 x = output_table |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
600 , y = dataTableImputed_QN_LT |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
601 , by.x = "ANOVA.filtered.Phosphopeptide" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
602 , by.y="Phosphopeptide" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
603 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
604 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
605 #Produce heatmap to visualize significance and the effect of imputation |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
606 m <- as.matrix(unimputedQuantDataLog[ANOVA.filtered_merge.order,]) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
607 if (nrow(m) > 0) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
608 rownames_m <- rownames(m) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
609 rownames(m) <- sapply( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
610 X = 1:nrow(m) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
611 , FUN = function(i) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
612 sprintf( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
613 ANOVA.filtered_merge.format[i] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
614 , filtered_p$FDRadjustedANOVAp[i] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
615 , rownames_m[i] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
616 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
617 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
618 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
619 margins <- c( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
620 max(nchar(colnames(m))) * 10 / 16 # col |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
621 , max(nchar(rownames(m))) * 5 / 16 # row |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
622 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
623 how_many_peptides <- min(50, nrow(m)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
624 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
625 op <- par("cex.main") |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
626 try( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
627 if (nrow(m) > 1) { |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
628 par(cex.main=0.6) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
629 heatmap( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
630 m[how_many_peptides:1,] |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
631 , Rowv = NA |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
632 , Colv = NA |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
633 , cexRow = 0.7 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
634 , cexCol = 0.8 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
635 , scale="row" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
636 , margins = margins |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
637 , main = "Heatmap of unimputed, unnormalized intensities" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
638 , xlab = "" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
639 # , main = bquote( |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
640 # .( how_many_peptides ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
641 # ~ " peptides with adjusted p-value <" |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
642 # ~ .(sprintf("%0.2f", cutoff)) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
643 # ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
644 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
645 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
646 ) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
647 #ACE fig_dim knitr::opts_chunk$set(fig.dim = fig_dim) |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
648 par(cex.main = op) # op is the bare numeric from par("cex.main"); it must be passed by name to restore |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
649 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
650 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
651 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
652 } |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
653 ``` |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
654 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
655 ## Peptide IDs, etc. |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
656 |
c1403d18c189
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
657 See output files. |