annotate Baseline_Functions.r @ 0:d685e7ba0ed4 draft

Uploaded
author davidvanzessen
date Tue, 15 Jul 2014 08:43:49 -0400
parents
children 94fada165724
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1 #########################################################################################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2 # License Agreement
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
3 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
4 # THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
5 # ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
6 # APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
7 # OR COPYRIGHT LAW IS PROHIBITED.
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
8 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
9 # BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
10 # BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
11 # TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
12 # CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
13 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
14 # BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
15 # Coded by: Mohamed Uduman & Gur Yaari
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
16 # Copyright 2012 Kleinstein Lab
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
17 # Version: 1.3 (01/23/2014)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
18 #########################################################################################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
19
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
20 # Global variables
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
21
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
22 FILTER_BY_MUTATIONS = 1000
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
23
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
24 # Nucleotides
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
25 NUCLEOTIDES = c("A","C","G","T")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
26
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
27 # Amino Acids
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
28 AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
29 names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
30 names(AMINO_ACIDS) <- names(AMINO_ACIDS)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
31
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
32 #Amino Acid Traits
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
33 #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
34 #B = "Hydrophobic/Burried" N = "Intermediate/Neutral" S="Hydrophilic/Surface")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
35 TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
36 names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
37 TRAITS_AMINO_ACIDS <- array(NA,21)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
38
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
39 # Codon Table
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
40 CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
41
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
42 # Substitution Model: Smith DS et al. 1996
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
43 substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
44 substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
45 substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
46 load("FiveS_Substitution.RData")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
47
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
48 # Mutability Models: Shapiro GS et al. 2002
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
49 triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
50 triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
51 triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
52 load("FiveS_Mutability.RData")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
53
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
54 # Functions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
55
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
56 # Translate codon to amino acid
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
57 translateCodonToAminoAcid<-function(Codon){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
58 return(AMINO_ACIDS[Codon])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
59 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
60
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
61 # Translate amino acid to trait change
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
62 translateAminoAcidToTraitChange<-function(AminoAcid){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
63 return(TRAITS_AMINO_ACIDS[AminoAcid])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
64 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
65
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
66 # Initialize Amino Acid Trait Changes
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
67 initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
68 if(!is.null(traitChangeFileName)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
69 tryCatch(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
70 traitChange <- read.delim(traitChangeFileName,sep="\t",header=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
71 , error = function(ex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
72 cat("Error|Error reading trait changes. Please check file name/path and format.\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
73 q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
74 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
75 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
76 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
77 traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
78 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
79 TRAITS_AMINO_ACIDS <<- traitChange
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
80 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
81
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
82 # Read in formatted nucleotide substitution matrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
83 initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
84 if(!is.null(subsMatFileName)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
85 tryCatch(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
86 subsMat <- read.delim(subsMatFileName,sep="\t",header=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
87 , error = function(ex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
88 cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
89 q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
90 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
91 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
92 if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
93 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
94 if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
95 if(substitutionModel==2)subsMat <- substitution_Flu_Human
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
96 if(substitutionModel==3)subsMat <- substitution_Flu25_Human
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
97
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
98 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
99
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
100 if(substitutionModel==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
101 subsMat <- matrix(1,4,4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
102 subsMat[,] = 1/3
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
103 subsMat[1,1] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
104 subsMat[2,2] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
105 subsMat[3,3] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
106 subsMat[4,4] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
107 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
108
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
109
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
110 NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
111 if(substitutionModel==5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
112 subsMat <- FiveS_Substitution
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
113 return(subsMat)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
114 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
115 subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
116 return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
117 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
118 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
119
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
120
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
121 # Read in formatted Mutability file
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
122 initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
123 if(!is.null(mutabilityMatFileName)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
124 tryCatch(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
125 mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
126 , error = function(ex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
127 cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
128 q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
129 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
130 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
131 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
132 mutabilityMat <- triMutability_Literature_Human
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
133 if(species==2) mutabilityMat <- triMutability_Literature_Mouse
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
134 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
135
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
136 if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
137
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
138 if(mutabilityModel==5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
139 mutabilityMat <- FiveS_Mutability
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
140 return(mutabilityMat)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
141 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
142 return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
143 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
144 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
145
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
146 # Read FASTA file formats
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
147 # Modified from read.fasta from the seqinR package
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
148 baseline.read.fasta <-
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
149 function (file = system.file("sequences/sample.fasta", package = "seqinr"),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
150 seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
151 set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
152 strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
153 endian = .Platform$endian, apply.mask = TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
154 {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
155 seqtype <- match.arg(seqtype)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
156
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
157 lines <- readLines(file)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
158
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
159 if (legacy.mode) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
160 comments <- grep("^;", lines)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
161 if (length(comments) > 0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
162 lines <- lines[-comments]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
163 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
164
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
165
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
166 ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
167 lines_mod<-lines
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
168
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
169 if(!length(ind_groups)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
170 lines_mod<-c(">>>All sequences combined",lines)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
171 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
172
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
173 ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
174
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
175 lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
176 id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
177 lines[id] <- "THIS IS A FAKE SEQUENCE"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
178 lines[-id] <- lines_mod
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
179 rm(lines_mod)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
180
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
181 ind <- which(substr(lines, 1L, 1L) == ">")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
182 nseq <- length(ind)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
183 if (nseq == 0) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
184 stop("no line starting with a > character found")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
185 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
186 start <- ind + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
187 end <- ind - 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
188
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
189 while( any(which(ind%in%end)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
190 ind=ind[-which(ind%in%end)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
191 nseq <- length(ind)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
192 if (nseq == 0) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
193 stop("no line starting with a > character found")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
194 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
195 start <- ind + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
196 end <- ind - 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
197 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
198
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
199 end <- c(end[-1], length(lines))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
200 sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
201 if (seqonly)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
202 return(sequences)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
203 nomseq <- lapply(seq_len(nseq), function(i) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
204
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
205 #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
206 substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
207
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
208 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
209 if (seqtype == "DNA") {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
210 if (forceDNAtolower) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
211 sequences <- as.list(tolower(chartr(".","-",sequences)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
212 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
213 sequences <- as.list(toupper(chartr(".","-",sequences)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
214 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
215 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
216 if (as.string == FALSE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
217 sequences <- lapply(sequences, s2c)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
218 if (set.attributes) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
219 for (i in seq_len(nseq)) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
220 Annot <- lines[ind[i]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
221 if (strip.desc)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
222 Annot <- substr(Annot, 2L, nchar(Annot))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
223 attributes(sequences[[i]]) <- list(name = nomseq[[i]],
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
224 Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA",
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
225 DNA = "SeqFastadna"))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
226 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
227 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
228 names(sequences) <- nomseq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
229 return(sequences)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
230 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
231
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
232
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
233 # Replaces non FASTA characters in input files with N
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
234 replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
235 gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
236 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
237
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
238 # Find the germlines in the FASTA list
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
239 germlinesInFile <- function(seqIDs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
240 firstChar = sapply(seqIDs,function(x){substr(x,1,1)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
241 secondChar = sapply(seqIDs,function(x){substr(x,2,2)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
242 return(firstChar==">" & secondChar!=">")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
243 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
244
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
245 # Find the groups in the FASTA list
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
246 groupsInFile <- function(seqIDs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
247 sapply(seqIDs,function(x){substr(x,1,2)})==">>"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
248 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
249
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
250 # In the process of finding germlines/groups, expand from the start to end of the group
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
251 expandTillNext <- function(vecPosToID){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
252 IDs = names(vecPosToID)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
253 posOfInterests = which(vecPosToID)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
254
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
255 expandedID = rep(NA,length(IDs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
256 expandedIDNames = gsub(">","",IDs[posOfInterests])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
257 startIndexes = c(1,posOfInterests[-1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
258 stopIndexes = c(posOfInterests[-1]-1,length(IDs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
259 expandedID = unlist(sapply(1:length(startIndexes),function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
260 rep(i,stopIndexes[i]-startIndexes[i]+1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
261 }))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
262 names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
263 rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
264 }))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
265 return(expandedID)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
266 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
267
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
268 # Process FASTA (list) to return a matrix[input, germline)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
269 processInputAdvanced <- function(inputFASTA){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
270
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
271 seqIDs = names(inputFASTA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
272 numbSeqs = length(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
273 posGermlines1 = germlinesInFile(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
274 numbGermlines = sum(posGermlines1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
275 posGroups1 = groupsInFile(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
276 numbGroups = sum(posGroups1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
277 consDef = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
278
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
279 if(numbGermlines==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
280 posGermlines = 2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
281 numbGermlines = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
282 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
283
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
284 glPositionsSum = cumsum(posGermlines1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
285 glPositions = table(glPositionsSum)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
286 #Find the position of the conservation row
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
287 consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
288 if( length(consDefPos)> 0 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
289 consDefID = match(consDefPos, glPositionsSum)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
290 #The coservation rows need to be pulled out and stores seperately
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
291 consDef = inputFASTA[consDefID]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
292 inputFASTA = inputFASTA[-consDefID]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
293
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
294 seqIDs = names(inputFASTA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
295 numbSeqs = length(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
296 posGermlines1 = germlinesInFile(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
297 numbGermlines = sum(posGermlines1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
298 posGroups1 = groupsInFile(seqIDs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
299 numbGroups = sum(posGroups1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
300 if(numbGermlines==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
301 posGermlines = 2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
302 numbGermlines = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
303 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
304 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
305
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
306 posGroups <- expandTillNext(posGroups1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
307 posGermlines <- expandTillNext(posGermlines1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
308 posGermlines[posGroups1] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
309 names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
310 posInput = rep(TRUE,numbSeqs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
311 posInput[posGroups1 | posGermlines1] = FALSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
312
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
313 matInput = matrix(NA, nrow=sum(posInput), ncol=2)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
314 rownames(matInput) = seqIDs[posInput]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
315 colnames(matInput) = c("Input","Germline")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
316
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
317 vecInputFASTA = unlist(inputFASTA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
318 matInput[,1] = vecInputFASTA[posInput]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
319 matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
320
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
321 germlines = posGermlines[posInput]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
322 groups = posGroups[posInput]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
323
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
324 return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
325 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
326
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
327
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
328 # Replace leading and trailing dashes in the sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
329 replaceLeadingTrailingDashes <- function(x,readEnd){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
330 iiGap = unlist(gregexpr("-",x[1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
331 ggGap = unlist(gregexpr("-",x[2]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
332 #posToChange = intersect(iiGap,ggGap)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
333
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
334
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
335 seqIn = replaceLeadingTrailingDashesHelper(x[1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
336 seqGL = replaceLeadingTrailingDashesHelper(x[2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
337 seqTemplate = rep('N',readEnd)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
338 seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
339 seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
340 # if(posToChange!=-1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
341 # seqIn[posToChange] = "-"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
342 # seqGL[posToChange] = "-"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
343 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
344
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
345 seqIn = c2s(seqIn[1:readEnd])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
346 seqGL = c2s(seqGL[1:readEnd])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
347
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
348 lenGL = nchar(seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
349 if(lenGL<readEnd){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
350 seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
351 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
352
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
353 lenInput = nchar(seqIn)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
354 if(lenInput<readEnd){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
355 seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
356 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
357 return( c(seqIn,seqGL) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
358 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
359
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
360 replaceLeadingTrailingDashesHelper <- function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
361 grepResults = gregexpr("-*",x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
362 grepResultsPos = unlist(grepResults)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
363 grepResultsLen = attr(grepResults[[1]],"match.length")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
364 x = s2c(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
365 if(x[1]=="-"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
366 x[1:grepResultsLen[1]] = "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
367 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
368 if(x[length(x)]=="-"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
369 x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
370 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
371 return(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
372 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
373
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
374
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
375
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
376
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
377 # Check sequences for indels
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
378 checkForInDels <- function(matInputP){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
379 insPos <- checkInsertion(matInputP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
380 delPos <- checkDeletions(matInputP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
381 return(list("Insertions"=insPos, "Deletions"=delPos))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
382 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
383
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
384 # Check sequences for insertions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
385 checkInsertion <- function(matInputP){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
386 insertionCheck = apply( matInputP,1, function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
387 inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
388 glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
389 return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
390 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
391 return(as.vector(insertionCheck))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
392 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
393 # Fix inserstions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
394 fixInsertions <- function(matInputP){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
395 insPos <- checkInsertion(matInputP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
396 sapply((1:nrow(matInputP))[insPos],function(rowIndex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
397 x <- matInputP[rowIndex,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
398 inputGaps <- gregexpr("-",x[1])[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
399 glGaps <- gregexpr("-",x[2])[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
400 posInsertions <- glGaps[!(glGaps%in%inputGaps)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
401 inputInsertionToN <- s2c(x[2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
402 inputInsertionToN[posInsertions]!="-"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
403 inputInsertionToN[posInsertions] <- "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
404 inputInsertionToN <- c2s(inputInsertionToN)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
405 matInput[rowIndex,2] <<- inputInsertionToN
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
406 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
407 return(insPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
408 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
409
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
410 # Check sequences for deletions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
411 checkDeletions <-function(matInputP){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
412 deletionCheck = apply( matInputP,1, function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
413 inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
414 glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
415 return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
416 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
417 return(as.vector(deletionCheck))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
418 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
419 # Fix sequences with deletions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
420 fixDeletions <- function(matInputP){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
421 delPos <- checkDeletions(matInputP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
422 sapply((1:nrow(matInputP))[delPos],function(rowIndex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
423 x <- matInputP[rowIndex,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
424 inputGaps <- gregexpr("-",x[1])[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
425 glGaps <- gregexpr("-",x[2])[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
426 posDeletions <- inputGaps[!(inputGaps%in%glGaps)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
427 inputDeletionToN <- s2c(x[1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
428 inputDeletionToN[posDeletions] <- "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
429 inputDeletionToN <- c2s(inputDeletionToN)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
430 matInput[rowIndex,1] <<- inputDeletionToN
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
431 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
432 return(delPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
433 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
434
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
435
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
436 # Trim DNA sequence to the last codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
437 trimToLastCodon <- function(seqToTrim){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
438 seqLen = nchar(seqToTrim)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
439 trimmedSeq = s2c(seqToTrim)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
440 poi = seqLen
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
441 tailLen = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
442
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
443 while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
444 tailLen = tailLen + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
445 poi = poi - 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
446 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
447
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
448 trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
449 seqLen = nchar(trimmedSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
450 # Trim sequence to last codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
451 if( getCodonPos(seqLen)[3] > seqLen )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
452 trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
453
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
454 return(trimmedSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
455 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
456
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
457 # Given a nuclotide position, returns the pos of the 3 nucs that made the codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
458 # e.g. nuc 86 is part of nucs 85,86,87
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
459 getCodonPos <- function(nucPos){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
460 codonNum = (ceiling(nucPos/3))*3
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
461 return( (codonNum-2):codonNum)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
462 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
463
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
464 # Given a nuclotide position, returns the codon number
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
465 # e.g. nuc 86 = codon 29
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
466 getCodonNumb <- function(nucPos){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
467 return( ceiling(nucPos/3) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
468 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
469
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
470 # Given a codon, returns all the nuc positions that make the codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
471 getCodonNucs <- function(codonNumb){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
472 getCodonPos(codonNumb*3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
473 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
474
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
475 computeCodonTable <- function(testID=1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
476
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
477 if(testID<=4){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
478 # Pre-compute every codons
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
479 intCounter = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
480 for(pOne in NUCLEOTIDES){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
481 for(pTwo in NUCLEOTIDES){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
482 for(pThree in NUCLEOTIDES){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
483 codon = paste(pOne,pTwo,pThree,sep="")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
484 colnames(CODON_TABLE)[intCounter] = codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
485 intCounter = intCounter + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
486 CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
487 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
488 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
489 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
490 chars = c("N","A","C","G","T", "-")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
491 for(a in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
492 for(b in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
493 for(c in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
494 if(a=="N" | b=="N" | c=="N"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
495 #cat(paste(a,b,c),sep="","\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
496 CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
497 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
498 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
499 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
500 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
501
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
502 chars = c("-","A","C","G","T")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
503 for(a in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
504 for(b in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
505 for(c in chars){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
506 if(a=="-" | b=="-" | c=="-"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
507 #cat(paste(a,b,c),sep="","\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
508 CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
509 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
510 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
511 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
512 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
513 CODON_TABLE <<- as.matrix(CODON_TABLE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
514 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
515 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
516
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
517 collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
518 #print(length(vecInputSeqs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
519 vecInputSeqs = unique(vecInputSeqs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
520 if(length(vecInputSeqs)==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
521 return( list( c(vecInputSeqs,glSeq), F) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
522 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
523 charInputSeqs <- sapply(vecInputSeqs, function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
524 s2c(x)[1:readEnd]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
525 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
526 charGLSeq <- s2c(glSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
527 matClone <- sapply(1:readEnd, function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
528 posNucs = unique(charInputSeqs[i,])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
529 posGL = charGLSeq[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
530 error = FALSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
531 if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
532 return(c("-",error))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
533 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
534 if(length(posNucs)==1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
535 return(c(posNucs[1],error))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
536 else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
537 if("N"%in%posNucs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
538 error=TRUE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
539 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
540 if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
541 return( c(posGL,error) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
542 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
543 #return( c(sample(posNucs[posNucs!="N"],1),error) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
544 if(nonTerminalOnly==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
545 return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
546 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
547 posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
548 posNucsTable = table(posNucs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
549 if(sum(posNucsTable>1)==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
550 return( c(posGL,error) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
551 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
552 return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
553 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
554 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
555
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
556 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
557 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
558 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
559
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
560
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
561 #print(length(vecInputSeqs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
562 return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
563 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
564 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
565
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
566 # Compute the expected for each sequence-germline pair
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
567 getExpectedIndividual <- function(matInput){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
568 if( any(grep("multicore",search())) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
569 facGL <- factor(matInput[,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
570 facLevels = levels(facGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
571 LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
572 computeMutabilities(facLevels[x])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
573 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
574 facIndex = match(facGL,facLevels)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
575
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
576 LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
577 cInput = rep(NA,nchar(matInput[x,1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
578 cInput[s2c(matInput[x,1])!="N"] = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
579 LisGLs_MutabilityU[[facIndex[x]]] * cInput
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
580 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
581
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
582 LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
583 computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
584 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
585
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
586 LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
587 #print(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
588 computeMutationTypes(matInput[x,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
589 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
590
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
591 LisGLs_Exp = mclapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
592 computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
593 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
594
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
595 ul_LisGLs_Exp = unlist(LisGLs_Exp)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
596 return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
597 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
598 facGL <- factor(matInput[,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
599 facLevels = levels(facGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
600 LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
601 computeMutabilities(facLevels[x])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
602 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
603 facIndex = match(facGL,facLevels)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
604
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
605 LisGLs_Mutability = lapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
606 cInput = rep(NA,nchar(matInput[x,1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
607 cInput[s2c(matInput[x,1])!="N"] = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
608 LisGLs_MutabilityU[[facIndex[x]]] * cInput
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
609 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
610
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
611 LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
612 computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
613 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
614
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
615 LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
616 #print(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
617 computeMutationTypes(matInput[x,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
618 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
619
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
620 LisGLs_Exp = lapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
621 computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
622 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
623
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
624 ul_LisGLs_Exp = unlist(LisGLs_Exp)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
625 return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
626
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
627 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
628 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
629
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
630 # Compute mutabilities of sequence based on the tri-nucleotide model
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
631 computeMutabilities <- function(paramSeq){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
632 seqLen = nchar(paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
633 seqMutabilites = rep(NA,seqLen)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
634
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
635 gaplessSeq = gsub("-", "", paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
636 gaplessSeqLen = nchar(gaplessSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
637 gaplessSeqMutabilites = rep(NA,gaplessSeqLen)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
638
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
639 if(mutabilityModel!=5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
640 pos<- 3:(gaplessSeqLen)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
641 subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
642 gaplessSeqMutabilites[pos] =
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
643 tapply( c(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
644 getMutability( substr(subSeq,1,3), 3) ,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
645 getMutability( substr(subSeq,2,4), 2),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
646 getMutability( substr(subSeq,3,5), 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
647 ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
648 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
649 #Pos 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
650 subSeq = substr(gaplessSeq,1,3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
651 gaplessSeqMutabilites[1] = getMutability(subSeq , 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
652 #Pos 2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
653 subSeq = substr(gaplessSeq,1,4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
654 gaplessSeqMutabilites[2] = mean( c(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
655 getMutability( substr(subSeq,1,3), 2) ,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
656 getMutability( substr(subSeq,2,4), 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
657 ),na.rm=T
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
658 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
659 seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
660 return(seqMutabilites)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
661 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
662
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
663 pos<- 3:(gaplessSeqLen)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
664 subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
665 gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
666 seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
667 return(seqMutabilites)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
668 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
669
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
670 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
671
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
672 # Returns the mutability of a triplet at a given position
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
673 getMutability <- function(codon, pos=1:3){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
674 triplets <- rownames(mutability)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
675 mutability[ match(codon,triplets) ,pos]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
676 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
677
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
678 getMutability5 <- function(fivemer){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
679 return(mutability[fivemer])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
680 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
681
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
682 # Returns the substitution probabilty
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
683 getTransistionProb <- function(nuc){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
684 substitution[nuc,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
685 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
686
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
687 getTransistionProb5 <- function(fivemer){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
688 if(any(which(fivemer==colnames(substitution)))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
689 return(substitution[,fivemer])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
690 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
691 return(array(NA,4))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
692 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
693 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
694
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
695 # Given a nuc, returns the other 3 nucs it can mutate to
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
696 canMutateTo <- function(nuc){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
697 NUCLEOTIDES[- which(NUCLEOTIDES==nuc)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
698 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
699
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
700 # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
701 canMutateToProb <- function(nuc){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
702 substitution[nuc,canMutateTo(nuc)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
703 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
704
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
705 # Compute targeting, based on precomputed mutatbility & substitution
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
706 computeTargeting <- function(param_strSeq,param_vecMutabilities){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
707
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
708 if(substitutionModel!=5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
709 vecSeq = s2c(param_strSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
710 matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
711 #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
712 dimnames( matTargeting ) = list(NUCLEOTIDES,1:(length(vecSeq)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
713 return (matTargeting)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
714 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
715
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
716 seqLen = nchar(param_strSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
717 seqsubstitution = matrix(NA,ncol=seqLen,nrow=4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
718 paramSeq <- param_strSeq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
719 gaplessSeq = gsub("-", "", paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
720 gaplessSeqLen = nchar(gaplessSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
721 gaplessSeqSubstitution = matrix(NA,ncol=gaplessSeqLen,nrow=4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
722
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
723 pos<- 3:(gaplessSeqLen)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
724 subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
725 gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
726 seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
727 #matTargeting <- param_vecMutabilities %*% seqsubstitution
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
728 matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
729 dimnames( matTargeting ) = list(NUCLEOTIDES,1:(seqLen))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
730 return (matTargeting)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
731 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
732 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
733
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
734 # Compute the mutations types
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
735 computeMutationTypes <- function(param_strSeq){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
736 #cat(param_strSeq,"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
737 #vecSeq = trimToLastCodon(param_strSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
738 lenSeq = nchar(param_strSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
739 vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
740 matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
741 dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(ncol(matMutationTypes)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
742 return(matMutationTypes)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
743 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
744 computeMutationTypesFast <- function(param_strSeq){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
745 matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
746 #dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(length(vecSeq)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
747 return(matMutationTypes)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
748 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
749 mutationTypeOptimized <- function( matOfCodons ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
750 apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
751 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
752
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
753 # Returns a vector of codons 1 mutation away from the given codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
754 permutateAllCodon <- function(codon){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
755 cCodon = s2c(codon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
756 matCodons = t(array(cCodon,dim=c(3,12)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
757 matCodons[1:4,1] = NUCLEOTIDES
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
758 matCodons[5:8,2] = NUCLEOTIDES
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
759 matCodons[9:12,3] = NUCLEOTIDES
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
760 apply(matCodons,1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
761 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
762
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
763 # Given two codons, tells you if the mutation is R or S (based on your definition)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
764 mutationType <- function(codonFrom,codonTo){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
765 if(testID==4){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
766 if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
767 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
768 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
769 mutationType = "S"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
770 if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
771 mutationType = "R"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
772 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
773 if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
774 mutationType = "Stop"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
775 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
776 return(mutationType)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
777 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
778 }else if(testID==5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
779 if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
780 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
781 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
782 if(codonFrom==codonTo){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
783 mutationType = "S"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
784 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
785 codonFrom = s2c(codonFrom)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
786 codonTo = s2c(codonTo)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
787 mutationType = "Stop"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
788 nucOfI = codonFrom[which(codonTo!=codonFrom)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
789 if(nucOfI=="C"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
790 mutationType = "R"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
791 }else if(nucOfI=="G"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
792 mutationType = "S"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
793 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
794 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
795 return(mutationType)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
796 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
797 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
798 if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
799 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
800 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
801 mutationType = "S"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
802 if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
803 mutationType = "R"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
804 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
805 if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
806 mutationType = "Stop"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
807 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
808 return(mutationType)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
809 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
810 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
811 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
812
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
813
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
814 #given a mat of targeting & it's corresponding mutationtypes returns
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
815 #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
816 computeExpected <- function(paramTargeting,paramMutationTypes){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
817 # Replacements
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
818 RPos = which(paramMutationTypes=="R")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
819 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
820 Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
821 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
822 Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
823 # Silents
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
824 SPos = which(paramMutationTypes=="S")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
825 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
826 Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
827 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
828 Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
829
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
830 return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
831 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
832
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
833 # Count the mutations in a sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
834 # each mutation is treated independently
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
835 analyzeMutations2NucUri_website <- function( rev_in_matrix ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
836 paramGL = rev_in_matrix[2,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
837 paramSeq = rev_in_matrix[1,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
838
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
839 #Fill seq with GL seq if gapped
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
840 #if( any(paramSeq=="-") ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
841 # gapPos_Seq = which(paramSeq=="-")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
842 # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
843 # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
844 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
845
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
846
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
847 #if( any(paramSeq=="N") ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
848 # gapPos_Seq = which(paramSeq=="N")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
849 # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
850 # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
851 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
852
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
853 analyzeMutations2NucUri( matrix(c( paramGL, paramSeq ),2,length(paramGL),byrow=T) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
854
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
855 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
856
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
857 #1 = GL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
858 #2 = Seq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
859 analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
860 paramGL = in_matrix[2,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
861 paramSeq = in_matrix[1,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
862 paramSeqUri = paramGL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
863 #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
864 mutations_val = paramGL != paramSeq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
865 if(any(mutations_val)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
866 mutationPos = {1:length(mutations_val)}[mutations_val]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
867 mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
868 length_mutations =length(mutationPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
869 mutationInfo = rep(NA,length_mutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
870 if(any(mutationPos)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
871
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
872 pos<- mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
873 pos_array<-array(sapply(pos,getCodonPos))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
874 codonGL = paramGL[pos_array]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
875
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
876 codonSeq = sapply(pos,function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
877 seqP = paramGL[getCodonPos(x)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
878 muCodonPos = {x-1}%%3+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
879 seqP[muCodonPos] = paramSeq[x]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
880 return(seqP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
881 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
882 GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
883 Seqcodons = apply(codonSeq,2,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
884 mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
885 names(mutationInfo) = mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
886 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
887 if(any(!is.na(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
888 return(mutationInfo[!is.na(mutationInfo)])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
889 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
890 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
891 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
892
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
893
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
894 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
895 return (NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
896 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
897 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
898
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
899 processNucMutations2 <- function(mu){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
900 if(!is.na(mu)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
901 #R
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
902 if(any(mu=="R")){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
903 Rs = mu[mu=="R"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
904 nucNumbs = as.numeric(names(Rs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
905 R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
906 R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
907 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
908 R_CDR = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
909 R_FWR = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
910 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
911
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
912 #S
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
913 if(any(mu=="S")){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
914 Ss = mu[mu=="S"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
915 nucNumbs = as.numeric(names(Ss))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
916 S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
917 S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
918 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
919 S_CDR = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
920 S_FWR = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
921 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
922
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
923
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
924 retVec = c(R_CDR,S_CDR,R_FWR,S_FWR)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
925 retVec[is.na(retVec)]=0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
926 return(retVec)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
927 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
928 return(rep(0,4))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
929 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
930 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
931
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
932
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
933 ## Z-score Test
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
934 computeZScore <- function(mat, test="Focused"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
935 matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
936 if(test=="Focused"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
937 #Z_Focused_CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
938 #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
939 P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
940 R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
941 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
942 matRes[,1] = (mat[,1]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
943
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
944 #Z_Focused_FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
945 #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
946 P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
947 R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
948 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
949 matRes[,2] = (mat[,3]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
950 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
951
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
952 if(test=="Local"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
953 #Z_Focused_CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
954 #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
955 P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
956 R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
957 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
958 matRes[,1] = (mat[,1]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
959
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
960 #Z_Focused_FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
961 #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
962 P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
963 R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
964 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
965 matRes[,2] = (mat[,3]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
966 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
967
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
968 if(test=="Imbalanced"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
969 #Z_Focused_CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
970 #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
971 P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
972 R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
973 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
974 matRes[,1] = (mat[,1]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
975
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
976 #Z_Focused_FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
977 #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
978 P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
979 R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
980 R_sd=sqrt(R_mean*(1-P))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
981 matRes[,2] = (mat[,3]-R_mean)/R_sd
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
982 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
983
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
984 matRes[is.nan(matRes)] = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
985 return(matRes)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
986 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
987
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
988 # Return a p-value for a z-score
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
989 z2p <- function(z){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
990 p=NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
991 if( !is.nan(z) && !is.na(z)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
992 if(z>0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
993 p = (1 - pnorm(z,0,1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
994 } else if(z<0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
995 p = (-1 * pnorm(z,0,1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
996 } else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
997 p = 0.5
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
998 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
999 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1000 p = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1001 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1002 return(p)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1003 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1004
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1005
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1006 ## Bayesian Test
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1007
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1008 # Fitted parameter for the bayesian framework
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1009 BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1010 CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1011
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1012 # Given x, M & p, returns a pdf
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1013 calculate_bayes <- function ( x=3, N=10, p=0.33,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1014 i=CONST_i,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1015 max_sigma=20,length_sigma=4001
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1016 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1017 if(!0%in%N){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1018 G <- max(length(x),length(N),length(p))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1019 x=array(x,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1020 N=array(N,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1021 p=array(p,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1022 sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1023 sigma_1<-log({i/{1-i}}/{p/{1-p}})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1024 index<-min(N,60)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1025 y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1026 if(!sum(is.na(y))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1027 tmp<-approx(sigma_1,y,sigma_s)$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1028 tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1029 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1030 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1031 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1032 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1033 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1034 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1035 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1036 # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1037 computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1038 flagOneSeq = F
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1039 if(nrow(mat)==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1040 mat=rbind(mat,mat)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1041 flagOneSeq = T
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1042 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1043 if(test=="Focused"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1044 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1045 P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1046 N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1047 X = c(mat[,1],0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1048 bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1049 bayesCDR = bayesCDR[-length(bayesCDR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1050
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1051 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1052 P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1053 N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1054 X = c(mat[,3],0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1055 bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1056 bayesFWR = bayesFWR[-length(bayesFWR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1057 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1058
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1059 if(test=="Local"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1060 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1061 P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1062 N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1063 X = c(mat[,1],0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1064 bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1065 bayesCDR = bayesCDR[-length(bayesCDR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1066
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1067 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1068 P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1069 N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1070 X = c(mat[,3],0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1071 bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1072 bayesFWR = bayesFWR[-length(bayesFWR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1073 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1074
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1075 if(test=="Imbalanced"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1076 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1077 P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1078 N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1079 X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1080 bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1081 bayesCDR = bayesCDR[-length(bayesCDR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1082
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1083 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1084 P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1085 N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1086 X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1087 bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1088 bayesFWR = bayesFWR[-length(bayesFWR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1089 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1090
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1091 if(test=="ImbalancedSilent"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1092 #CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1093 P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1094 N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1095 X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1096 bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1097 bayesCDR = bayesCDR[-length(bayesCDR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1098
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1099 #FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1100 P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1101 N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1102 X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1103 bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1104 bayesFWR = bayesFWR[-length(bayesFWR)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1105 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1106
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1107 if(flagOneSeq==T){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1108 bayesCDR = bayesCDR[1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1109 bayesFWR = bayesFWR[1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1110 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1111 return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1112 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1113
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1114 ##Covolution
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1115 break2chunks<-function(G=1000){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1116 base<-2^round(log(sqrt(G),2),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1117 return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1118 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1119
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1120 PowersOfTwo <- function(G=100){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1121 exponents <- array()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1122 i = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1123 while(G > 0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1124 i=i+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1125 exponents[i] <- floor( log2(G) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1126 G <- G-2^exponents[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1127 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1128 return(exponents)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1129 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1130
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1131 convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1132 G = ncol(cons)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1133 if(G>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1134 for(gen in log(G,2):1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1135 ll<-seq(from=2,to=2^gen,by=2)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1136 sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1137 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1138 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1139 return( cons[,1] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1140 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1141
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1142 convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1143 if(length(ncol(cons))) G<-ncol(cons)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1144 groups <- PowersOfTwo(G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1145 matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1146 startIndex = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1147 for( i in 1:length(groups) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1148 stopIndex <- 2^groups[i] + startIndex - 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1149 if(stopIndex!=startIndex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1150 matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1151 startIndex = stopIndex + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1152 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1153 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1154 if(G>1) matG[,i] <- cons[,startIndex:stopIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1155 else matG[,i] <- cons
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1156 #startIndex = stopIndex + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1157 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1158 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1159 return( list( matG, groups ) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1160 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1161
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1162 weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1163 lx<-length(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1164 ly<-length(y)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1165 if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1166 if(w<1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1167 y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1168 x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1169 lx<-length(x1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1170 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1171 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1172 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1173 y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1174 x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1175 lx<-length(x1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1176 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1177 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1178 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1179 else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1180 x1<-x
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1181 y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1182 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1183 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1184 tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1185 tmp[tmp<=0] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1186 return(tmp/sum(tmp))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1187 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1188
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1189 calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1190 matG <- listMatG[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1191 groups <- listMatG[[2]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1192 i = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1193 resConv <- matG[,i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1194 denom <- 2^groups[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1195 if(length(groups)>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1196 while( i<length(groups) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1197 i = i + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1198 resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1199 #cat({{2^groups[i]}/denom},"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1200 denom <- denom + 2^groups[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1201 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1202 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1203 return(resConv)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1204 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1205
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1206 # Given a list of PDFs, returns a convoluted PDF
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1207 groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1208 listPosteriors = listPosteriors[ !is.na(listPosteriors) ]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1209 Length_Postrior<-length(listPosteriors)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1210 if(Length_Postrior>1 & Length_Postrior<=Threshold){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1211 cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1212 listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1213 y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1214 return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1215 }else if(Length_Postrior==1) return(listPosteriors[[1]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1216 else if(Length_Postrior==0) return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1217 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1218 cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1219 y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1220 return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1221 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1222 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1223
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1224 fastConv<-function(cons, max_sigma=20, length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1225 chunks<-break2chunks(G=ncol(cons))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1226 if(ncol(cons)==3) chunks<-2:1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1227 index_chunks_end <- cumsum(chunks)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1228 index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1229 index_chunks <- cbind(index_chunks_start,index_chunks_end)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1230
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1231 case <- sum(chunks!=chunks[1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1232 if(case==1) End <- max(1,((length(index_chunks)/2)-1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1233 else End <- max(1,((length(index_chunks)/2)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1234
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1235 firsts <- sapply(1:End,function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1236 indexes<-index_chunks[i,1]:index_chunks[i,2]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1237 convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1238 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1239 if(case==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1240 result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1241 }else if(case==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1242 last<-list(calculate_bayesGHelper(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1243 convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1244 ),0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1245 result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1246 result<-calculate_bayesGHelper(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1247 list(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1248 cbind(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1249 result_first,last[[1]]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1250 c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1251 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1252 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1253 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1254 return(as.vector(result))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1255 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1256
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1257 # Computes the 95% CI for a pdf
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1258 calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1259 if(length(Pdf)!=length_sigma) return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1260 sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1261 cdf = cumsum(Pdf)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1262 cdf = cdf/cdf[length(cdf)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1263 return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1264 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1265
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1266 # Computes a mean for a pdf
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1267 calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1268 if(length(Pdf)!=length_sigma) return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1269 sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1270 norm = {length_sigma-1}/2/max_sigma
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1271 return( (Pdf%*%sigma_s/norm) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1272 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1273
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1274 # Returns the mean, and the 95% CI for a pdf
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1275 calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1276 if(is.na(Pdf))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1277 return(rep(NA,3))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1278 bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1279 bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1280 return(c(bMean, bCI))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1281 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1282
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1283 # Computes the p-value of a pdf
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1284 computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1285 if(length(Pdf)>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1286 norm = {length_sigma-1}/2/max_sigma
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1287 pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1288 if(pVal>0.5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1289 pVal = pVal-1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1290 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1291 return(pVal)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1292 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1293 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1294 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1295 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1296
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1297 # Compute p-value of two distributions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1298 compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1299 #print(c(length(dens1),length(dens2)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1300 if(length(dens1)>1 & length(dens2)>1 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1301 dens1<-dens1/sum(dens1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1302 dens2<-dens2/sum(dens2)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1303 cum2 <- cumsum(dens2)-dens2/2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1304 tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1305 #print(tmp)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1306 if(tmp>0.5)tmp<-tmp-1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1307 return( tmp )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1308 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1309 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1310 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1311 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1312 #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1313 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1314
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1315 # get number of seqeunces contributing to the sigma (i.e. seqeunces with mutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1316 numberOfSeqsWithMutations <- function(matMutations,test=1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1317 if(test==4)test=2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1318 cdrSeqs <- 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1319 fwrSeqs <- 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1320 if(test==1){#focused
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1321 cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2,4)]) })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1322 fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4,2)]) })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1323 if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1324 if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1325 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1326 if(test==2){#local
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1327 cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2)]) })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1328 fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4)]) })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1329 if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1330 if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1331 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1332 return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1333 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1334
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1335
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1336
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1337 shadeColor <- function(sigmaVal=NA,pVal=NA){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1338 if(is.na(sigmaVal) & is.na(pVal)) return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1339 if(is.na(sigmaVal) & !is.na(pVal)) sigmaVal=sign(pVal)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1340 if(is.na(pVal) || pVal==1 || pVal==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1341 returnColor = "#FFFFFF";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1342 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1343 colVal=abs(pVal);
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1344
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1345 if(sigmaVal<0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1346 if(colVal>0.1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1347 returnColor = "#CCFFCC";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1348 if(colVal<=0.1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1349 returnColor = "#99FF99";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1350 if(colVal<=0.050)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1351 returnColor = "#66FF66";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1352 if(colVal<=0.010)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1353 returnColor = "#33FF33";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1354 if(colVal<=0.005)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1355 returnColor = "#00FF00";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1356
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1357 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1358 if(colVal>0.1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1359 returnColor = "#FFCCCC";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1360 if(colVal<=0.1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1361 returnColor = "#FF9999";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1362 if(colVal<=0.05)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1363 returnColor = "#FF6666";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1364 if(colVal<=0.01)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1365 returnColor = "#FF3333";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1366 if(colVal<0.005)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1367 returnColor = "#FF0000";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1368 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1369 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1370
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1371 return(returnColor)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1372 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1373
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1374
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1375
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1376 plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1377 if(!log){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1378 x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1379 y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1380 }else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1381 if(log==2){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1382 x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1383 y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1384 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1385 if(log==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1386 x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1387 y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1388 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1389 if(log==3){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1390 x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1391 y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1392 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1393 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1394 return(c("x"=x,"y"=y))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1395 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1396
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1397 # SHMulation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1398
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1399 # Based on targeting, introduce a single mutation & then update the targeting
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1400 oneMutation <- function(){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1401 # Pick a postion + mutation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1402 posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1403 posNucNumb = ceiling(posMutation/4) # Nucleotide number
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1404 posNucKind = 4 - ( (posNucNumb*4) - posMutation ) # Nuc the position mutates to
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1405
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1406 #mutate the simulation sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1407 seqSimVec <- s2c(seqSim)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1408 seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1409 seqSim <<- c2s(seqSimVec)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1410
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1411 #update Mutability, Targeting & MutationsTypes
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1412 updateMutabilityNTargeting(posNucNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1413
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1414 #return(c(posNucNumb,NUCLEOTIDES[posNucKind]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1415 return(posNucNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1416 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1417
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1418 updateMutabilityNTargeting <- function(position){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1419 min_i<-max((position-2),1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1420 max_i<-min((position+2),nchar(seqSim))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1421 min_ii<-min(min_i,3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1422
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1423 #mutability - update locally
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1424 seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1425
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1426
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1427 #targeting - compute locally
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1428 seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1429 seqTargeting[is.na(seqTargeting)] <<- 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1430 #mutCodonPos = getCodonPos(position)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1431 mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1432 #cat(mutCodonPos,"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1433 mutTypeCodon = getCodonPos(position)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1434 seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1435 # Stop = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1436 if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1437 seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1438 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1439
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1440
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1441 #Selection
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1442 selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1443 # CDR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1444 selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1445 seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1446 seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1447
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1448 # FWR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1449 selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1450 seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1451 seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1452
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1453 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1454
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1455
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1456
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1457 # Validate the mutation: if the mutation has not been sampled before validate it, else discard it.
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1458 validateMutation <- function(){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1459 if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1460 uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1461 mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1462 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1463 if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1464 mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1465 uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1466 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1467 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1468 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1469
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1470
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1471
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1472 # Places text (labels) at normalized coordinates
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1473 myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1474 par(xpd=TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1475 if(!log)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1476 text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1477 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1478 if(log==2)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1479 text(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1480 par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1481 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1482 w,cex=cex,adj=adj,col=thecol)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1483 if(log==1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1484 text(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1485 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1486 par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1487 w,cex=cex,adj=adj,col=thecol)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1488 if(log==3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1489 text(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1490 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1491 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1492 w,cex=cex,adj=adj,col=thecol)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1493 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1494 par(xpd=FALSE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1495 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1496
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1497
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1498
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1499 # Count the mutations in a sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1500 analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1501
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1502 paramGL = s2c(matInput[inputMatrixIndex,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1503 paramSeq = s2c(matInput[inputMatrixIndex,1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1504
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1505 #if( any(paramSeq=="N") ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1506 # gapPos_Seq = which(paramSeq=="N")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1507 # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1508 # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1509 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1510 mutations_val = paramGL != paramSeq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1511
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1512 if(any(mutations_val)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1513 mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1514 length_mutations =length(mutationPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1515 mutationInfo = rep(NA,length_mutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1516
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1517 pos<- mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1518 pos_array<-array(sapply(pos,getCodonPos))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1519 codonGL = paramGL[pos_array]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1520 codonSeqWhole = paramSeq[pos_array]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1521 codonSeq = sapply(pos,function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1522 seqP = paramGL[getCodonPos(x)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1523 muCodonPos = {x-1}%%3+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1524 seqP[muCodonPos] = paramSeq[x]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1525 return(seqP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1526 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1527 GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1528 SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1529 Seqcodons = apply(codonSeq,2,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1530
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1531 mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1532 names(mutationInfo) = mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1533
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1534 mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1535 names(mutationInfoWhole) = mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1536
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1537 mutationInfo <- mutationInfo[!is.na(mutationInfo)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1538 mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1539
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1540 if(any(!is.na(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1541
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1542 #Filter based on Stop (at the codon level)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1543 if(seqWithStops==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1544 nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1545 mutationInfo = mutationInfo[nucleotidesAtStopCodons]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1546 mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1547 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1548 countStops = sum(mutationInfoWhole=="Stop")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1549 if(seqWithStops==2 & countStops==0) mutationInfo = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1550 if(seqWithStops==3 & countStops>0) mutationInfo = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1551 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1552
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1553 if(any(!is.na(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1554 #Filter mutations based on multipleMutation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1555 if(multipleMutation==1 & !is.na(mutationInfo)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1556 mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1557 tableMutationCodons <- table(mutationCodons)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1558 codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1559 if(any(codonsWithMultipleMutations)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1560 #remove the nucleotide mutations in the codons with multiple mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1561 mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1562 #replace those codons with Ns in the input sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1563 paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1564 matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1565 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1566 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1567
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1568 #Filter mutations based on the model
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1569 if(any(mutationInfo)==T | is.na(any(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1570
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1571 if(model==1 & !is.na(mutationInfo)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1572 mutationInfo <- mutationInfo[mutationInfo=="S"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1573 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1574 if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1575 else return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1576 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1577 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1578 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1579 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1580 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1581 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1582
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1583
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1584 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1585 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1586 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1587
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1588
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1589 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1590 return (NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1591 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1592 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1593
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1594 analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1595
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1596 paramGL = s2c(inputArray[2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1597 paramSeq = s2c(inputArray[1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1598 inputSeq <- inputArray[1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1599 #if( any(paramSeq=="N") ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1600 # gapPos_Seq = which(paramSeq=="N")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1601 # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1602 # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1603 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1604 mutations_val = paramGL != paramSeq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1605
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1606 if(any(mutations_val)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1607 mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1608 length_mutations =length(mutationPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1609 mutationInfo = rep(NA,length_mutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1610
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1611 pos<- mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1612 pos_array<-array(sapply(pos,getCodonPos))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1613 codonGL = paramGL[pos_array]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1614 codonSeqWhole = paramSeq[pos_array]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1615 codonSeq = sapply(pos,function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1616 seqP = paramGL[getCodonPos(x)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1617 muCodonPos = {x-1}%%3+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1618 seqP[muCodonPos] = paramSeq[x]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1619 return(seqP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1620 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1621 GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1622 SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1623 Seqcodons = apply(codonSeq,2,c2s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1624
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1625 mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1626 names(mutationInfo) = mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1627
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1628 mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1629 names(mutationInfoWhole) = mutationPos
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1630
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1631 mutationInfo <- mutationInfo[!is.na(mutationInfo)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1632 mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1633
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1634 if(any(!is.na(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1635
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1636 #Filter based on Stop (at the codon level)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1637 if(seqWithStops==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1638 nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1639 mutationInfo = mutationInfo[nucleotidesAtStopCodons]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1640 mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1641 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1642 countStops = sum(mutationInfoWhole=="Stop")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1643 if(seqWithStops==2 & countStops==0) mutationInfo = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1644 if(seqWithStops==3 & countStops>0) mutationInfo = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1645 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1646
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1647 if(any(!is.na(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1648 #Filter mutations based on multipleMutation
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1649 if(multipleMutation==1 & !is.na(mutationInfo)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1650 mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1651 tableMutationCodons <- table(mutationCodons)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1652 codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1653 if(any(codonsWithMultipleMutations)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1654 #remove the nucleotide mutations in the codons with multiple mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1655 mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1656 #replace those codons with Ns in the input sequence
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1657 paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1658 #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1659 inputSeq <- c2s(paramSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1660 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1661 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1662
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1663 #Filter mutations based on the model
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1664 if(any(mutationInfo)==T | is.na(any(mutationInfo))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1665
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1666 if(model==1 & !is.na(mutationInfo)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1667 mutationInfo <- mutationInfo[mutationInfo=="S"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1668 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1669 if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1670 else return(list(NA,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1671 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1672 return(list(NA,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1673 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1674 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1675 return(list(NA,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1676 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1677
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1678
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1679 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1680 return(list(NA,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1681 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1682
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1683
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1684 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1685 return (list(NA,inputSeq))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1686 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1687 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1688
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1689 # triMutability Background Count
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1690 buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1691
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1692 #rowOrigMatInput = matInput[inputMatrixIndex,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1693 seqGL = gsub("-", "", matInput[inputMatrixIndex,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1694 seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1695 #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1696 tempInput <- cbind(seqInput,seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1697 seqLength = nchar(seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1698 list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1699 mutationCount <- list_analyzeMutationsFixed[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1700 seqInput <- list_analyzeMutationsFixed[[2]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1701 BackgroundMatrix = mutabilityMatrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1702 MutationMatrix = mutabilityMatrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1703 MutationCountMatrix = mutabilityMatrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1704 if(!is.na(mutationCount)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1705 if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1706
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1707 fivermerStartPos = 1:(seqLength-4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1708 fivemerLength <- length(fivermerStartPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1709 fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1710 fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1711
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1712 #Background
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1713 for(fivemerIndex in 1:fivemerLength){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1714 fivemer = fivemerGL[fivemerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1715 if(!any(grep("N",fivemer))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1716 fivemerCodonPos = fivemerCodon(fivemerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1717 fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1718 fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1719
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1720 # All mutations model
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1721 #if(!any(grep("N",fivemerReadingFrameCodon))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1722 if(model==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1723 if(stopMutations==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1724 if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1725 BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1726 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1727 if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1728 positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1729 BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1730 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1731 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1732 }else{ # Only silent mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1733 if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1734 positionWithinCodon = which(fivemerCodonPos==3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1735 BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1736 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1737 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1738 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1739 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1740 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1741
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1742 #Mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1743 if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1744 if(model==1) mutationCount = mutationCount[mutationCount=="S"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1745 mutationPositions = as.numeric(names(mutationCount))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1746 mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1747 mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1748 countMutations = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1749 for(mutationPosition in mutationPositions){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1750 fivemerIndex = mutationPosition-2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1751 fivemer = fivemerSeq[fivemerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1752 GLfivemer = fivemerGL[fivemerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1753 fivemerCodonPos = fivemerCodon(fivemerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1754 fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1755 fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1756 if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1757 if(model==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1758 countMutations = countMutations + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1759 MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1760 MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1761 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1762 if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1763 countMutations = countMutations + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1764 positionWithinCodon = which(fivemerCodonPos==3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1765 glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1766 inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1767 MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1768 MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1769 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1770 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1771 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1772 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1773
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1774 seqMutability = MutationMatrix/BackgroundMatrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1775 seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1776 #cat(inputMatrixIndex,"\t",countMutations,"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1777 return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1778
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1779 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1780 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1781
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1782 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1783
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1784 #Returns the codon position containing the middle nucleotide
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1785 fivemerCodon <- function(fivemerIndex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1786 codonPos = list(2:4,1:3,3:5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1787 fivemerType = fivemerIndex%%3
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1788 return(codonPos[[fivemerType+1]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1789 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1790
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1791 #returns probability values for one mutation in codons resulting in R, S or Stop
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1792 probMutations <- function(typeOfMutation){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1793 matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1794 for(codon in rownames(matMutationProb)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1795 if( !any(grep("N",codon)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1796 for(muPos in 1:3){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1797 matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1798 glNuc = matCodon[1,muPos]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1799 matCodon[,muPos] = canMutateTo(glNuc)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1800 substitutionRate = substitution[glNuc,matCodon[,muPos]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1801 typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1802 matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1803 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1804 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1805 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1806
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1807 return(matMutationProb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1808 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1809
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1810
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1811
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1812
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1813 #Mapping Trinucleotides to fivemers
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1814 mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1815 rownames(triMutability) <- triMutability_Names
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1816 Fivemer<-rep(NA,1024)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1817 names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1818 Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1819 Fivemer<-Fivemer/sum(Fivemer)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1820 return(Fivemer)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1821 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1822
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1823 collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1824 Indices<-substring(names(Fivemer),3,3)==NUC
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1825 Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1826 tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1827 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1828
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1829
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1830
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1831 CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1832 Indices<-substring(names(Fivemer),3,3)==NUC
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1833 Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1834 tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1835 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1836
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1837 #Uses the real counts of the mutated fivemers
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1838 CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1839 Indices<-substring(names(Fivemer),3,3)==NUC
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1840 Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1841 tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1842 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1843
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1844 bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1845 N<-sum(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1846 if(N){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1847 p<-x/N
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1848 k<-length(x)-1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1849 tmp<-rmultinom(M, size = N, prob=p)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1850 tmp_p<-apply(tmp,2,function(y)y/N)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1851 (apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k))))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1852 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1853 else return(matrix(0,2,length(x)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1854 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1855
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1856
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1857
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1858
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1859 bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1860
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1861 N<-sum(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1862 k<-length(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1863 y<-rep(1:k,x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1864 tmp<-sapply(1:M,function(i)sample(y,n))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1865 if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1866 if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1867 (apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1)))))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1868 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1869
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1870
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1871
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1872 p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1873 n=sum(x_obs)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1874 N<-sum(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1875 k<-length(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1876 y<-rep(1:k,x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1877 tmp<-sapply(1:M,function(i)sample(y,n))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1878 if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1879 if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1880 tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1881 sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1882 sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1883 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1884
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1885 #"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1886 # Remove SNPs from IMGT germline segment alleles
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1887 generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1888 repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1889 alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1890 SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1891 Indices<-NULL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1892 for(i in 1:length(x)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1893 firstSeq = s2c(x[[1]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1894 iSeq = s2c(x[[i]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1895 Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="." ))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1896 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1897 return(sort(unique(Indices)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1898 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1899 repertoireOut <- repertoireIn
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1900 repertoireOut <- lapply(names(repertoireOut), function(repertoireName){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1901 alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1902 geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1903 alleleSeq <- s2c(repertoireOut[[repertoireName]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1904 alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1905 alleleSeq <- c2s(alleleSeq)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1906 repertoireOut[[repertoireName]] <- alleleSeq
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1907 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1908 names(repertoireOut) <- names(repertoireIn)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1909 write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1910
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1911 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1912
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1913
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1914
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1915
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1916
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1917
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1918 ############
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1919 groupBayes2 = function(indexes, param_resultMat){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1920
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1921 BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1922 BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1923 #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1924 #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1925 #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1926 #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1927 return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1928 "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1929 #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1930 #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1931 # "BayesGDist_Global_CDR" = BayesGDist_Global_CDR,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1932 # "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1933
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1934
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1935 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1936
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1937
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1938 calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1939 G <- max(length(x),length(N),length(p))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1940 x=array(x,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1941 N=array(N,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1942 p=array(p,dim=G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1943
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1944 indexOfZero = N>0 & p>0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1945 N = N[indexOfZero]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1946 x = x[indexOfZero]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1947 p = p[indexOfZero]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1948 G <- length(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1949
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1950 if(G){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1951
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1952 cons<-array( dim=c(length_sigma,G) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1953 if(G==1) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1954 return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1955 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1956 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1957 for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1958 listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1959 y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1960 return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1961 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1962 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1963 return(NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1964 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1965 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1966
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1967
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1968 calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1969 matG <- listMatG[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1970 groups <- listMatG[[2]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1971 i = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1972 resConv <- matG[,i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1973 denom <- 2^groups[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1974 if(length(groups)>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1975 while( i<length(groups) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1976 i = i + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1977 resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1978 #cat({{2^groups[i]}/denom},"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1979 denom <- denom + 2^groups[i]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1980 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1981 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1982 return(resConv)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1983 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1984
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1985 weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1986 lx<-length(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1987 ly<-length(y)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1988 if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1989 if(w<1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1990 y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1991 x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1992 lx<-length(x1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1993 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1994 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1995 else {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1996 y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1997 x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1998 lx<-length(x1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1999 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2000 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2001 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2002 else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2003 x1<-x
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2004 y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2005 ly<-length(y1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2006 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2007 tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2008 tmp[tmp<=0] = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2009 return(tmp/sum(tmp))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2010 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2011
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2012 ########################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2013
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2014
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2015
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2016
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2017 mutabilityMatrixONE<-rep(0,4)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2018 names(mutabilityMatrixONE)<-NUCLEOTIDES
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2019
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2020 # triMutability Background Count
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2021 buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2022
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2023 #rowOrigMatInput = matInput[inputMatrixIndex,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2024 seqGL = gsub("-", "", matInput[inputMatrixIndex,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2025 seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2026 matInput[inputMatrixIndex,] <<- c(seqInput,seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2027 seqLength = nchar(seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2028 mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2029 BackgroundMatrix = mutabilityMatrixONE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2030 MutationMatrix = mutabilityMatrixONE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2031 MutationCountMatrix = mutabilityMatrixONE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2032 if(!is.na(mutationCount)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2033 if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2034
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2035 # ONEmerStartPos = 1:(seqLength)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2036 # ONEmerLength <- length(ONEmerStartPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2037 ONEmerGL <- s2c(seqGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2038 ONEmerSeq <- s2c(seqInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2039
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2040 #Background
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2041 for(ONEmerIndex in 1:seqLength){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2042 ONEmer = ONEmerGL[ONEmerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2043 if(ONEmer!="N"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2044 ONEmerCodonPos = getCodonPos(ONEmerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2045 ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2046 ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2047
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2048 # All mutations model
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2049 #if(!any(grep("N",ONEmerReadingFrameCodon))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2050 if(model==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2051 if(stopMutations==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2052 if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2053 BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2054 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2055 if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2056 positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2057 BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2058 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2059 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2060 }else{ # Only silent mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2061 if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2062 positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2063 BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2064 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2065 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2066 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2067 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2068 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2069
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2070 #Mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2071 if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2072 if(model==1) mutationCount = mutationCount[mutationCount=="S"]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2073 mutationPositions = as.numeric(names(mutationCount))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2074 mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2075 mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2076 countMutations = 0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2077 for(mutationPosition in mutationPositions){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2078 ONEmerIndex = mutationPosition
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2079 ONEmer = ONEmerSeq[ONEmerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2080 GLONEmer = ONEmerGL[ONEmerIndex]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2081 ONEmerCodonPos = getCodonPos(ONEmerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2082 ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2083 ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2084 if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2085 if(model==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2086 countMutations = countMutations + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2087 MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2088 MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2089 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2090 if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2091 countMutations = countMutations + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2092 positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2093 glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2094 inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2095 MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2096 MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2097 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2098 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2099 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2100 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2101
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2102 seqMutability = MutationMatrix/BackgroundMatrix
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2103 seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2104 #cat(inputMatrixIndex,"\t",countMutations,"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2105 return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2106 # tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2107 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2108 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2109
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2110 ################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2111 # $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2112
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2113 trim <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2114 UseMethod("trim", s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2115
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2116 trim.default <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2117 s
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2118
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2119 trim.character <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2120 {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2121 s <- sub(pattern="^ +", replacement="", x=s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2122 s <- sub(pattern=" +$", replacement="", x=s)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2123 s
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2124 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2125
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2126 trim.factor <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2127 {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2128 levels(s) <- trim(levels(s))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2129 if(recode.factor) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2130 dots <- list(x=s, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2131 if(is.null(dots$sort)) dots$sort <- sort
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2132 s <- do.call(what=reorder.factor, args=dots)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2133 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2134 s
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2135 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2136
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2137 trim.list <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2138 lapply(s, trim, recode.factor=recode.factor, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2139
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2140 trim.data.frame <- function(s, recode.factor=TRUE, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2141 {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2142 s[] <- trim.list(s, recode.factor=recode.factor, ...)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2143 s
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2144 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2145 #######################################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2146 # Compute the expected for each sequence-germline pair by codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2147 getExpectedIndividualByCodon <- function(matInput){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2148 if( any(grep("multicore",search())) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2149 facGL <- factor(matInput[,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2150 facLevels = levels(facGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2151 LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2152 computeMutabilities(facLevels[x])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2153 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2154 facIndex = match(facGL,facLevels)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2155
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2156 LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2157 cInput = rep(NA,nchar(matInput[x,1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2158 cInput[s2c(matInput[x,1])!="N"] = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2159 LisGLs_MutabilityU[[facIndex[x]]] * cInput
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2160 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2161
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2162 LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2163 computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2164 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2165
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2166 LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2167 #print(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2168 computeMutationTypes(matInput[x,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2169 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2170
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2171 LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2172 Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2173 function(codonNucs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2174 RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2175 sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2176 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2177 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2178 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2179
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2180 LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2181 Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2182 function(codonNucs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2183 SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2184 sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2185 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2186 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2187 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2188
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2189 Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2190 Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2191 return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2192 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2193 facGL <- factor(matInput[,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2194 facLevels = levels(facGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2195 LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2196 computeMutabilities(facLevels[x])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2197 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2198 facIndex = match(facGL,facLevels)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2199
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2200 LisGLs_Mutability = lapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2201 cInput = rep(NA,nchar(matInput[x,1]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2202 cInput[s2c(matInput[x,1])!="N"] = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2203 LisGLs_MutabilityU[[facIndex[x]]] * cInput
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2204 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2205
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2206 LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2207 computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2208 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2209
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2210 LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2211 #print(x)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2212 computeMutationTypes(matInput[x,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2213 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2214
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2215 LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2216 Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2217 function(codonNucs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2218 RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2219 sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2220 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2221 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2222 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2223
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2224 LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2225 Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3,
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2226 function(codonNucs){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2227 SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2228 sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2229 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2230 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2231 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2232
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2233 Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2234 Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2235 return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2236 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2237 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2238
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2239 # getObservedMutationsByCodon <- function(listMutations){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2240 # numbSeqs <- length(listMutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2241 # obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2242 # obsMu_S <- obsMu_R
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2243 # temp <- mclapply(1:length(listMutations), function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2244 # arrMutations = listMutations[[i]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2245 # RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2246 # RPos <- sapply(RPos,getCodonNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2247 # if(any(RPos)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2248 # tabR <- table(RPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2249 # obsMu_R[i,as.numeric(names(tabR))] <<- tabR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2250 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2251 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2252 # SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2253 # SPos <- sapply(SPos,getCodonNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2254 # if(any(SPos)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2255 # tabS <- table(SPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2256 # obsMu_S[i,names(tabS)] <<- tabS
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2257 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2258 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2259 # )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2260 # return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2261 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2262
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2263 getObservedMutationsByCodon <- function(listMutations){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2264 numbSeqs <- length(listMutations)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2265 obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2266 obsMu_S <- obsMu_R
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2267 temp <- lapply(1:length(listMutations), function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2268 arrMutations = listMutations[[i]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2269 RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2270 RPos <- sapply(RPos,getCodonNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2271 if(any(RPos)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2272 tabR <- table(RPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2273 obsMu_R[i,as.numeric(names(tabR))] <<- tabR
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2274 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2275
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2276 SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2277 SPos <- sapply(SPos,getCodonNumb)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2278 if(any(SPos)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2279 tabS <- table(SPos)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2280 obsMu_S[i,names(tabS)] <<- tabS
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2281 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2282 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2283 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2284 return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2285 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2286