annotate Baseline_Main.r @ 0:d685e7ba0ed4 draft

Uploaded
author davidvanzessen
date Tue, 15 Jul 2014 08:43:49 -0400
parents
children d8de51314d3f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
1 #########################################################################################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
2 # License Agreement
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
3 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
4 # THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
5 # ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
6 # APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
7 # OR COPYRIGHT LAW IS PROHIBITED.
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
8 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
9 # BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
10 # BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
11 # TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
12 # CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
13 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
14 # BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
15 # Coded by: Mohamed Uduman & Gur Yaari
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
16 # Copyright 2012 Kleinstein Lab
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
17 # Version: 1.3 (01/23/2014)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
18 #########################################################################################
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
19
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
20 op <- options();
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
21 options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
22 library('seqinr')
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
23 if( Sys.info()[1]=="Linux"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
24 library("multicore")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
25 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
26
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
27
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
28 # Load functions and initialize global variables
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
29 source("Baseline_Functions.r")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
30
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
31 # Initialize parameters with user provided arguments
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
32 arg <- commandArgs(TRUE)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
33 #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
34 #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
35 #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
36 testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
37 species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
38 substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
39 mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
40 clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
41 fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
42 region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
43 inputFilePath <- arg[8] # Full path to input file
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
44 outputPath <- arg[9] # Full path to location of output files
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
45 outputID <- arg[10] # ID for session output
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
46
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
47
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
48 if(testID==5){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
49 traitChangeModel <- 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
50 if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
51 initializeTraitChange(traitChangeModel)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
52 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
53
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
54 # Initialize other parameters/variables
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
55
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
56 # Initialzie the codon table ( definitions of R/S )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
57 computeCodonTable(testID)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
58
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
59 # Initialize
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
60 # Test Name
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
61 testName<-"Focused"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
62 if(testID==2) testName<-"Local"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
63 if(testID==3) testName<-"Imbalanced"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
64 if(testID==4) testName<-"ImbalancedSilent"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
65
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
66 # Indel placeholders initialization
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
67 indelPos <- NULL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
68 delPos <- NULL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
69 insPos <- NULL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
70
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
71 # Initialize in Tranistion & Mutability matrixes
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
72 substitution <- initializeSubstitutionMatrix(substitutionModel,species)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
73 mutability <- initializeMutabilityMatrix(mutabilityModel,species)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
74
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
75 # FWR/CDR boundaries
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
76 flagTrim <- F
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
77 if( is.na(region[7])){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
78 flagTrim <- T
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
79 region[7]<-region[6]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
80 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
81 readStart = min(region,na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
82 readEnd = max(region,na.rm=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
83 if(readStart>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
84 region = region - (readStart - 1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
85 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
86 region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
87 region_Cod = region
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
88
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
89 readStart = (readStart*3)-2
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
90 readEnd = (readEnd*3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
91
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
92 FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
93 rep(FALSE,(region_Nuc[3]-region_Nuc[2])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
94 rep(TRUE,(region_Nuc[4]-region_Nuc[3])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
95 rep(FALSE,(region_Nuc[5]-region_Nuc[4])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
96 rep(TRUE,(region_Nuc[6]-region_Nuc[5])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
97 rep(FALSE,(region_Nuc[7]-region_Nuc[6]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
98 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
99 CDR_Nuc <- (1-FWR_Nuc)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
100 CDR_Nuc <- as.logical(CDR_Nuc)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
101 FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
102 CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
103
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
104 FWR_Codon <- c( rep(TRUE,(region[2])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
105 rep(FALSE,(region[3]-region[2])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
106 rep(TRUE,(region[4]-region[3])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
107 rep(FALSE,(region[5]-region[4])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
108 rep(TRUE,(region[6]-region[5])),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
109 rep(FALSE,(region[7]-region[6]))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
110 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
111 CDR_Codon <- (1-FWR_Codon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
112 CDR_Codon <- as.logical(CDR_Codon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
113
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
114
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
115 # Read input FASTA file
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
116 tryCatch(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
117 inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
118 , error = function(ex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
119 cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
120 q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
121 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
122 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
123
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
124 if (length(inputFASTA)==1) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
125 cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
126 q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
127 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
128
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
129 # Process sequence IDs/names
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
130 names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
131
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
132 # Convert non nucleotide characters to N
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
133 inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
134 inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
135
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
136 # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
137 processedInput <- processInputAdvanced(inputFASTA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
138 matInput <- processedInput[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
139 germlines <- processedInput[[2]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
140 lenGermlines = length(unique(germlines))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
141 groups <- processedInput[[3]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
142 lenGroups = length(unique(groups))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
143 rm(processedInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
144 rm(inputFASTA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
145
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
146 # # remove clones with less than 2 seqeunces
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
147 # tableGL <- table(germlines)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
148 # singletons <- which(tableGL<8)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
149 # rowsToRemove <- match(singletons,germlines)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
150 # if(any(rowsToRemove)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
151 # matInput <- matInput[-rowsToRemove,]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
152 # germlines <- germlines[-rowsToRemove]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
153 # groups <- groups[-rowsToRemove]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
154 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
155 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
156 # # remove unproductive seqs
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
157 # nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
158 # if(any(nonFuctionalSeqs)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
159 # if(sum(nonFuctionalSeqs)==length(germlines)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
160 # write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
161 # q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
162 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
163 # matInput <- matInput[-which(nonFuctionalSeqs),]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
164 # germlines <- germlines[-which(nonFuctionalSeqs)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
165 # germlines[1:length(germlines)] <- 1:length(germlines)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
166 # groups <- groups[-which(nonFuctionalSeqs)]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
167 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
168 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
169 # if(class(matInput)=="character"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
170 # write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
171 # q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
172 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
173 #
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
174 # if(nrow(matInput)<10 | is.null(nrow(matInput))){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
175 # write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
176 # q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
177 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
178
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
179 # replace leading & trailing "-" with "N:
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
180 matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
181
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
182 # Trim (nucleotide) input sequences to the last codon
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
183 #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
184
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
185 # # Check for Indels
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
186 # if(fixIndels){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
187 # delPos <- fixDeletions(matInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
188 # insPos <- fixInsertions(matInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
189 # }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
190 # # Check for indels
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
191 # indelPos <- checkForInDels(matInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
192 # indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
193 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
194
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
195 # If indels are present, remove mutations in the seqeunce & throw warning at end
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
196 #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
197
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
198 colnames(matInput)=c("Input","Germline")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
199
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
200 # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
201 germlinesOriginal = NULL
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
202 if(clonal){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
203 germlinesOriginal <- germlines
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
204 collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
205 collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
206 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
207 matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])}))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
208 names_groups = tapply(groups,germlines,function(x){names(x[1])})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
209 groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
210 names(groups) = names_groups
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
211
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
212 names_germlines = tapply(germlines,germlines,function(x){names(x[1])})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
213 germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
214 names(germlines) = names_germlines
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
215 matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])})
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
216 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
217
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
218
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
219 # Selection Analysis
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
220
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
221
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
222 # if (length(germlines)>sequenceLimit) {
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
223 # # Code to parallelize processing goes here
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
224 # stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
225 # }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
226
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
227 # if (length(germlines)<sequenceLimit) {}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
228
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
229 # Compute expected mutation frequencies
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
230 matExpected <- getExpectedIndividual(matInput)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
231
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
232 # Count observed number of mutations in the different regions
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
233 mutations <- lapply( 1:nrow(matInput), function(i){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
234 #cat(i,"\n")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
235 seqI = s2c(matInput[i,1])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
236 seqG = s2c(matInput[i,2])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
237 matIGL = matrix(c(seqI,seqG),ncol=length(seqI),nrow=2,byrow=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
238 retVal <- NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
239 tryCatch(
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
240 retVal <- analyzeMutations2NucUri(matIGL)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
241 , error = function(ex){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
242 retVal <- NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
243 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
244 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
245
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
246
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
247 return( retVal )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
248 })
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
249
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
250 matObserved <- t(sapply( mutations, processNucMutations2 ))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
251 numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
252
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
253 #if(sum(numberOfSeqsWithMutations)==0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
254 # write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
255 # q()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
256 #}
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
257
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
258 matMutationInfo <- cbind(matObserved,matExpected)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
259 rm(matObserved,matExpected)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
260
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
261
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
262 #Bayesian PDFs
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
263 bayes_pdf = computeBayesianScore(matMutationInfo, test=testName, max_sigma=20,length_sigma=4001)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
264 bayesPDF_cdr = bayes_pdf[[1]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
265 bayesPDF_fwr = bayes_pdf[[2]]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
266 rm(bayes_pdf)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
267
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
268 bayesPDF_germlines_cdr = tapply(bayesPDF_cdr,germlines,function(x) groupPosteriors(x,length_sigma=4001))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
269 bayesPDF_germlines_fwr = tapply(bayesPDF_fwr,germlines,function(x) groupPosteriors(x,length_sigma=4001))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
270
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
271 bayesPDF_groups_cdr = tapply(bayesPDF_cdr,groups,function(x) groupPosteriors(x,length_sigma=4001))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
272 bayesPDF_groups_fwr = tapply(bayesPDF_fwr,groups,function(x) groupPosteriors(x,length_sigma=4001))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
273
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
274 if(lenGroups>1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
275 groups <- c(groups,lenGroups+1)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
276 names(groups)[length(groups)] = "All sequences combined"
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
277 bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
278 bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
279 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
280
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
281 #Bayesian Outputs
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
282 bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
283 bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
284 bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
285 bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
286 bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
287 bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
288
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
289 #P-values
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
290 simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
291 simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
292
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
293 simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
294 simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
295
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
296 simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
297 simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
298
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
299
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
300 #Format output
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
301
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
302 # Round expected mutation frequencies to 3 decimal places
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
303 matMutationInfo[germlinesOriginal[indelPos],] = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
304 if(nrow(matMutationInfo)==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
305 matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
306 }else{
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
307 matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
308 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
309
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
310 listPDFs = list()
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
311 nRows = length(unique(groups)) + length(unique(germlines)) + length(groups)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
312
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
313 matOutput = matrix(NA,ncol=18,nrow=nRows)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
314 rowNumb = 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
315 for(G in unique(groups)){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
316 #print(G)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
317 matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
318 listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
319 names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
320 #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
321 gs = unique(germlines[groups==G])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
322 rowNumb = rowNumb+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
323 if( !is.na(gs) ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
324 for( g in gs ){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
325 matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
326 listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
327 names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
328 rowNumb = rowNumb+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
329 indexesOfInterest = which(germlines==g)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
330 numbSeqsOfInterest = length(indexesOfInterest)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
331 rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1))
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
332 matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
333 rownames(matInput)[indexesOfInterest],
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
334 c(matMutationInfo[indexesOfInterest,1:4]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
335 c(matMutationInfo[indexesOfInterest,5:8]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
336 c(bayes_cdr[indexesOfInterest,]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
337 c(bayes_fwr[indexesOfInterest,]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
338 c(simgaP_cdr[indexesOfInterest]),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
339 c(simgaP_fwr[indexesOfInterest])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
340 ), ncol=18, nrow=numbSeqsOfInterest,byrow=F)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
341 increment=0
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
342 for( ioi in indexesOfInterest){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
343 listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
344 names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
345 increment = increment + 1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
346 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
347 rowNumb=max(rowNumb)+1
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
348
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
349 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
350 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
351 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
352 colsToFormat = 11:18
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
353 matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
354 matOutput[matOutput== " NaN"] = NA
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
355
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
356
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
357
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
358 colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
359 "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
360 paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
361 paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
362 )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
363 fileName = paste(outputPath,outputID,".txt",sep="")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
364 write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
365 fileName = paste(outputPath,outputID,".RData",sep="")
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
366 save(listPDFs,file=fileName)
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
367
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
368 indelWarning = FALSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
369 if(sum(indelPos)>0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
370 indelWarning = "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
371 indelWarning = paste( indelWarning , "<UL>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
372 for(indels in names(indelPos)[indelPos]){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
373 indelWarning = paste( indelWarning , "<LI>", indels, "</LI>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
374 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
375 indelWarning = paste( indelWarning , "</UL></P>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
376 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
377
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
378 cloneWarning = FALSE
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
379 if(clonal==1){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
380 if(sum(matInputErrors)>0){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
381 cloneWarning = "<P>Warning: The following clones have sequences of unequal length.";
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
382 cloneWarning = paste( cloneWarning , "<UL>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
383 for(clone in names(matInputErrors)[matInputErrors]){
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
384 cloneWarning = paste( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
385 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
386 cloneWarning = paste( cloneWarning , "</UL></P>", sep="" )
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
387 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
388 }
d685e7ba0ed4 Uploaded
davidvanzessen
parents:
diff changeset
389 cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))