comparison QDNAseq.R @ 28:40ae95ad9d8c draft

Uploaded
author stef
date Wed, 18 Jun 2014 05:29:59 -0400
parents 2efda0ed7ccf
children 4c4e4d88779c
comparison
equal deleted inserted replaced
27:47c3ecce5544 28:40ae95ad9d8c
5 ## -------------------- 5 ## --------------------
6 catMsg <- function( msg=c() ){ 6 catMsg <- function( msg=c() ){
7 cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') 7 cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
8 } 8 }
9 ## -------------------- 9 ## --------------------
10 ## return the directory this script exists 10 ## return the location of this script
11 ## -------------------- 11 ## --------------------
12 getScriptPath <- function(){ 12 getScriptPath <- function(){
13 cmd.args <- commandArgs() 13 cmd.args <- commandArgs()
14 m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE) 14 m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE)
15 script.dir <- dirname(regmatches(cmd.args, m)) 15 script.dir <- dirname(regmatches(cmd.args, m))
16 if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") 16 if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
17 if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") 17 if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
18 return(script.dir) 18 return(script.dir)
19 } 19 }
20 ## -------------------- 20 ## --------------------
21 ## Some html helper functions 21 ## Some html creation functions
22 ## -------------------- 22 ## --------------------
23 htmlTableRow <- function( string_array=c() ){ 23 htmlTableRow <- function( string_array=c() ){
24 td_cells <- '' 24 td_cells <- ''
25 for ( i in string_array ){ 25 for ( i in string_array ){
26 td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' ) 26 td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' )
29 } 29 }
30 htmlLink <- function( path, desc="LINK" ){ 30 htmlLink <- function( path, desc="LINK" ){
31 return( paste( '<a href="', path, '">', desc, "</a>", sep='') ) 31 return( paste( '<a href="', path, '">', desc, "</a>", sep='') )
32 } 32 }
33 ## -------------------- 33 ## --------------------
34 ## constructs a list with bam file info 34 ## constructs a list with input bam file info
35 ## -------------------- 35 ## --------------------
36 makeBamFileList <- function( paths, names ){ 36 makeBamFileList <- function( paths, names ){
37 tmp <- list() 37 tmp <- list()
38 l1 <- length(paths) 38 l1 <- length(paths)
39 l2 <- length(names) 39 l2 <- length(names)
50 } 50 }
51 return( tmp ) 51 return( tmp )
52 } 52 }
53 53
54 ## -------------------- 54 ## --------------------
55 ## took a function from Matias/MarkVdWuel? for extracting the regions by call 55 ## copied code for extracting the regions by segment call status
56 ## -------------------- 56 ## --------------------
57 fuse.regions_test <- function(cgh, onlyCalled=T) { 57 fuse.regions_test <- function(cgh, onlyCalled=T) {
58 if (ncol(cgh) > 1) stop('Please specify which sample...') 58 if (ncol(cgh) > 1) stop('Please specify which sample...')
59 59
60 x <- data.frame(cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) 60 x <- data.frame(cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE)
148 outFiles[[sample]] <- c( outBedFile, outBedgraphFile ) 148 outFiles[[sample]] <- c( outBedFile, outBedgraphFile )
149 } 149 }
150 outFiles 150 outFiles
151 } 151 }
152 152
153 printIgvFile <- function( dat, filename ){ 153 #printIgvFile <- function( dat, filename ){
154 154 #
155 if ( 'calls' %in% assayDataElementNames(dat) ) { 155 # if ( 'calls' %in% assayDataElementNames(dat) ) {
156 #output <- paste(output, '-called.igv', sep="") 156 # #output <- paste(output, '-called.igv', sep="")
157 cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) 157 # cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename)
158 df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE) 158 # df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE)
159 }else { 159 # }else {
160 #output <- paste(output, '-normalized.igv', sep="") 160 # #output <- paste(output, '-normalized.igv', sep="")
161 cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) 161 # cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename)
162 df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE) 162 # df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE)
163 } 163 # }
164 164 #
165 df$chromosome[df$chromosome == '23'] <- 'X' 165 # df$chromosome[df$chromosome == '23'] <- 'X'
166 df$chromosome[df$chromosome == '24'] <- 'Y' 166 # df$chromosome[df$chromosome == '24'] <- 'Y'
167 df$chromosome[df$chromosome == '25'] <- 'MT' 167 # df$chromosome[df$chromosome == '25'] <- 'MT'
168 #return( df ) 168 # #return( df )
169 write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) 169 # write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE )
170 } 170 #}
171 171
172 172
173 ## ================================================== 173 ## ==================================================
174 ## Start of analysis 174 ## Start of analysis
175 ## ================================================== 175 ## ==================================================
209 #catMsg( instal_path ) 209 #catMsg( instal_path )
210 ## /DEBUG 210 ## /DEBUG
211 ## ------------------------ 211 ## ------------------------
212 212
213 ## setup bam filelist for easy retrieval later 213 ## setup bam filelist for easy retrieval later
214 catMsg( "Setting up input bam list" ) 214 #catMsg( "Setting up input bam list" )
215 cat( bamsPaths, "\n" ) 215 #cat( bamsPaths, "\n" )
216 catMsg( "Namews" ) 216 #catMsg( "Namews" )
217 cat( bamsNames, "\n" ) 217 #cat( bamsNames, "\n" )
218 fileList <- makeBamFileList( bamsPaths, bamsNames ) 218 fileList <- makeBamFileList( bamsPaths, bamsNames )
219 bamCount <- length( fileList[[ 'all_paths' ]] ) 219 bamCount <- length( fileList[[ 'all_paths' ]] )
220 220
221 ## help msg still needs work! 221 ## help msg still needs work!
222 if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ 222 #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){
223 cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" ) 223 # cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" )
224 quit( save = 'no', status=0, runLast=F ) 224 # quit( save = 'no', status=0, runLast=F )
225 } 225 #}
226 226
227 if ( !file.exists( outputPath) ){ 227 if ( !file.exists( outputPath) ){
228 dir.create( outputPath ) 228 dir.create( outputPath )
229 } 229 }
230 230
278 readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) 278 readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs )
279 readCountsFiltered <- estimateCorrection( readCountsFiltered ) 279 readCountsFiltered <- estimateCorrection( readCountsFiltered )
280 copyNumbers <- correctBins( readCountsFiltered ) 280 copyNumbers <- correctBins( readCountsFiltered )
281 copyNumbersNormalized <- normalizeBins( copyNumbers ) 281 copyNumbersNormalized <- normalizeBins( copyNumbers )
282 copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) 282 copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized )
283 sampleNames <- readCountsFiltered@phenoData@data$name
283 284
284 ## save objects to output dir 285 ## save objects to output dir
285 saveRDS( readCounts, robjReadCoPath ); 286 saveRDS( readCounts, robjReadCoPath );
286 saveRDS( copyNumbersSmooth, robjCopyNrPath ); 287 saveRDS( copyNumbersSmooth, robjCopyNrPath );s
287 #printIgvFile( copyNumbersSmooth, igvCopyNrName ) 288 exportBins(copyNumbersSmooth, file=igvCopyNrName, format="igv")
288 #exportBins(copyNumbersSmooth, file=igvCopyNrName, format="igv")
289 289
290 ## also save objects for galaxy history output if requested 290 ## also save objects for galaxy history output if requested
291 if ( doOutputReadcountsRds ){ 291 if ( doOutputReadcountsRds ){
292 saveRDS( readCountsFiltered, readCountsDatasetFile ); 292 saveRDS( readCountsFiltered, readCountsDatasetFile );
293 } 293 }
303 cgh <- makeCgh( copyNumbersCalled ) 303 cgh <- makeCgh( copyNumbersCalled )
304 saveRDS( copyNumbersCalled, robjCalledPath ); 304 saveRDS( copyNumbersCalled, robjCalledPath );
305 if ( doOutputCallsRds ){ 305 if ( doOutputCallsRds ){
306 saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); 306 saveRDS( copyNumbersCalled, calledSegmentsDatasetFile );
307 } 307 }
308 #df <- getIGVdf( copyNumbersCalled ) 308 exportBins( copyNumbersCalled, file=igvCalledName, format="igv")
309 #printIgvFile( copyNumbersCalled, igvCalledName ) 309 }
310 #write.table( df, file=igvCalledName, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) 310
311 } 311
312
313 sampleNames <- readCountsFiltered@phenoData@data$name
314 312
315 ## ------------------------ 313 ## ------------------------
316 ## create output files 314 ## create output files
317 ## ------------------------ 315 ## ------------------------
318 plotted_images <- list() # to keep track of images for later linking 316 plotted_images <- list() # to keep track of images for later linking