Mercurial > repos > stef > qdnaseq
comparison QDNAseq.R @ 28:40ae95ad9d8c draft
Uploaded
author | stef |
---|---|
date | Wed, 18 Jun 2014 05:29:59 -0400 |
parents | 2efda0ed7ccf |
children | 4c4e4d88779c |
comparison
equal
deleted
inserted
replaced
27:47c3ecce5544 | 28:40ae95ad9d8c |
---|---|
5 ## -------------------- | 5 ## -------------------- |
6 catMsg <- function( msg=c() ){ | 6 catMsg <- function( msg=c() ){ |
7 cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') | 7 cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') |
8 } | 8 } |
9 ## -------------------- | 9 ## -------------------- |
10 ## return the directory this script exists | 10 ## return the location of this script |
11 ## -------------------- | 11 ## -------------------- |
12 getScriptPath <- function(){ | 12 getScriptPath <- function(){ |
13 cmd.args <- commandArgs() | 13 cmd.args <- commandArgs() |
14 m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE) | 14 m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE) |
15 script.dir <- dirname(regmatches(cmd.args, m)) | 15 script.dir <- dirname(regmatches(cmd.args, m)) |
16 if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") | 16 if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") |
17 if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") | 17 if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") |
18 return(script.dir) | 18 return(script.dir) |
19 } | 19 } |
20 ## -------------------- | 20 ## -------------------- |
21 ## Some html helper functions | 21 ## Some html creation functions |
22 ## -------------------- | 22 ## -------------------- |
23 htmlTableRow <- function( string_array=c() ){ | 23 htmlTableRow <- function( string_array=c() ){ |
24 td_cells <- '' | 24 td_cells <- '' |
25 for ( i in string_array ){ | 25 for ( i in string_array ){ |
26 td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' ) | 26 td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' ) |
29 } | 29 } |
30 htmlLink <- function( path, desc="LINK" ){ | 30 htmlLink <- function( path, desc="LINK" ){ |
31 return( paste( '<a href="', path, '">', desc, "</a>", sep='') ) | 31 return( paste( '<a href="', path, '">', desc, "</a>", sep='') ) |
32 } | 32 } |
33 ## -------------------- | 33 ## -------------------- |
34 ## constructs a list with bam file info | 34 ## constructs a list with input bam file info |
35 ## -------------------- | 35 ## -------------------- |
36 makeBamFileList <- function( paths, names ){ | 36 makeBamFileList <- function( paths, names ){ |
37 tmp <- list() | 37 tmp <- list() |
38 l1 <- length(paths) | 38 l1 <- length(paths) |
39 l2 <- length(names) | 39 l2 <- length(names) |
50 } | 50 } |
51 return( tmp ) | 51 return( tmp ) |
52 } | 52 } |
53 | 53 |
54 ## -------------------- | 54 ## -------------------- |
55 ## took a function from Matias/MarkVdWuel? for extracting the regions by call | 55 ## copied code for extracting the regions by segment call status |
56 ## -------------------- | 56 ## -------------------- |
57 fuse.regions_test <- function(cgh, onlyCalled=T) { | 57 fuse.regions_test <- function(cgh, onlyCalled=T) { |
58 if (ncol(cgh) > 1) stop('Please specify which sample...') | 58 if (ncol(cgh) > 1) stop('Please specify which sample...') |
59 | 59 |
60 x <- data.frame(cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) | 60 x <- data.frame(cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) |
148 outFiles[[sample]] <- c( outBedFile, outBedgraphFile ) | 148 outFiles[[sample]] <- c( outBedFile, outBedgraphFile ) |
149 } | 149 } |
150 outFiles | 150 outFiles |
151 } | 151 } |
152 | 152 |
153 printIgvFile <- function( dat, filename ){ | 153 #printIgvFile <- function( dat, filename ){ |
154 | 154 # |
155 if ( 'calls' %in% assayDataElementNames(dat) ) { | 155 # if ( 'calls' %in% assayDataElementNames(dat) ) { |
156 #output <- paste(output, '-called.igv', sep="") | 156 # #output <- paste(output, '-called.igv', sep="") |
157 cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) | 157 # cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) |
158 df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE) | 158 # df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE) |
159 }else { | 159 # }else { |
160 #output <- paste(output, '-normalized.igv', sep="") | 160 # #output <- paste(output, '-normalized.igv', sep="") |
161 cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) | 161 # cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) |
162 df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE) | 162 # df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE) |
163 } | 163 # } |
164 | 164 # |
165 df$chromosome[df$chromosome == '23'] <- 'X' | 165 # df$chromosome[df$chromosome == '23'] <- 'X' |
166 df$chromosome[df$chromosome == '24'] <- 'Y' | 166 # df$chromosome[df$chromosome == '24'] <- 'Y' |
167 df$chromosome[df$chromosome == '25'] <- 'MT' | 167 # df$chromosome[df$chromosome == '25'] <- 'MT' |
168 #return( df ) | 168 # #return( df ) |
169 write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) | 169 # write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) |
170 } | 170 #} |
171 | 171 |
172 | 172 |
173 ## ================================================== | 173 ## ================================================== |
174 ## Start of analysis | 174 ## Start of analysis |
175 ## ================================================== | 175 ## ================================================== |
209 #catMsg( instal_path ) | 209 #catMsg( instal_path ) |
210 ## /DEBUG | 210 ## /DEBUG |
211 ## ------------------------ | 211 ## ------------------------ |
212 | 212 |
213 ## setup bam filelist for easy retrieval later | 213 ## setup bam filelist for easy retrieval later |
214 catMsg( "Setting up input bam list" ) | 214 #catMsg( "Setting up input bam list" ) |
215 cat( bamsPaths, "\n" ) | 215 #cat( bamsPaths, "\n" ) |
216 catMsg( "Namews" ) | 216 #catMsg( "Namews" ) |
217 cat( bamsNames, "\n" ) | 217 #cat( bamsNames, "\n" ) |
218 fileList <- makeBamFileList( bamsPaths, bamsNames ) | 218 fileList <- makeBamFileList( bamsPaths, bamsNames ) |
219 bamCount <- length( fileList[[ 'all_paths' ]] ) | 219 bamCount <- length( fileList[[ 'all_paths' ]] ) |
220 | 220 |
221 ## help msg still needs work! | 221 ## help msg still needs work! |
222 if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ | 222 #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ |
223 cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" ) | 223 # cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" ) |
224 quit( save = 'no', status=0, runLast=F ) | 224 # quit( save = 'no', status=0, runLast=F ) |
225 } | 225 #} |
226 | 226 |
227 if ( !file.exists( outputPath) ){ | 227 if ( !file.exists( outputPath) ){ |
228 dir.create( outputPath ) | 228 dir.create( outputPath ) |
229 } | 229 } |
230 | 230 |
278 readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) | 278 readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) |
279 readCountsFiltered <- estimateCorrection( readCountsFiltered ) | 279 readCountsFiltered <- estimateCorrection( readCountsFiltered ) |
280 copyNumbers <- correctBins( readCountsFiltered ) | 280 copyNumbers <- correctBins( readCountsFiltered ) |
281 copyNumbersNormalized <- normalizeBins( copyNumbers ) | 281 copyNumbersNormalized <- normalizeBins( copyNumbers ) |
282 copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) | 282 copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) |
283 sampleNames <- readCountsFiltered@phenoData@data$name | |
283 | 284 |
284 ## save objects to output dir | 285 ## save objects to output dir |
285 saveRDS( readCounts, robjReadCoPath ); | 286 saveRDS( readCounts, robjReadCoPath ); |
286 saveRDS( copyNumbersSmooth, robjCopyNrPath ); | 287 saveRDS( copyNumbersSmooth, robjCopyNrPath );s |
287 #printIgvFile( copyNumbersSmooth, igvCopyNrName ) | 288 exportBins(copyNumbersSmooth, file=igvCopyNrName, format="igv") |
288 #exportBins(copyNumbersSmooth, file=igvCopyNrName, format="igv") | |
289 | 289 |
290 ## also save objects for galaxy history output if requested | 290 ## also save objects for galaxy history output if requested |
291 if ( doOutputReadcountsRds ){ | 291 if ( doOutputReadcountsRds ){ |
292 saveRDS( readCountsFiltered, readCountsDatasetFile ); | 292 saveRDS( readCountsFiltered, readCountsDatasetFile ); |
293 } | 293 } |
303 cgh <- makeCgh( copyNumbersCalled ) | 303 cgh <- makeCgh( copyNumbersCalled ) |
304 saveRDS( copyNumbersCalled, robjCalledPath ); | 304 saveRDS( copyNumbersCalled, robjCalledPath ); |
305 if ( doOutputCallsRds ){ | 305 if ( doOutputCallsRds ){ |
306 saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); | 306 saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); |
307 } | 307 } |
308 #df <- getIGVdf( copyNumbersCalled ) | 308 exportBins( copyNumbersCalled, file=igvCalledName, format="igv") |
309 #printIgvFile( copyNumbersCalled, igvCalledName ) | 309 } |
310 #write.table( df, file=igvCalledName, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) | 310 |
311 } | 311 |
312 | |
313 sampleNames <- readCountsFiltered@phenoData@data$name | |
314 | 312 |
315 ## ------------------------ | 313 ## ------------------------ |
316 ## create output files | 314 ## create output files |
317 ## ------------------------ | 315 ## ------------------------ |
318 plotted_images <- list() # to keep track of images for later linking | 316 plotted_images <- list() # to keep track of images for later linking |