|
0
|
1 # Load ExomeDepth library (without warnings)
|
|
|
2 suppressMessages(library(cn.mops))
|
|
|
3
|
|
|
4 # Import parameters from xml wrapper (args_file)
|
|
|
5 args <- commandArgs(trailingOnly=TRUE)
|
|
|
6 param <- read.table(args[1],sep="=", as.is=TRUE)
|
|
|
7
|
|
|
8 # Set common parameters
|
|
|
9 target <- read.table(param[match("target",param[,1]),2], sep="\t", as.is=TRUE)
|
|
|
10 padding <- as.numeric(param[match("padding",param[,1]),2])
|
|
|
11 mapping_mode <- param[match("mapping_mode",param[,1]),2]
|
|
|
12 output <- param[match("output",param[,1]),2]
|
|
|
13
|
|
|
14 # Set advanced parameters
|
|
|
15 advanced_mode <- ifelse("advanced_mode" %in% param[,1], TRUE, FALSE)
|
|
|
16 if (advanced_mode){
|
|
|
17 prior_impact <- as.integer(param[match("prior_impact",param[,1]),2])
|
|
|
18 cyc <- as.integer(param[match("cyc",param[,1]),2])
|
|
|
19 norm_type <- param[match("norm_type",param[,1]),2]
|
|
|
20 norm <- as.logical(param[match("norm",param[,1]),2])
|
|
|
21 upper_threshold <- as.numeric(param[match("upper_threshold",param[,1]),2])
|
|
|
22 lower_threshold <- as.numeric(param[match("lower_threshold",param[,1]),2])
|
|
|
23 min_width <- as.integer(param[match("min_width",param[,1]),2])
|
|
|
24 seq_alg <- param[match("seq_alg",param[,1]),2]
|
|
|
25 min_read_count <- as.integer(param[match("min_read_count",param[,1]),2])
|
|
|
26 use_median <- as.logical(param[match("use_median",param[,1]),2])
|
|
|
27 }
|
|
|
28
|
|
|
29 # Create symbolic links for multiple bam and bai
|
|
|
30 bam <- param[param[,1]=="bam",2]
|
|
|
31 bam_bai <- param[param[,1]=="bam_bai",2]
|
|
|
32 bam_label <- param[param[,1]=="bam_label",2]
|
|
|
33 bam_label <- gsub(" ", "_", bam_label)
|
|
|
34
|
|
|
35 for(i in 1:length(bam)){
|
|
|
36 stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep=".")))
|
|
|
37 stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep=".")))
|
|
|
38 }
|
|
|
39
|
|
|
40 # Create genomic ranges
|
|
|
41 gr <- GRanges(target[,1],IRanges(target[,2]-padding,target[,3]+padding))
|
|
|
42 # Merge overlapping segments (make sense if padding != 0)
|
|
|
43 gr <- reduce(gr)
|
|
|
44
|
|
|
45
|
|
|
46 # Get read counts
|
|
|
47 BAMFiles <- paste(bam_label, "bam", sep=".")
|
|
|
48 X <- suppressMessages(getSegmentReadCountsFromBAM(BAMFiles,GR=gr,
|
|
|
49 mode=mapping_mode,
|
|
|
50 sampleNames=bam_label))
|
|
|
51
|
|
|
52 if (advanced_mode){
|
|
|
53 resCNMOPS <- suppressMessages(exomecn.mops(X,
|
|
|
54 priorImpact=prior_impact,
|
|
|
55 cyc=cyc,
|
|
|
56 normType=norm_type,
|
|
|
57 norm=norm,
|
|
|
58 upperThreshold=upper_threshold,
|
|
|
59 lowerThreshold=lower_threshold,
|
|
|
60 minWidth=min_width,
|
|
|
61 seqAlgorithm=seq_alg,
|
|
|
62 minReadCount=min_read_count,
|
|
|
63 useMedian=use_median))
|
|
|
64 }else{
|
|
|
65 resCNMOPS <- suppressMessages(exomecn.mops(X))
|
|
|
66 }
|
|
|
67
|
|
|
68 resCNMOPS <- calcIntegerCopyNumbers(resCNMOPS)
|
|
|
69
|
|
|
70 # Extract individual CNV calls
|
|
|
71 # Legend for CN values is as follows:
|
|
|
72 # The copy number classes default to CN0, CN1, CN2, CN3, .., CN8.
|
|
|
73 # CN2 is the normal copy number for diploid samples.
|
|
|
74 # CN1 is a heterozygous deletion and
|
|
|
75 # CN0 is a homozygous deletion.
|
|
|
76 # CN3 thru CN8 are amplifications.
|
|
|
77 # For non-tumor samples the highest copy number class is 8 - higher copy numbers have not been reported.
|
|
|
78 # CN8 is expected to have 4 times as many reads (for times as high coverage) as CN2.
|
|
|
79 # For tumor samples very high copy numbers have been observed (e.g. CN64),
|
|
|
80 # therefore the parameters of cn.mops have to be adjusted to allow for high copy numbers.
|
|
|
81 # A way to set the parameters is given in https://gist.github.com/gklambauer/8955203
|
|
|
82 res <- cnvs(resCNMOPS)
|
|
|
83
|
|
|
84 # Convert results to data.frame
|
|
|
85 df <- data.frame(chr=as.character(seqnames(res)),
|
|
|
86 starts=start(res),
|
|
|
87 #ranges(res),
|
|
|
88 ends=end(res),
|
|
|
89 as.data.frame(elementMetadata(res)))
|
|
|
90
|
|
|
91 # Remove CN2 (= normal copy number), if any
|
|
|
92 df <- df[df$CN != "CN2",]
|
|
|
93
|
|
|
94 # Write results
|
|
|
95 write.table(df, sep='\t', quote=FALSE, file = output,
|
|
|
96 row.names = FALSE, dec=".")
|