# HG changeset patch # User morinlab # Date 1476210963 14400 # Node ID 2f57a1c16d15546de52c0d3d2c369090290e5f96 planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/titan commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty diff -r 000000000000 -r 2f57a1c16d15 citations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/citations.xml Tue Oct 11 14:36:03 2016 -0400 @@ -0,0 +1,153 @@ + + + + @unpublished{ + albuquerque2016galaxy, + author = "Marco Albuquerque and Bruno Grande and Elie Ritch and Martin Krzywinski and Prasath Pararajalingam and Selin Jessa and Paul Boutros and Sohrab Shah and Ryan Morin", + title = "A Suite of Galaxy Tools for Cancer Mutational Analysis", + note = "Unpublished Manuscript", + year = "2016" + } + + + + + @article{ + Lai20062016, + title = {VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research}, + author = {Lai, Zhongwu and Markovets, Aleksandra and Ahdesmaki, Miika and Chapman, Brad and Hofmann, Oliver and McEwen, Robert and Johnson, Justin and Dougherty, Brian and Barrett, J. Carl and Dry, Jonathan R.}, + journal = {Nucleic Acids Research}, + volume = {44}, + number = {11}, + pages = {e108}, + year = {2016}, + doi = {10.1093/nar/gkw227} + } + + + + + @article{ + Larson01022012, + author = {Larson, David E. and Harris, Christopher C. and Chen, Ken and Koboldt, Daniel C. and Abbott, Travis E. and Dooling, David J. and Ley, Timothy J. and Mardis, Elaine R. and Wilson, Richard K. 
and Ding, Li}, + title = {SomaticSniper: identification of somatic point mutations in whole genome sequencing data}, + volume = {28}, + number = {3}, + pages = {311-317}, + year = {2012}, + doi = {10.1093/bioinformatics/btr665}, + journal = {Bioinformatics} + } + + + + + @ARTICLE{Goecks2010-ra, + title = "Galaxy: a comprehensive approach for supporting accessible, + reproducible, and transparent computational research in the life + sciences", + author = "Goecks, Jeremy and Nekrutenko, Anton and Taylor, James and + {Galaxy Team}", + journal = "Genome Biol.", + volume = 11, + number = 8, + pages = "R86", + month = "25~", + year = 2010 + } + + + + + + @ARTICLE{Rausch2012-yi, + title = "{DELLY}: structural variant discovery by integrated paired-end + and split-read analysis", + author = "Rausch, Tobias and Zichner, Thomas and Schlattl, Andreas and + St{\"{u}}tz, Adrian M and Benes, Vladimir and Korbel, Jan O", + journal = "Bioinformatics", + volume = 28, + number = 18, + pages = "i333--i339", + month = "15~", + year = 2012 +} + + + + +@ARTICLE{Ding2012-jq, + title = "Feature-based classifiers for somatic mutation detection in + tumour-normal paired sequencing data", + author = "Ding, Jiarui and Bashashati, Ali and Roth, Andrew and Oloumi, + Arusha and Tse, Kane and Zeng, Thomas and Haffari, Gholamreza and + Hirst, Martin and Marra, Marco A and Condon, Anne and Aparicio, + Samuel and Shah, Sohrab P", + journal = "Bioinformatics", + volume = 28, + number = 2, + pages = "167--175", + month = "15~" # jan, + year = 2012 +} + + + + + + @ARTICLE{Saunders2012-nh, + title = "Strelka: accurate somatic small-variant calling from sequenced + tumor-normal sample pairs", + author = "Saunders, Christopher T and Wong, Wendy S W and Swamy, Sajani and + Becq, Jennifer and Murray, Lisa J and Cheetham, R Keira", + journal = "Bioinformatics", + volume = 28, + number = 14, + pages = "1811--1817", + month = "15~" # jul, + year = 2012 + } + + + + + @article{ + Radenbaugh2014-tj, + 
title={RADIA: RNA and DNA integrated analysis for somatic + mutation detection}, + author={Radenbaugh, Amie J and Ma, Singer and Ewing, Adam and Stuart, + Joshua M and Collisson, Eric A and Zhu, Jingchun and Haussler, + David}, + journal={PLoS One}, + volume={9}, + number={11}, + pages={e111516}, + year={2014}, + publisher={PLoS} + } + + + + + @ARTICLE{Ha2014-pu, + title = "{TITAN}: inference of copy number architectures in clonal cell + populations from tumor whole-genome sequence data", + author = "Ha, Gavin and Roth, Andrew and Khattra, Jaswinder and Ho, + Julie and Yap, Damian and Prentice, Leah M and Melnyk, + Nataliya and McPherson, Andrew and Bashashati, Ali and Laks, + Emma and Biele, Justina and Ding, Jiarui and Le, Alan and + Rosner, Jamie and Shumansky, Karey and Marra, Marco A and + Gilks, C Blake and Huntsman, David G and McAlpine, Jessica N + and Aparicio, Samuel and Shah, Sohrab P", + journal = "Genome Res.", + publisher = "Cold Spring Harbor Lab", + volume = 24, + number = 11, + pages = "1881--1893", + month = "1~" # nov, + year = 2014, + keywords = "computational method" +} + + + diff -r 000000000000 -r 2f57a1c16d15 findOptimal.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/findOptimal.sh Tue Oct 11 14:36:03 2016 -0400 @@ -0,0 +1,9 @@ +val=1; +index=0; +for i in $(eval echo "{$1..$2}"); + do newval=$( grep ".*S_Dbw validity index (Both).*" ./parameters/samp${i}.txt | cut -f2); + if [ $(echo "$val > $newval" | bc) -eq 1 ]; + then val=$newval; index=$i; + fi; +done; +echo -e "$val\t$index" diff -r 000000000000 -r 2f57a1c16d15 titan.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/titan.R Tue Oct 11 14:36:03 2016 -0400 @@ -0,0 +1,112 @@ +library(TitanCNA) + +version <- '0.1.3' + +args <- commandArgs(TRUE) + +id <- args[1] +tc_het_file <- args[2] +cnfile <- args[3] +map <- args[4] +numClusters <- as.numeric(args[5]) +numCores <- as.numeric(args[6]) +ploidy <- as.numeric(args[7]) +outfile <- args[8] +outparam <- args[9] +myskew <- 
as.numeric(args[10]) +boolEstPloidy <- args[11] +n_zero <- as.numeric(args[12]) +normEstMeth <- args[13] +maxI <- as.numeric(args[14]) +pseudo_counts = as.numeric(args[15]) +txn_exp_len = as.numeric(args[16]) +txn_z_strength = as.numeric(args[17]) +alphaK <- as.numeric(args[18]) #prior for events; default: 15000 +alphaHigh <- as.numeric(args[19]) #prior for extreme events; default: 15000 +maxCN <- as.numeric(args[20]) #maximum number of copies to use +sym <- args[21] +outobj <- args[22] +genometype <- args[23] +chrom <- args[24] +yThreshold <- as.numeric(args[25]) +chrom <- eval(parse(text=chrom)) + +message('Running TITAN...') + +#### LOAD DATA #### +data <- loadAlleleCounts(tc_het_file, symmetric=sym, genomeStyle=genometype) + +#### LOAD PARAMETERS #### +message('titan: Loading default parameters') +params <- loadDefaultParameters(copyNumber=maxCN,numberClonalClusters=numClusters, skew=myskew, symmetric=sym) +params$ploidyParams$phi_0 <- ploidy +params$normalParams$n_0 <- n_zero + +# #### GC AND MAPPABILITY CORRECTION #### +message('titan: Reading GC content and mappability corrected read counts ...') +cnData <- read.delim(cnfile,header=TRUE,stringsAsFactors=FALSE,sep="\t") + +#### READ COPY NUMBER FROM HMMCOPY FILE #### +message('titan: Extracting read depth...') + +logR <- getPositionOverlap(data$chr,data$posn,cnData) +data$logR <- log(2^logR) +rm(logR,cnData) + +#### FILTER DATA FOR DEPTH, MAPPABILITY, NA, etc #### +mScore <- as.data.frame(wigToRangedData(map)) +mScore <- getPositionOverlap(data$chr,data$posn,mScore[,-4]) + +#### Check if Chromosomes Have been provided + +if (is.null(chrom)) { +chrom <- unique(sort(data$chr)) +} + +# check if sample is Female or number of datapoints is very small. 
+# Keep 'Y' only when filterData returns more than yThreshold rows for it.
+if (NROW(filterData(data,c('Y'),minDepth=10,maxDepth=200,map=mScore,mapThres=0.8)) > yThreshold){
+  data <- filterData(data,chrom,minDepth=10,maxDepth=200,map=mScore,mapThres=0.8)
+} else {
+  data <- filterData(data,chrom[which(chrom!='Y')],minDepth=10,maxDepth=200,map=mScore,mapThres=0.8)
+}
+
+#### REGISTER THE doMC BACKEND WITH numCores CORES ####
+library(doMC)
+registerDoMC(cores=numCores)
+
+##### RUN USING EM ALGORITHM ######
+# Every state gets prior alphaK, then the states in highStates get alphaHigh.
+K <- length(params$genotypeParams$rt)
+params$genotypeParams$alphaKHyper <- rep(alphaK,K)
+if (sym) { highStates <- c(1,7:K) } else { highStates <- c(1,11:K) }
+params$genotypeParams$alphaKHyper[highStates] <- alphaHigh
+convergeParams <- runEMclonalCN(data,gParams=params$genotypeParams,
+    nParams=params$normalParams,
+    pParams=params$ploidyParams,sParams=params$cellPrevParams,
+    maxiter=maxI,maxiterUpdate=1500,txnExpLen=txn_exp_len,
+    txnZstrength=txn_z_strength,
+    useOutlierState=FALSE,normalEstimateMethod=normEstMeth,
+    estimateS=TRUE, estimatePloidy=boolEstPloidy,
+    pseudoCounts=pseudo_counts)
+
+#### COMPUTE OPTIMAL STATE PATH USING VITERBI ####
+#options(cores=1)
+optimalPath <- viterbiClonalCN(data,convergeParams)
+
+#### PRINT RESULTS TO FILES ####
+#if (numClusters < 10) { numClusters <- paste("0",numClusters,sep="") }
+# First attempt requests subclone profiles; on any error, retry without them.
+tryCatch({
+  results <- outputTitanResults(data,convergeParams, optimalPath, filename=outfile,posteriorProbs=FALSE, subcloneProfiles=TRUE)
+  outputModelParameters(convergeParams, results, outparam)
+  save(convergeParams, results, file=paste(outobj))
+},
+error = function(err){
+  print('setting subcloneprofiles to False and retrying due to error:')
+  print(err)
+  # The retry writes to the outfile path, not to a file literally named 'outfile'.
+  results <- outputTitanResults(data,convergeParams, optimalPath, filename=outfile,posteriorProbs=FALSE, subcloneProfiles=FALSE)
+  outputModelParameters(convergeParams, results, outparam)
+  save(convergeParams, results, file=paste(outobj))
+})
diff -r 000000000000 -r 2f57a1c16d15 titan.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 
+0000 +++ b/titan.xml Tue Oct 11 14:36:03 2016 -0400 @@ -0,0 +1,106 @@ + + + + Estimate Cellular Prevalence and Call Copy Number Aberations + + + + Rscript + TitanCNA + R + titancna + + + + + mkdir parameters; + mkdir outputs; + mkdir rdatas; + #for $numClusters in range($numClustersMin, $numClustersMax): + + Rscript $__tool_directory__/titan.R + + #if $sampleid_source.sampleid_selector == "bamfile": + \$(basename $sampleid_source.id | sed 's/.bam$//g' ) + #else: + $sampleid_source.id + #end if + + $tc_het_file + $cnfile + $map + $numClusters + \${GALAXY_SLOTS:-1} + $advancedsettings.ploidy + ./outputs/samp${numClusters}.txt + ./parameters/samp${numClusters}.txt + $advancedsettings.myskew + TRUE + 0.5 + map + 50 + 1e-300 + 1e16 + 1e6 + $advancedsettings.alphaK + $advancedsettings.alphaHigh + $advancedsettings.maxCN + TRUE + ./rdatas/samp${numClusters}.RData + NCBI + NULL + 20 + 2>&1 ; + #end for + + grep ".*S_Dbw validity index (Both).*" ./parameters/* | cut -f2 > vals.txt; + grep ".*Clonal cluster cellular prevalence.*" ./parameters/* | sed 's/.*Z=//g' | sed 's/:.*//g' > clusters.txt; + paste --delimiters='\t' vals.txt clusters.txt | sort | head -n 1 > optimal.txt; + + cp ./outputs/samp\$(cat optimal.txt | cut -f2).txt $optimal_output; + cp ./parameters/samp\$(cat optimal.txt | cut -f2).txt $optimal_parameter; + cp ./rdatas/samp\$(cat optimal.txt | cut -f2).RData $optimal_rdata; + + + + + + + + + + + + + + + + + + + + + +

+ + + + + +

+ + + + + + + + + + + + + + + + diff -r 000000000000 -r 2f57a1c16d15 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Oct 11 14:36:03 2016 -0400 @@ -0,0 +1,6 @@ + + + + + +