Mercurial > repos > morinlab > titan

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/citations.xml	Tue Oct 11 14:36:03 2016 -0400
@@ -0,0 +1,153 @@
+<macros>
+    <xml name="morinlab_citation">
+        <!-- Suite-wide citation: the Morin lab Galaxy tools manuscript (unpublished). -->
+        <citation type="bibtex">
+            @unpublished{albuquerque2016galaxy,
+              author = "Marco Albuquerque and Bruno Grande and Elie Ritch and Martin Krzywinski and Prasath Pararajalingam and Selin Jessa and Paul Boutros and Sohrab Shah and Ryan Morin",
+              title  = "A Suite of Galaxy Tools for Cancer Mutational Analysis",
+              note   = "Unpublished Manuscript",
+              year   = "2016"
+            }
+        </citation>
+    </xml>
+    <xml name="vardict_citation">
+        <!-- VarDict variant caller (Lai et al. 2016); every field but the last ends with a comma. -->
+        <citation type="bibtex">
+            @article{Lai20062016,
+            title = {VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research},
+            author = {Lai, Zhongwu and Markovets, Aleksandra and Ahdesmaki, Miika and Chapman, Brad and Hofmann, Oliver and McEwen, Robert and Johnson, Justin and Dougherty, Brian and Barrett, J. Carl and Dry, Jonathan R.},
+            journal = {Nucleic Acids Research},
+            volume = {44},
+            number = {11},
+            pages = {e108},
+            year = {2016},
+            doi = {10.1093/nar/gkw227}
+            }
+        </citation>
+    </xml>
+    <xml name="somatic_sniper_citation">
+        <!-- SomaticSniper (Larson et al. 2012); the type attribute tells Galaxy how to parse the entry. -->
+        <citation type="bibtex">
+            @article{Larson01022012,
+            author = {Larson, David E. and Harris, Christopher C. and Chen, Ken and Koboldt, Daniel C. and Abbott, Travis E. and Dooling, David J. and Ley, Timothy J. and Mardis, Elaine R. and Wilson, Richard K. and Ding, Li},
+            title = {SomaticSniper: identification of somatic point mutations in whole genome sequencing data},
+            volume = {28},
+            number = {3},
+            pages = {311-317},
+            year = {2012},
+            doi = {10.1093/bioinformatics/btr665},
+            journal = {Bioinformatics}
+            }
+        </citation>
+    </xml>
+    <xml name="galaxy_citation">
+        <!-- Galaxy platform paper (Goecks et al. 2010); the type attribute tells Galaxy how to parse the entry. -->
+        <citation type="bibtex">
+          @ARTICLE{Goecks2010-ra,
+          title    = "Galaxy: a comprehensive approach for supporting accessible,
+                      reproducible, and transparent computational research in the life
+                      sciences",
+          author   = "Goecks, Jeremy and Nekrutenko, Anton and Taylor, James and {Galaxy Team}",
+          journal  = "Genome Biol.",
+          volume   =  11,
+          number   =  8,
+          pages    = "R86",
+          month    =  "25~" # aug,
+          year     =  2010
+          }
+        </citation>
+    </xml>
+
+    <xml name="delly_citation">
+        <!-- DELLY structural variant caller (Rausch et al. 2012); the type attribute tells Galaxy how to parse the entry. -->
+        <citation type="bibtex">
+          @ARTICLE{Rausch2012-yi,
+          title    = "{DELLY}: structural variant discovery by integrated paired-end
+                      and split-read analysis",
+          author   = "Rausch, Tobias and Zichner, Thomas and Schlattl, Andreas and St{\"{u}}tz, Adrian M and Benes, Vladimir and Korbel, Jan O",
+          journal  = "Bioinformatics",
+          volume   =  28,
+          number   =  18,
+          pages    = "i333--i339",
+          month    =  "15~" # sep,
+          year     =  2012
+          }
+        </citation>
+    </xml>
+    <xml name="mutationseq_citation"> <!-- MutationSeq classifier paper (Ding et al. 2012) -->
+        <citation type="bibtex">
+            @ARTICLE{Ding2012-jq,
+              title   = "Feature-based classifiers for somatic mutation detection in
+                         tumour-normal paired sequencing data",
+              author  = "Ding, Jiarui and Bashashati, Ali and Roth, Andrew and Oloumi,
+                         Arusha and Tse, Kane and Zeng, Thomas and Haffari, Gholamreza and
+                         Hirst, Martin and Marra, Marco A and Condon, Anne and Aparicio,
+                         Samuel and Shah, Sohrab P",
+              journal = "Bioinformatics",
+              volume  = 28,
+              number  = 2,
+              pages   = "167--175",
+              month   = "15~" # jan,
+              year    = 2012
+            }
+        </citation>
+    </xml>
+
+    <xml name="strelka_citation"> <!-- Strelka small-variant caller paper (Saunders et al. 2012) -->
+        <citation type="bibtex">
+            @ARTICLE{Saunders2012-nh,
+              title   = "Strelka: accurate somatic small-variant calling from sequenced
+                         tumor-normal sample pairs",
+              author  = "Saunders, Christopher T and Wong, Wendy S W and Swamy, Sajani and
+                         Becq, Jennifer and Murray, Lisa J and Cheetham, R Keira",
+              journal = "Bioinformatics",
+              volume  = 28,
+              number  = 14,
+              pages   = "1811--1817",
+              month   = "15~" # jul,
+              year    = 2012
+            }
+        </citation>
+    </xml>
+    <xml name="radia_citation">
+        <!-- RADIA somatic mutation caller paper (Radenbaugh et al. 2014). -->
+        <citation type="bibtex">
+            @article{Radenbaugh2014-tj,
+              title     = {RADIA: RNA and DNA integrated analysis for somatic
+              mutation detection},
+              author    = {Radenbaugh, Amie J and Ma, Singer and Ewing, Adam and Stuart,
+              Joshua M and Collisson, Eric A and Zhu, Jingchun and Haussler,
+              David},
+              journal   = {PLoS One},
+              volume    = {9},
+              number    = {11},
+              pages     = {e111516},
+              year      = {2014},
+              publisher = {PLoS}
+            }
+        </citation>
+    </xml>
+    <xml name="titan_citation"> <!-- TITAN copy number / clonality paper (Ha et al. 2014) -->
+        <citation type="bibtex">
+            @ARTICLE{Ha2014-pu,
+              title     = "{TITAN}: inference of copy number architectures in clonal cell
+                           populations from tumor whole-genome sequence data",
+              author    = "Ha, Gavin and Roth, Andrew and Khattra, Jaswinder and Ho,
+                           Julie and Yap, Damian and Prentice, Leah M and Melnyk,
+                           Nataliya and McPherson, Andrew and Bashashati, Ali and Laks,
+                           Emma and Biele, Justina and Ding, Jiarui and Le, Alan and
+                           Rosner, Jamie and Shumansky, Karey and Marra, Marco A and
+                           Gilks, C Blake and Huntsman, David G and McAlpine, Jessica N
+                           and Aparicio, Samuel and Shah, Sohrab P",
+              journal   = "Genome Res.",
+              publisher = "Cold Spring Harbor Lab",
+              volume    = 24,
+              number    = 11,
+              pages     = "1881--1893",
+              month     = "1~" # nov,
+              year      = 2014,
+              keywords  = "computational method"
+            }
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/findOptimal.sh	Tue Oct 11 14:36:03 2016 -0400
@@ -0,0 +1,9 @@
+val=1;
+index=0;
+for i in $(eval echo "{$1..$2}");
+    do newval=$( grep ".*S_Dbw validity index (Both).*" ./parameters/samp${i}.txt | cut -f2);
+    if [ $(echo "$val > $newval" | bc) -eq 1 ];
+        then val=$newval; index=$i;
+    fi;
+done;
+echo -e "$val\t$index"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/titan.R	Tue Oct 11 14:36:03 2016 -0400
@@ -0,0 +1,112 @@
+library(TitanCNA)
+
+version <- '0.1.3'
+
+# Positional arguments; the order must match the Rscript call in titan.xml.
+args <- commandArgs(TRUE)
+id <- args[1]                              # sample id (not referenced again in this script)
+tc_het_file <- args[2]                     # allele-count file read by loadAlleleCounts()
+cnfile <- args[3]                          # tab-delimited read-count table read by read.delim()
+map <- args[4]                             # wig file read by wigToRangedData()
+numClusters <- as.numeric(args[5])         # numberClonalClusters for the default parameters
+numCores <- as.numeric(args[6])            # cores registered with doMC
+ploidy <- as.numeric(args[7])              # stored as ploidyParams$phi_0
+outfile <- args[8]                         # path for outputTitanResults()
+outparam <- args[9]                        # path for outputModelParameters()
+myskew <- as.numeric(args[10])             # skew for the default parameters
+boolEstPloidy <- as.logical(args[11])      # estimatePloidy flag; parsed so "TRUE"/"FALSE" become logicals
+n_zero <- as.numeric(args[12])             # stored as normalParams$n_0
+normEstMeth <- args[13]                    # normalEstimateMethod for the EM run
+maxI <- as.numeric(args[14])               # maxiter for the EM run
+pseudo_counts <- as.numeric(args[15])      # pseudoCounts for the EM run
+txn_exp_len <- as.numeric(args[16])        # txnExpLen for the EM run
+txn_z_strength <- as.numeric(args[17])     # txnZstrength for the EM run
+alphaK <- as.numeric(args[18])             # prior for events
+alphaHigh <- as.numeric(args[19])          # prior for extreme events
+maxCN <- as.numeric(args[20])              # maximum number of copies to use
+sym <- as.logical(args[21])                # symmetric flag; parsed so "TRUE"/"FALSE" become logicals
+outobj <- args[22]                         # path for the saved RData objects
+genometype <- args[23]                     # genomeStyle for loadAlleleCounts()
+chrom <- args[24]                          # R expression naming the chromosomes, or "NULL"
+yThreshold <- as.numeric(args[25])         # threshold deciding whether chrY is kept
+chrom <- eval(parse(text=chrom))           # NOTE(review): runs caller-supplied text as R code; only pass trusted input
+
+message('Running TITAN...')
+
+#### LOAD DATA ####
+# Allele counts for the positions listed in tc_het_file.
+data <- loadAlleleCounts(tc_het_file, symmetric=sym, genomeStyle=genometype)
+
+#### LOAD PARAMETERS ####
+message('titan: Loading default parameters')
+params <- loadDefaultParameters(copyNumber=maxCN,numberClonalClusters=numClusters, skew=myskew, symmetric=sym)
+params$ploidyParams$phi_0 <- ploidy   # replace the default phi_0 with the caller's ploidy
+params$normalParams$n_0 <- n_zero     # replace the default n_0 with the caller's value
+
+#### GC AND MAPPABILITY CORRECTION ####
+message('titan: Reading GC content and mappability corrected read counts ...')
+cnData <- read.delim(cnfile,header=TRUE,stringsAsFactors=FALSE,sep="\t")
+
+#### READ COPY NUMBER FROM HMMCOPY FILE ####
+message('titan: Extracting read depth...')
+logR <- getPositionOverlap(data$chr,data$posn,cnData)
+data$logR <- log(2^logR)              # i.e. logR * log(2); presumably base-2 to natural log - confirm
+rm(logR,cnData)
+
+#### FILTER DATA FOR DEPTH, MAPPABILITY, NA, etc ####
+mScore <- as.data.frame(wigToRangedData(map))
+mScore <- getPositionOverlap(data$chr,data$posn,mScore[,-4])
+
+#### Default to every chromosome present in the data when none were requested ####
+if (is.null(chrom)) {
+    chrom <- unique(sort(data$chr))
+}
+
+# Keep chrY only when more than yThreshold of its positions survive filtering;
+# otherwise (female sample or too few datapoints) drop chrY before the real filter.
+# Positions are counted via the chr field: NROW() on a plain list counts its fields instead.
+yData <- filterData(data,c('Y'),minDepth=10,maxDepth=200,map=mScore,mapThres=0.8)
+if (length(yData$chr) <= yThreshold) { chrom <- chrom[which(chrom!='Y')] }
+rm(yData)
+data <- filterData(data,chrom,minDepth=10,maxDepth=200,map=mScore,mapThres=0.8)
+
+#### PARALLEL BACKEND FOR THE EM STEP ####
+library(doMC)
+registerDoMC(cores=numCores)
+
+##### RUN USING EM ALGORITHM ######
+# One hyperparameter per genotype state: alphaK everywhere, alphaHigh on the
+# first state and on the tail states (from 7 when symmetric, from 11 otherwise).
+nStates <- length(params$genotypeParams$rt)
+firstTailState <- if (sym) 7 else 11
+extremeStates <- c(1, firstTailState:nStates)
+alphaHyper <- rep(alphaK, nStates)
+alphaHyper[extremeStates] <- alphaHigh
+params$genotypeParams$alphaKHyper <- alphaHyper
+convergeParams <- runEMclonalCN(data, gParams=params$genotypeParams, nParams=params$normalParams,
+    pParams=params$ploidyParams, sParams=params$cellPrevParams,
+    maxiter=maxI, maxiterUpdate=1500, txnExpLen=txn_exp_len, txnZstrength=txn_z_strength,
+    useOutlierState=FALSE, normalEstimateMethod=normEstMeth,
+    estimateS=TRUE, estimatePloidy=boolEstPloidy,
+    pseudoCounts=pseudo_counts)
+
+#### COMPUTE OPTIMAL STATE PATH USING VITERBI ####
+optimalPath <- viterbiClonalCN(data,convergeParams)
+
+#### PRINT RESULTS TO FILES ####
+# Writes the results table (outfile), the model parameters (outparam) and an
+# RData snapshot (outobj). Shared by the first attempt and the fallback so both
+# use the same paths; the fallback used to write to a file literally named 'outfile'.
+writeTitanOutputs <- function(withSubclones) {
+    results <- outputTitanResults(data, convergeParams, optimalPath, filename=outfile,
+                                  posteriorProbs=FALSE, subcloneProfiles=withSubclones)
+    outputModelParameters(convergeParams, results, outparam)
+    save(convergeParams, results, file=outobj)
+}
+
+tryCatch(writeTitanOutputs(TRUE),
+error = function(err){
+    print('setting subcloneprofiles to False and retrying due to error:')
+    print(err)
+    writeTitanOutputs(FALSE)   # retry without subclone profiles
+})
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/titan.xml	Tue Oct 11 14:36:03 2016 -0400
@@ -0,0 +1,106 @@
+<tool id="titan" name="TITAN" version="0.1.3">
+
+  <description>
+    Estimate Cellular Prevalence and Call Copy Number Aberrations
+  </description>
+
+  <requirements> <!-- runtime needs: Rscript and the TitanCNA R module; packages pinned to R 3.2.1 and titancna 1.8.0 -->
+    <requirement type="binary">Rscript</requirement>
+    <requirement type="R-module">TitanCNA</requirement>
+    <requirement version="3.2.1" type="package">R</requirement>
+    <requirement version="1.8.0" type="package">titancna</requirement>
+  </requirements>
+
+  <command>
+    <!-- Fit one model per cluster count, then publish the run with the lowest S_Dbw validity index. -->
+    mkdir parameters;
+    mkdir outputs;
+    mkdir rdatas;
+    <!-- Both bounds are inclusive (range() alone stops before the max); int(str(...)) turns the Galaxy parameters into plain integers. -->
+    #for $numClusters in range(int(str($numClustersMin)), int(str($numClustersMax)) + 1):
+    Rscript $__tool_directory__/titan.R
+
+    <!-- id             -->   #if $sampleid_source.sampleid_selector == "bamfile":
+                                \$(basename $sampleid_source.id | sed 's/.bam$//g' )
+                              #else:
+                                $sampleid_source.id
+                              #end if
+
+    <!-- tc_het_file    -->  $tc_het_file
+    <!-- cnfile         -->  $cnfile
+    <!-- map            -->  $map
+    <!-- numClusters    -->  $numClusters
+    <!-- numCores       -->  \${GALAXY_SLOTS:-1}
+    <!-- ploidy         -->  $advancedsettings.ploidy
+    <!-- outfile        -->  ./outputs/samp${numClusters}.txt
+    <!-- outparam       -->  ./parameters/samp${numClusters}.txt
+    <!-- myskew         -->  $advancedsettings.myskew
+    <!-- boolEstPloidy  -->  TRUE
+    <!-- n_zero         -->  0.5
+    <!-- normEstMeth    -->  map
+    <!-- maxI           -->  50
+    <!-- pseudo_counts  -->  1e-300
+    <!-- txn_exp_len    -->  1e16
+    <!-- txn_z_strength -->  1e6
+    <!-- alphaK         -->  $advancedsettings.alphaK
+    <!-- alphaHigh      -->  $advancedsettings.alphaHigh
+    <!-- maxCN          -->  $advancedsettings.maxCN
+    <!-- sym            -->  TRUE
+    <!-- outobj         -->  ./rdatas/samp${numClusters}.RData
+    <!-- genometype     -->  NCBI
+    <!-- chrom          -->  NULL
+    <!-- y_threshold    -->  20
+    2>&amp;1 ;
+    #end for
+    <!-- Pair each run's index value with its cluster count; sort numerically (general numeric, first field) so the smallest value comes first. -->
+    grep ".*S_Dbw validity index (Both).*" ./parameters/* | cut -f2 > vals.txt;
+    grep ".*Clonal cluster cellular prevalence.*" ./parameters/* | sed 's/.*Z=//g' | sed 's/:.*//g' > clusters.txt;
+    paste --delimiters='\t' vals.txt clusters.txt | sort -g -k1,1 | head -n 1 > optimal.txt;
+
+    cp ./outputs/samp\$(cat optimal.txt | cut -f2).txt $optimal_output;
+    cp ./parameters/samp\$(cat optimal.txt | cut -f2).txt $optimal_parameter;
+    cp ./rdatas/samp\$(cat optimal.txt | cut -f2).RData $optimal_rdata;
+
+  </command>
+
+  <inputs> <!-- sample id source, the three input tracks, the cluster-count range, then the advanced model settings -->
+    <conditional name="sampleid_source">
+      <param name="sampleid_selector" type="select" label="Choose the source to open the Sample Id">
+        <option value="bamfile">BAM File Name</option>
+        <option value="manual">Manual</option>
+      </param>
+      <when value="manual">
+        <param name="id" type="text" label="Tumour ID (Name)"/>
+      </when>
+      <when value="bamfile">
+        <param name="id" type="data" format="bam" label="Sequence Alignment File"/>
+      </when>
+    </conditional>
+    <param name="tc_het_file" type="data" format="wig,txt" label="Normal SNP Counts"/>
+    <param name="cnfile" type="data" format="wig,txt" label="Corrected Reads Counts"/>
+    <param name="map" type="data" format="wig,txt" label="Map Wig"/>
+    <param name="numClustersMin" type="integer" value="1" min="1" max="10" label="The Min Number of Clusters"/>
+    <param name="numClustersMax" type="integer" value="5" min="2" max="10" label="The Max Number of Clusters" help="All Clusters will be computed for all k within the min and max values"/>
+    <section name="advancedsettings" title="Advanced Settings" expanded="false">
+      <param name="ploidy" type="integer" value="2" min="1" label="Ploidy of Organism"/>
+      <param name="myskew" type="float" value="0" min="-3" max="3" label="Skew"/>
+      <param name="alphaK" type="integer" value="15000" min="1" label="alpha K"/>
+      <param name="alphaHigh" type="integer" value="20000" min="1" label="alpha High"/>
+      <param name="maxCN" type="integer" value="8" min="1" label="The Maximum Copy Number"/>
+    </section>
+  </inputs>
+  <outputs> <!-- the three files of the selected run, plus one list collection per working directory -->
+    <data name="optimal_output" format="txt"/>
+    <data name="optimal_parameter" format="txt"/>
+    <data name="optimal_rdata" format="txt"/> <!-- NOTE(review): this is the copied .RData file declared as txt; confirm the intended datatype -->
+    <collection type="list" name="list_output1" label="TITAN Output File" description="TITAN Output Files">
+      <discover_datasets directory="outputs" pattern="__name_and_ext__"/>
+    </collection>
+    <collection type="list" name="list_output2" label="TITAN Parameter File" description="TITAN Parameter Files">
+      <discover_datasets directory="parameters" pattern="__name_and_ext__"/>
+    </collection>
+    <collection type="list" name="list_output3" label="TITAN RData File" description="TITAN RData Files">
+      <discover_datasets directory="rdatas" pattern="__name_and_ext__"/>
+    </collection>
+  </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Oct 11 14:36:03 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- titancna 1.8.0 comes from the morinlab package repository; NOTE(review): it points at the Test Tool Shed, confirm that is intended -->
+  <package version="1.8.0" name="titancna">
+    <repository name="package_titancna_1_8_0" owner="morinlab" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="5991e428f9d8" prior_installation_required="True" />
+  </package>
+</tool_dependency>