changeset 0:a1f4624e2165 draft

Uploaded
author morinlab
date Sun, 04 Dec 2016 17:45:03 -0500
parents
children a1f7c7f8b135
files pyclone.xml
diffstat 1 files changed, 241 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyclone.xml	Sun Dec 04 17:45:03 2016 -0500
@@ -0,0 +1,241 @@
+<tool id="pyclone" name="PyClone" version="0.13.0.01762d9">
+	<description>
+		PyClone is a tool for inferring the cellular prevalence of point mutations from deeply sequenced data
+	</description>
+	<!-- Dependencies: three packages plus the MORINLAB_SCRIPTS environment variable, which the command section uses to locate plot_ccfs.R. -->
+	<requirements>
+		<requirement type="set_environment">MORINLAB_SCRIPTS</requirement>
+		<requirement type="package" version="14.2">morinlab_scripts</requirement>
+		<requirement type="package" version="0.13.0.01762d9">pyclone</requirement>
+		<requirement type="package" version="1.0">pyclone_dependencies</requirement>
+		<!-- <requirement type="package" version="3.11">pyyaml</requirement>
+		<requirement type="package" version="0.2.3">pydp</requirement>
+		<requirement type="package" version="0.7.1">seaborn</requirement> -->
+	</requirements>
+
+	<command>
+		<![CDATA[
+			## Step 1: run the PyClone analysis pipeline on every selected input; results are written under ./pyclone_results.
+			PyClone run_analysis_pipeline
+			--in_files
+			#for $file in $infiles:
+				'$file'
+			#end for
+			--working_dir ./pyclone_results
+			## TUMOUR_CONTENTS and SAMPLES are space-delimited lists; they are left unquoted so the shell passes one argument per sample.
+			#if $adv_opts.adv_opts_selector == "advanced" and $adv_opts.TUMOUR_CONTENTS != "":
+				--tumour_contents $adv_opts.TUMOUR_CONTENTS
+			#end if
+			#if $adv_opts.adv_opts_selector == "advanced" and $adv_opts.SAMPLES != "":
+				--samples $adv_opts.SAMPLES
+			#end if
+			#if $adv_opts.adv_opts_selector == "advanced":
+				--density $adv_opts.DENSITY
+				--num_iters $adv_opts.NUM_ITERS
+				--prior $adv_opts.PRIOR
+				--burnin $adv_opts.BURNIN
+				--thin $adv_opts.THIN
+				--mesh_size $adv_opts.MESH_SIZE
+				--min_cluster_size $adv_opts.MIN_CLUSTER_SIZE
+			#end if
+			;
+
+			## Step 2: concatenate the individual loci and cluster PDFs into one file each with Ghostscript.
+			gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=lociplots ./pyclone_results/plots/loci/parallel_coordinates.pdf ./pyclone_results/plots/loci/scatter.pdf ./pyclone_results/plots/loci/vaf_parallel_coordinates.pdf ./pyclone_results/plots/loci/vaf_scatter.pdf;
+
+			gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=clusterplots ./pyclone_results/plots/cluster/parallel_coordinates.pdf ./pyclone_results/plots/cluster/scatter.pdf ./pyclone_results/plots/cluster/density.pdf;
+
+			## Step 3: copy the merged plots, the loci density plot and both tables to the Galaxy output datasets.
+			cat ./clusterplots > $cluster_plots;
+			cat ./lociplots > $loci_plots;
+			cat ./pyclone_results/plots/loci/density.pdf > $loci_density_plot;
+			cat ./pyclone_results/tables/cluster.tsv > $cluster_table;
+			cat ./pyclone_results/tables/loci.tsv > $loci_table;
+
+			## Step 4 (multisample only): run plot_ccfs.R from the MORINLAB_SCRIPTS directory on the loci table.
+			#if $plot_custom.plot_options == "multisample":
+				Rscript \$MORINLAB_SCRIPTS/plot_pyclone_output/plot_ccfs.R
+				'$plot_custom.patient'
+				#if $adv_opts.adv_opts_selector == "advanced" and $adv_opts.SAMPLES != "":
+					--samples $adv_opts.SAMPLES
+				#end if
+				./pyclone_results/tables/loci.tsv
+				'$plot_custom.genes'
+
+				#if $plot_custom.maf_files:
+					--mafs
+					#for $maf in $plot_custom.maf_files:
+						'$maf'
+					#end for
+				#end if
+
+				## Double-quoted because several effect values (5'UTR, 3'UTR, 5'Flank, 3'Flank) contain an apostrophe that would otherwise open an unterminated shell quote.
+				#if $plot_custom.effects:
+					--effects "$plot_custom.effects"
+				#end if
+				;
+			#end if
+		]]>
+	</command>
+	<!-- Exit codes of 1 and above or of -1 and below, and output matching "Error:" or "Exception:", are flagged; no level or source attributes are set, so Galaxy's defaults apply. -->
+	<stdio>
+		<exit_code range="1:" />
+		<exit_code range=":-1" />
+		<regex match="Error:" />
+		<regex match="Exception:" />
+	</stdio>
+
+	<inputs>
+		<!-- One or more tabular datasets; the command section passes each one, single-quoted, to PyClone's in_files option. -->
+		<param type="data"   name="infiles"      format="tabular" multiple="true" label="input_file" help="PyClone input file with mutation and copy number info, as generated by generate_pyclone_input_tool" />
+		<!-- Optional PyClone tuning parameters; when "advanced" is selected, the command section passes each value below to the PyClone option of the same (lower-case) name. -->
+		<conditional name="adv_opts">
+			<param name="adv_opts_selector" type="select" label="Advanced Options">
+				<option value="basic" selected="True">Hide Advanced Options</option>
+				<option value="advanced">Show Advanced Options</option>
+			</param>
+
+			<when value="basic" />
+
+			<when value="advanced">
+				<param type="text"    name="TUMOUR_CONTENTS"  value=""                          label="Space delimited list of tumour contents. Should match the order of --in_files (the list of inputs in mutations_file). If not given tumour content is assumed to 1.0 in all samples."/>
+
+				<param type="text"    name="SAMPLES"          value=""                          label="Space delimited list of sample names. Should be in the same order as --in_files (the list of inputs in mutations_file). If not set sample name will be inferred from file names and ordering in plots will be arbitrary."/>
+
+				<param type="text"    name="DENSITY"          value="pyclone_beta_binomial"     label="Emission density for the model. Default is pyclone_beta_binomial."/>
+
+				<param type="integer" name="NUM_ITERS"        value="10000"                     label="Number of iterations of the MCMC chain. Default is 10000."/>
+
+				<param type="select"  name="PRIOR"            value="major_copy_number"         label="Method used to set the possible genotypes.">
+					<option value="major_copy_number">major_copy_number: Considers all possible genotypes with up to the major copy number of B alleles.</option>
+					<option value="parental_copy_number">parental_copy_number:  Considers all possible genotypes compatible with the predicted parental copy number.</option>
+					<option value="total_copy_number">total_copy_number: Use this if information about major and minor copy number is not available.</option>
+				</param>
+
+				<param type="integer" name="BURNIN"           value="0"                         label="Number of samples to discard as burn-in for the MCMC chain. Default is 0."/>
+
+				<param type="integer" name="THIN"             value="1"                         label="Number of samples to thin MCMC trace. For example if thin=10 every tenth sample after burn-in will be used for inference. Default is 1."/>
+
+				<param type="integer" name="MESH_SIZE"        value="101"                       label="Number of points to use for approximating the cluster posteriors. Default is 101."/>
+
+				<param type="integer" name="MIN_CLUSTER_SIZE" value="5"                         label="Clusters with fewer mutations than this value will not  be plotted."/>
+
+			</when>
+
+		</conditional>
+		<!-- Options for the extra plot_ccfs.R step, which the command section runs only when "multisample" is selected; the values below become that script's arguments. -->
+		<conditional name="plot_custom">
+			<param name="plot_options" type="select" label="Custom Plotting Options" help="If running PyClone with more than one sample, use this menu to customize CCF plots.">
+				<option value="single">Hide options (Running PyClone with a single sample)</option>
+				<option value="multisample">Show options (Running PyClone with two or more samples)</option>
+			</param>
+
+			<when value="single" />
+
+			<when value="multisample">
+				<param type="text"      name="patient"   optional="false" size="20"                       label="Patient ID"/>
+
+				<param type="data"      name="genes"     optional="false" format="txt"                     label="Label mutations in these genes (provide a file with one gene per line)"/>
+
+				<param type="data"      name="maf_files" optional="true" format="tabular" multiple="true" label="maf_file" help="MAF file for each sample, in same order as the samples were input" />
+
+				<param type="select"    name="effects"   optional="true" multiple="true"                  label="Select effect criteria to apply to mutations to label" display="checkboxes">
+					<option value="Splice_Site">Splice_Site</option>
+					<option value="Nonsense_Mutation">Nonsense_Mutation</option>
+					<option value="Frame_Shift_Del">Frame_Shift_Del</option>
+					<option value="Frame_Shift_Ins">Frame_Shift_Ins</option>
+					<option value="Nonstop_Mutation">Nonstop_Mutation</option>
+					<option value="Translation_Start_Site">Translation_Start_Site</option>
+					<option value="In_Frame_Ins">In_Frame_Ins</option>
+					<option value="In_Frame_Del">In_Frame_Del</option>
+					<option value="Missense_Mutation">Missense_Mutation</option>
+					<option value="Intron">Intron</option>
+					<option value="Splice_Region">Splice_Region</option>
+					<option value="Silent">Silent</option>
+					<option value="RNA">RNA</option>
+					<option value="5'UTR">5'UTR</option>
+					<option value="3'UTR">3'UTR</option>
+					<option value="IGR">Intergenic/regulatory region</option>
+					<option value="5'Flank">5'Flank</option>
+					<option value="3'Flank">3'Flank</option>
+				</param>
+
+			</when>
+
+		</conditional>
+
+
+	</inputs>
+
+	<outputs>
+		<!--
+			The command section writes the three PDFs and the two tables below on every run,
+			so they carry no filter. The filters they used to have tested an "output_files"
+			variable that no input parameter of this tool defines.
+		-->
+		<data name="cluster_plots" label="${tool.name} on ${on_string} - Cluster Plots" format="pdf" />
+
+		<data name="loci_plots" label="${tool.name} on ${on_string} - Loci Plots" format="pdf" />
+
+		<data name="loci_density_plot" label="${tool.name} on ${on_string} - Loci Density Plots" format="pdf" />
+
+		<!-- Tables are declared "tabular", the same format this tool's own tabular inputs use. -->
+		<data name="cluster_table" label="${tool.name} on ${on_string} - Cluster.tsv" format="tabular" />
+		<data name="loci_table" label="${tool.name} on ${on_string} - Loci.tsv" format="tabular" />
+
+		<!--
+			Files matching the discovery pattern are collected with the "png" extension.
+			NOTE(review): the command never writes to this output directly and the pattern names
+			no directory; confirm where plot_ccfs.R saves its images and what else can match.
+		-->
+		<data format="png" name="ccf_plot">
+			<discover_datasets pattern="__designation__" ext="png" visible="true" />
+		</data>
+
+	</outputs>
+
+	<help>
+		<![CDATA[
+
+		.. class:: infomark
+
+		**What this tool does**
+
+		PyClone_ is a statistical model and software tool designed to infer the prevalence of point mutations in heterogeneous cancer samples. The input data for PyClone consists of a set of read counts from a deep sequencing experiment, the copy number of the genomic region containing the mutation and an estimate of tumour content.
+
+		.. _PyClone: https://bitbucket.org/aroth85/pyclone/wiki/Home
+
+		-----
+
+		.. class:: infomark
+
+		**Input**
+
+		This tool takes a tab-separated value file with six required fields. This file can also be generated using the 'Generate PyClone input' tool, which takes a MAF file and a segments.txt file produced by Sequenza.
+
+		**The required fields of the header in this input file are**
+
+		1. mutation_id - A unique ID to identify the mutation. Good names are things such as the genomic coordinates of the mutation, e.g. chr22:12345. Gene names are not good IDs because one gene may have multiple mutations, in which case the ID is not unique and PyClone will fail to run or, worse, give unexpected results. If you want to include the gene name I suggest adding the genomic coordinates, e.g. TP53_chr17:753342.
+
+		2. ref_counts - The number of reads covering the mutation which contain the reference (genome) allele.
+
+		3. var_counts - The number of reads covering the mutation which contain the variant allele.
+
+		4. normal_cn - The copy number of the cells in the normal population. For autosomal chromosomes this will be 2 and for sex chromosomes it could be either 1 or 2. For species besides human other values are possible.
+
+		5. minor_cn - The minor copy number of the cancer cells. Usually this value will be predicted from WGSS or array data.
+
+		6. major_cn - The major copy number of the cancer cells. Usually this value will be predicted from WGSS or array data.
+
+		-----
+
+		.. class:: infomark
+
+		**Cite**
+
+		Roth et al. PyClone: statistical inference of clonal population structure in cancer PMID: 24633410
+
+		]]>
+	</help>
+
+</tool>
\ No newline at end of file