changeset 0:73338a1805e7 draft

planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/sequenza commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
author morinlab
date Tue, 11 Oct 2016 14:31:59 -0400
parents
children a8359c3073ba
files create_seqz_file.xml sequenza_pipeline.R sequenza_pipeline.xml tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 6 files changed, 279 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_seqz_file.xml	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,140 @@
+<tool id="create_seqz_file" name="Create Seqz File" version="2.1.2">
+<description>
+    extracts the common positions containing A and B allele frequencies
+</description>
+<requirements> <!-- sequenza and pypy packages, plus the SEQUENZA_INSTALL_DIR environment setting -->
+    <requirement version="2.1.2" type="package">sequenza</requirement>
+    <requirement version="5.4.1" type="package">pypy</requirement>
+    <requirement version="2.1.2" type="set_environment">SEQUENZA_INSTALL_DIR</requirement>
+</requirements>
+<command><![CDATA[
+    ## Link the BAMs and their indexes under fixed names in the job directory.
+    ln -s $normal normal.bam;
+    ln -s $normal.metadata.bam_index normal.bam.bai;
+    ln -s $tumour tumour.bam;
+    ln -s $tumour.metadata.bam_index tumour.bam.bai;
+
+    ## Link the reference as reference.fa; a history FASTA is indexed with samtools faidx.
+    #if $reference_source.reference_source_selector == "history":
+      ln -s $reference_source.ref_file reference.fa;
+      samtools faidx reference.fa;
+    #elif $reference_source.reference_source_selector == "cached":
+      ln -s ${reference_source.ref_file.fields.path} reference.fa;
+    #end if
+    ## Run sequenza-utils with pypy only when it is really on PATH, otherwise with python.
+    EXEC=python;
+    if command -v pypy > /dev/null 2>&1 ; then
+      EXEC=pypy;
+    fi ;
+    ## With an interval file: seed the offsets file with 1, then loop over the interval entries.
+    #if $interval
+      #if $gzip.gzip_selector == "yes"
+        echo 1 >> $bytes;
+      #end if
+      for int in \$( cat $interval ); do
+    #end if
+    ## skip=2 drops the first output line for every interval except the first one in the order file.
+    #if $interval and $order_file
+      if [ "\$int" != "\$( head -n1 $order_file )" ] ; then
+        skip=2;
+      else
+        skip=1;
+      fi ;
+    #end if
+    ## bam2seqz output is binned, then rows are kept when columns 4 and 5 both reach min_depth.
+    \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py bam2seqz
+      -n normal.bam
+      -t tumour.bam
+      -gc $gc_file
+      -F reference.fa
+
+      #if $interval
+        -C \$int
+      #end if
+
+      --hom $geno.hom
+      --het $geno.het
+      -q $qual.qlimit
+      -f $qual.qformat
+      -N $qual.depth
+
+    | \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py seqz-binning
+      -s -
+      -w $window
+
+    | awk '{ if (\$4 >= $min_depth && \$5 >= $min_depth) print \$0 }'
+
+    #if $gzip.gzip_selector == "yes":
+      #if $order_file and $interval:
+        | tail -n+\$skip | gzip >> $output_gzip;
+      #else
+        | gzip >> $output_gzip;
+      #end if
+      ## Record the current size of the gzip output plus one, i.e. where the next appended block starts.
+      #if $interval:
+        echo \$(( \$( wc -c < $output_gzip ) + 1 )) >> $bytes;
+      #end if
+      ## NOTE(review): the plain-text branch below never applies the skip offset; confirm that is intended.
+    #else:
+      >> $output;
+    #end if
+
+    #if $interval
+      done;
+    #end if
+
+]]></command>
+<inputs> <!-- normal and tumour are declared as bam because the command links their metadata.bam_index -->
+  <conditional name="reference_source">
+    <param name="reference_source_selector" type="select" label="Choose the source for the reference files">
+      <option value="cached">Locally Cached</option>
+      <option value="history">History</option>
+    </param>
+    <when value="cached">
+      <param name="ref_file" type="select" label="Genome">
+        <options from_data_table="fasta_indexes"/>
+      </param>
+    </when>
+    <when value="history">
+      <param name="ref_file" type="data" format="fasta" label="Genome"/>
+    </when>
+  </conditional>
+  <param name="normal" type="data" format="bam" label="Normal Alignment File (BAM)"/>
+  <param name="tumour" type="data" format="bam" label="Tumour Alignment File (BAM)"/>
+  <param name="gc_file" type="data" format="tabular" label="GC Window File"/>
+  <param name="window" type="integer" min="1" max="50" value="50" label="Bin Output by Window"/>
+  <param name="min_depth" type="integer" min="1" max="50" value="12" label="Minimum coverage for variant to be used in model"/>
+  <conditional name="gzip">
+    <param name="gzip_selector" type="select" label="Should the output be gzipped">
+      <option value="yes" selected="true">Yes</option>
+      <option value="no">No</option>
+    </param>
+  </conditional>
+  <param name="interval" type="data" format="txt" optional="true" label="Restrict Computation to a particular Interval"
+         help="Must be present in the BAM"/>
+  <param name="order_file" type="data" format="txt" optional="true" label="Order File" help="Should be Present with Interval File"/>
+  <section name="geno" title="Genotyping Options" expanded="False">
+    <param name="hom" type="float" value="0.9" min="0" max="1" label="Threshold to Select Homozygous Positions"/>
+    <param name="het" type="float" value="0.25" min="0" max="1" label="Threshold to Select Heterozygous Positions"/>
+  </section>
+  <section name="qual" title="Quality Threshold Options" expanded="False">
+    <param name="depth" type="integer" value="20" label="Threshold to Filter Positions" help="The sum of read depth in both samples"/>
+    <param name="qlimit" type="integer" value="20" label="Minimum Nucleotide Quality Score"/>
+    <param name="qformat" type="select" label="Quality Format">
+      <option value="sanger">Sanger</option>
+      <option value="illumina">Illumina</option>
+    </param>
+  </section>
+</inputs>
+<outputs> <!-- each output is shown only for the matching gzip_selector choice -->
+  <data name="output" format="tabular" label="Seqz File">
+    <filter>gzip['gzip_selector'] == "no"</filter>
+  </data>
+  <data name="output_gzip" format="txt" label="Gzipped Seqz File">
+    <filter>gzip['gzip_selector'] == "yes"</filter>
+  </data>
+  <data name="bytes" format="txt" label="Gzip Block Locations">
+    <filter>gzip['gzip_selector'] == "yes"</filter>
+  </data>
+</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequenza_pipeline.R	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,37 @@
+#!/usr/bin/R
+
+# Run the Sequenza pipeline in Galaxy.
+# Usage: Rscript sequenza_pipeline.R <seqz file> <sample id> <out dir> [<ploidy> <cellularity>]
+library(sequenza);
+
+# Positional arguments; ploidy and cellularity are optional and only used when both are given.
+args <- commandArgs(TRUE);
+input.file <- args[1];
+sample.id <- args[2];
+out.dir <- args[3];
+manual.fit <- length(args) >= 5;
+
+# STEP ONE: load the seqz file (read with gz = TRUE).
+extract.data <- sequenza.extract(
+  file = input.file,
+  gz = TRUE
+);
+
+# STEP TWO: fit the model. The fit is computed in both modes but is only passed to
+# sequenza.results when no manual ploidy/cellularity is supplied.
+fit.data <- sequenza.fit(extract.data);
+
+# STEP THREE: write the results into out.dir.
+if (manual.fit) {
+  # commandArgs() returns character strings, so convert before passing them on.
+  ploidy <- as.numeric(args[4]);
+  cellularity <- as.numeric(args[5]);
+  if (is.na(ploidy) || is.na(cellularity)) {
+    stop("ploidy and cellularity must be numeric");
+  }
+  results.data <- sequenza.results(extract.data, cellularity = cellularity, ploidy = ploidy,
+                                   out.dir = out.dir, sample.id = sample.id);
+} else {
+  results.data <- sequenza.results(extract.data, fit.data,
+                                   out.dir = out.dir, sample.id = sample.id);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequenza_pipeline.xml	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,60 @@
+<tool id="sequenza_pipeline" name="Analyze Seqz File" version="2.1.2">
+  <description>
+  using general R sequenza pipeline
+  </description>
+  <requirements> <!-- sequenza package plus the SEQUENZA_INSTALL_DIR environment setting -->
+      <requirement version="2.1.2" type="package">sequenza</requirement>
+      <requirement version="2.1.2" type="set_environment">SEQUENZA_INSTALL_DIR</requirement>
+  </requirements>
+<command>
+    ## Arguments: seqz file, sample id, output directory, then optionally ploidy and cellularity.
+Rscript $__tool_directory__/sequenza_pipeline.R $input
+
+    #if $sampleid_source.sampleid_selector == "bamfile":
+      \$(basename $sampleid_source.id | sed 's/.bam$//g' )
+    #else:
+      $sampleid_source.id
+    #end if
+
+    output
+    ## ploidy and cellularity are declared inside the choose_fit_option conditional, so address them through it.
+    #if $choose_fit_option.fit_option == "manual":
+
+      $choose_fit_option.ploidy $choose_fit_option.cellularity
+
+    #end if
+
+    2>&amp;1 ;
+  ## Copy the segments file(s) found in ./output into the tool output dataset.
+  cat ./output/*segments.txt > $output;
+
+  </command>
+<inputs> <!-- sample id source, fit mode, and the seqz file to analyse -->
+    <conditional name="sampleid_source">
+      <param name="sampleid_selector" type="select" label="Choose the source to open the Sample Id">
+        <option value="bamfile">BAM File Name</option>
+        <option value="manual">Manual</option>
+      </param>
+      <when value="bamfile">
+        <param name="id" type="data" format="bam" label="Sequence Alignment File"/>
+      </when>
+      <when value="manual">
+        <param name="id" type="text" label="Tumour ID (Name)"/>
+      </when>
+    </conditional>
+    <conditional name="choose_fit_option">
+      <param name="fit_option" type="select" label="Use the best fit or manually set the ploidy and cellularity">
+        <option value="bestfit">Best fit</option>
+        <option value="manual">Manually specify</option>
+      </param>
+      <when value="manual">
+        <param name="ploidy" type="float" value="2" label="ploidy"/>
+        <param name="cellularity" type="float" value="1" label="cellularity"/>
+      </when>
+    </conditional>
+    <param name="input" type="data" format="txt,tabular" label="Input Sequenza Seqz File"/>
+  </inputs>
+  <outputs> <!-- single dataset; the command fills it with the concatenated ./output/*segments.txt -->
+    <data name="output" format="seg,txt"/> <!-- NOTE(review): an output takes one datatype; confirm whether "seg" or "txt" is intended -->
+  </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables> <!-- registers the fasta_indexes table backed by tool-data/fasta_indexes.loc -->
+  <table comment_char="#" name="fasta_indexes">
+    <columns>value, dbkey, name, path</columns>
+    <file path="tool-data/fasta_indexes.loc"/>
+  </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- resolves the sequenza 2.1.2 package from the morinlab repository on the test tool shed -->
+    <package version="2.1.2" name="sequenza">
+        <repository name="package_sequenza_2_1_2" owner="morinlab" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="6340da10d134"/>
+    </package>
+</tool_dependency>