Mercurial > repos > morinlab > sequenza
changeset 0:73338a1805e7 draft
planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/sequenza commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
| author | morinlab |
|---|---|
| date | Tue, 11 Oct 2016 14:31:59 -0400 |
| parents | |
| children | a8359c3073ba |
| files | create_seqz_file.xml sequenza_pipeline.R sequenza_pipeline.xml tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
| diffstat | 6 files changed, 279 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/create_seqz_file.xml Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,151 @@
+<tool id="create_seqz_file" name="Create Seqz File" version="2.1.2">
+  <description>
+    extracts the common positions containing A and B allele frequencies
+  </description>
+  <!--
+    Runs "sequenza-utils.py bam2seqz | sequenza-utils.py seqz-binning" on a normal/tumour BAM pair,
+    once in total or once per entry of the optional interval file, and keeps only
+    the rows whose 4th and 5th columns are both at least min_depth.
+  -->
+  <requirements>
+    <requirement type="package" version="2.1.2">sequenza</requirement>
+    <requirement type="package" version="5.4.1">pypy</requirement>
+    <requirement type="set_environment" version="2.1.2">SEQUENZA_INSTALL_DIR</requirement>
+  </requirements>
+  <command><![CDATA[
+
+    ## Link the BAMs and their indexes under the fixed names used by the pipeline below.
+    ln -s '$normal' normal.bam;
+    ln -s '$normal.metadata.bam_index' normal.bam.bai;
+    ln -s '$tumour' tumour.bam;
+    ln -s '$tumour.metadata.bam_index' tumour.bam.bai;
+
+    ## A history FASTA is linked and indexed here; a cached one is linked from the data table path.
+    #if $reference_source.reference_source_selector == "history":
+      ln -s '$reference_source.ref_file' reference.fa;
+      samtools faidx reference.fa;
+    #elif $reference_source.reference_source_selector == "cached":
+      ln -s '${reference_source.ref_file.fields.path}' reference.fa;
+    #end if
+
+    ## Use pypy when it is on PATH, otherwise plain python. The test is done with
+    ## 'command -v' so that EXEC only changes when pypy really is available.
+    EXEC=python;
+    if command -v pypy > /dev/null 2>&1 ; then
+      EXEC=pypy;
+    fi ;
+
+    ## With an interval file, the pipeline below runs once per whitespace-separated entry.
+    #if $interval
+      #if $gzip.gzip_selector == "yes"
+        echo 1 >> '$bytes';
+      #end if
+      for int in \$( cat '$interval' ); do
+    #end if
+
+    ## With an order file, every interval other than its first line pipes through
+    ## tail -n+2 below, dropping that interval's first output line (presumably the header).
+    #if $interval and $order_file
+      if [ "\$int" != "\$( head -n1 '$order_file' )" ] ; then
+        skip=2;
+      else
+        skip=1;
+      fi ;
+    #end if
+
+    \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py bam2seqz
+      -n normal.bam
+      -t tumour.bam
+      -gc '$gc_file'
+      -F reference.fa
+      #if $interval
+        -C \$int
+      #end if
+      --hom $geno.hom
+      --het $geno.het
+      -q $qual.qlimit
+      -f $qual.qformat
+      -N $qual.depth
+
+    | \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py seqz-binning
+      -s -
+      -w $window
+
+    | awk '{ if (\$4 >= $min_depth && \$5 >= $min_depth) print \$0 }'
+
+    #if $gzip.gzip_selector == "yes":
+      #if $order_file and $interval:
+        | tail -n+\$skip | gzip >> '$output_gzip';
+      #else
+        | gzip >> '$output_gzip';
+      #end if
+
+      ## After each interval, append (current size of the gzip output + 1) to the bytes file.
+      #if $interval:
+        echo \$(( \$( wc -c < '$output_gzip' ) + 1 )) >> '$bytes';
+      #end if
+    #else:
+      >> '$output';
+    #end if
+
+    #if $interval
+      done;
+    #end if
+
+  ]]></command>
+  <inputs>
+    <conditional name="reference_source">
+      <param label="Choose the source for the reference files" name="reference_source_selector" type="select">
+        <option value="cached">Locally Cached</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param label="Genome" name="ref_file" type="select">
+          <options from_data_table="fasta_indexes"/>
+        </param>
+      </when>
+      <when value="history">
+        <param label="Genome" name="ref_file" type="data" format="fasta"/>
+      </when>
+    </conditional>
+    <!-- format="bam" because the command links metadata.bam_index of both alignment files -->
+    <param name="normal" label="Normal Alignment File (BAM)" type="data" format="bam"/>
+    <param name="tumour" label="Tumour Alignment File (BAM)" type="data" format="bam"/>
+    <param label="GC Window File" name="gc_file" type="data" format="tabular"/>
+    <param label="Bin Output by Window" name="window" type="integer" min="1" max="50" value="50"/>
+    <param name="min_depth" label="Minimum coverage for variant to be used in model" type="integer" min="1" max="50" value="12"/>
+    <conditional name="gzip">
+      <param label="Should the output be gzipped" name="gzip_selector" type="select">
+        <option value="yes" selected="true">Yes</option>
+        <option value="no">No</option>
+      </param>
+      <when value="yes"/>
+      <when value="no"/>
+    </conditional>
+    <param name="interval" optional="true" label="Restrict Computation to a particular Interval" help="Must be present in the BAM" type="data" format="txt"/>
+    <param name="order_file" optional="true" label="Order File" help="Should be Present with Interval File" type="data" format="txt"/>
+    <section name="geno" title="Genotyping Options" expanded="False">
+      <param name="hom" label="Threshold to Select Homozygous Positions" type="float" value="0.9" min="0" max="1"/>
+      <param name="het" label="Threshold to Select Heterozygous Positions" type="float" value="0.25" min="0" max="1"/>
+    </section>
+    <section name="qual" title="Quality Threshold Options" expanded="False">
+      <param name="depth" label="Threshold to Filter Positions" help="The sum of read depth in both samples" value="20" type="integer"/>
+      <param name="qlimit" label="Minimum Nucleotide Quality Score" value="20" type="integer"/>
+      <param name="qformat" label="Quality Format" type="select">
+        <option value="sanger">Sanger</option>
+        <option value="illumina">Illumina</option>
+      </param>
+    </section>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" label="Seqz File">
+      <filter>gzip['gzip_selector'] == "no"</filter>
+    </data>
+    <data format="txt" name="output_gzip" label="Gzipped Seqz File">
+      <filter>gzip['gzip_selector'] == "yes"</filter>
+    </data>
+    <data format="txt" name="bytes" label="Gzip Block Locations">
+      <filter>gzip['gzip_selector'] == "yes"</filter>
+    </data>
+  </outputs>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequenza_pipeline.R Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env Rscript
+
+# Script to Run Sequenza Pipeline in Galaxy
+#
+# Usage:
+#   Rscript sequenza_pipeline.R <seqz file> <sample id> <output dir> [<ploidy> <cellularity>]
+#
+# With three arguments the result of sequenza.fit() is passed to
+# sequenza.results(). When ploidy and cellularity are also given, they are
+# passed to sequenza.results() instead and sequenza.fit() is not run.
+library(sequenza);
+
+# INPUT ARGUMENTS
+args <- commandArgs(TRUE);
+
+if (length(args) < 3 || length(args) == 4) {
+  stop("usage: sequenza_pipeline.R <seqz file> <sample id> <output dir> [<ploidy> <cellularity>]");
+}
+
+input.file <- args[1];
+sample.id <- args[2];
+out.dir <- args[3];
+manual.fit <- length(args) >= 5;
+
+if (manual.fit) {
+  # commandArgs() returns character strings; convert them before use as numbers.
+  ploidy <- suppressWarnings(as.numeric(args[4]));
+  cellularity <- suppressWarnings(as.numeric(args[5]));
+  if (is.na(ploidy) || is.na(cellularity)) {
+    stop("ploidy and cellularity must be numeric, got: ", args[4], " and ", args[5]);
+  }
+}
+
+# STEP ONE
+extract.data <- sequenza.extract(
+  file = input.file,
+  gz = TRUE
+);
+
+if (manual.fit) {
+  # STEP TWO (manual): write results for the user-supplied cellularity and ploidy
+  results.data <- sequenza.results(
+    extract.data,
+    cellularity = cellularity,
+    ploidy = ploidy,
+    out.dir = out.dir,
+    sample.id = sample.id
+  );
+} else {
+  # STEP TWO: the fit is only needed on this branch, so it is only computed here
+  fit.data <- sequenza.fit(extract.data);
+
+  # STEP THREE
+  results.data <- sequenza.results(
+    extract.data,
+    fit.data,
+    out.dir = out.dir,
+    sample.id = sample.id
+  );
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequenza_pipeline.xml Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,64 @@
+<tool id="sequenza_pipeline" name="Analyze Seqz File" version="2.1.2">
+  <description>
+    using general R sequenza pipeline
+  </description>
+  <requirements>
+    <requirement type="package" version="2.1.2">sequenza</requirement>
+    <requirement type="set_environment" version="2.1.2">SEQUENZA_INSTALL_DIR</requirement>
+  </requirements>
+  <command><![CDATA[
+
+    ## Positional arguments handed to sequenza_pipeline.R, in order:
+    ## seqz file, sample id, output directory, then optionally ploidy and cellularity.
+    Rscript '$__tool_directory__/sequenza_pipeline.R' '$input'
+
+    #if $sampleid_source.sampleid_selector == "bamfile":
+      ## NOTE(review): basename is applied to the dataset path, which may not be the BAM's display name - confirm.
+      "\$(basename '$sampleid_source.id' | sed 's/\.bam\$//')"
+    #else:
+      '$sampleid_source.id'
+    #end if
+
+    output
+
+    ## ploidy and cellularity are defined inside the choose_fit_option conditional.
+    #if $choose_fit_option.fit_option == "manual":
+      $choose_fit_option.ploidy $choose_fit_option.cellularity
+    #end if
+
+    2>&1
+
+    ## Collect the segments file only when the R script succeeded.
+    && cat ./output/*segments.txt > '$output';
+
+  ]]></command>
+  <inputs>
+    <conditional name="sampleid_source">
+      <param label="Choose the source to open the Sample Id" name="sampleid_selector" type="select">
+        <option value="bamfile">BAM File Name</option>
+        <option value="manual">Manual</option>
+      </param>
+      <when value="manual">
+        <param name="id" type="text" label="Tumour ID (Name)"/>
+      </when>
+      <when value="bamfile">
+        <param type="data" format="bam" name="id" label="Sequence Alignment File"/>
+      </when>
+    </conditional>
+    <conditional name="choose_fit_option">
+      <param label="Use the best fit or manually set the ploidy and cellularity" name="fit_option" type="select">
+        <option value="bestfit">Best fit</option>
+        <option value="manual">Manually specify</option>
+      </param>
+      <when value="bestfit"/>
+      <when value="manual">
+        <param name="ploidy" type="float" label="ploidy" value="2" />
+        <param name="cellularity" type="float" label="cellularity" value="1" />
+      </when>
+    </conditional>
+    <param name="input" format="txt,tabular" type="data" label="Input Sequenza Seqz File"/>
+  </inputs>
+  <outputs>
+    <data name="output" format="seg,txt"/>
+  </outputs>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Tue Oct 11 14:31:59 2016 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables> <!-- "fasta_indexes" is the table create_seqz_file.xml selects from (from_data_table) and whose "path" column it links as reference.fa -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc"/>
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Oct 11 14:31:59 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Declares the sequenza 2.1.2 package that both tool XML files list as a requirement. NOTE(review): the pypy 5.4.1 requirement of create_seqz_file.xml has no entry here - confirm it is provided elsewhere. -->
+    <package name="sequenza" version="2.1.2">
+        <repository changeset_revision="6340da10d134" name="package_sequenza_2_1_2" owner="morinlab" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
