Mercurial > repos > morinlab > sequenza
diff create_seqz_file.xml @ 0:73338a1805e7 draft
planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/sequenza commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
| author | morinlab |
|---|---|
| date | Tue, 11 Oct 2016 14:31:59 -0400 |
| parents | |
| children | a8359c3073ba |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_seqz_file.xml Tue Oct 11 14:31:59 2016 -0400 @@ -0,0 +1,140 @@ +<tool id="create_seqz_file" name="Create Seqz File" version="2.1.2"> +<description> + extracts the common positions containing A and B allele frequencies +</description> +<requirements> + <requirement type="package" version="2.1.2">sequenza</requirement> + <requirement type="package" version="5.4.1">pypy</requirement> + <requirement type="set_environment" version="2.1.2">SEQUENZA_INSTALL_DIR</requirement> +</requirements> +<command><![CDATA[ + + ln -s $normal normal.bam; + ln -s $normal.metadata.bam_index normal.bam.bai; + ln -s $tumour tumour.bam; + ln -s $tumour.metadata.bam_index tumour.bam.bai; + + + #if $reference_source.reference_source_selector == "history": + ln -s $reference_source.ref_file reference.fa; + samtools faidx reference.fa; + #elif $reference_source.reference_source_selector == "cached": + ln -s ${reference_source.ref_file.fields.path} reference.fa; + #end if + + EXEC=python; + if [ $(which pypy)!="" ] ; then + EXEC=pypy; + fi ; + + #if $interval + #if $gzip.gzip_selector == "yes" + echo 1 >> $bytes; + #end if + for int in \$( cat $interval ); do + #end if + + #if $interval and $order_file + if [ "\$int" != "\$( cat $order_file | head -n1)" ] ; then + skip=2; + else + skip=1; + fi ; + #end if + + \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py bam2seqz + -n normal.bam + -t tumour.bam + -gc $gc_file + -F reference.fa + + #if $interval + -C \$int + #end if + + --hom $geno.hom + --het $geno.het + -q $qual.qlimit + -f $qual.qformat + -N $qual.depth + + | \$EXEC \$SEQUENZA_INSTALL_DIR/sequenza/exec/sequenza-utils.py seqz-binning + -s - + -w $window + + | awk '{ if (\$4 >= $min_depth && \$5 >= $min_depth) print \$_ }' + + #if $gzip.gzip_selector == "yes": + #if $order_file and $interval: + | tail -n+\$skip | gzip >> $output_gzip; + #else + | gzip >> $output_gzip; + #end if + + #if $interval: + echo \$(( \$( wc -c < $output_gzip ) + 1 )) >> $bytes; + #end if + + #else: + >> $output; + #end if + + #if $interval + done; + #end if + +]]></command> +<inputs> +<conditional name="reference_source"> +<param label="Choose the source for the reference files" name="reference_source_selector" type="select"> +<option value="cached">Locally Cached</option> +<option value="history">History</option> +</param> +<when value="cached"> +<param label="Genome" name="ref_file" type="select"> +<options from_data_table="fasta_indexes"/> +</param> +</when> +<when value="history"> +<param label="Genome" name="ref_file" type="data" format="fasta"/> +</when> +</conditional> +<param name="normal" label="Normal Alignment File (BAM)" type="data" format="data"/> +<param name="tumour" label="Tumour Alignment File (BAM)" type="data" format="data"/> +<param label="GC Window File" name="gc_file" type="data" format="tabular"/> +<param label="Bin Output by Window" name="window" type="integer" min="1" max="50" value="50"/> +<param name="min_depth" label="Minimum coverage for variant to be used in model" type="integer" min="1" max="50" value="12"/> + <conditional name="gzip"> + <param label="Should the output be gzipped" name="gzip_selector" type="select"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + </conditional> + <param name="interval" optional="true" label="Restrict Computation to a particular Interval" help="Must be present in the BAM" +type="data" format="txt"/> +<param name="order_file" optional="true" label="Order File" help="Should be Present with Interval File" type="data" format="txt"/> + <section name="geno" title="Genotyping Options" expanded="False"> +<param name="hom" label="Threshold to Select Homozygous Positions" type="float" value="0.9" min="0" max="1"/> +<param name="het" label="Threshold to Select Heterozygous Positions" type="float" value="0.25" min="0" max="1"/> +</section> +<section name="qual" title="Quality Threshold Options" expanded="False"> +<param name="depth" label="Treshold to Filter Positions" help="The sum of read depth in both samples" value="20" type="integer"/> +<param name="qlimit" label="Minimum Nucleotide Quality Score" value="20" type="integer"/> +<param name="qformat" label="Quality Format" type="select"> +<option value="sanger">Sanger</option> +<option value="illumina">Illumina</option> +</param> +</section> +</inputs> +<outputs> +<data format="tabular" name="output" label="Seqz File"> + <filter>gzip['gzip_selector'] == "no"</filter> + </data> + <data format="txt" name="output_gzip" label="Gzipped Seqz File"> + <filter>gzip['gzip_selector'] == "yes"</filter> + </data> + <data format="txt" name="bytes" label="Gzip Block Locations"> + <filter>gzip['gzip_selector'] == "yes"</filter> + </data> +</outputs> +</tool>
