view hifiasm.xml @ 7:6a3eeb09a252 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit e11987ed5bffa0f2aa60d28acfb9301ac87d4dc4
author bgruening
date Tue, 31 May 2022 14:14:58 +0000
parents b1e0be6fdc9a
children a536bf4fb792
line wrap: on
line source

<tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description>
    <macros>
        <!-- @TOOL_VERSION@ is used both in the tool's version attribute and to pin the hifiasm requirement;
             @VERSION_SUFFIX@ is the wrapper revision appended to the tool's version attribute. -->
        <token name="@TOOL_VERSION@">0.16.1</token>
        <token name="@VERSION_SUFFIX@">3</token>
        <!-- Datatypes accepted by the shared "reads" parameter defined below. -->
        <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
        <!-- Read-input parameter expanded in both branches of the "mode" conditional. -->
        <xml name="reads">
            <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" />
        </xml>
    </macros>
    <requirements>
        <!-- hifiasm is pinned to @TOOL_VERSION@; yak is called by the command block to count parental k-mers in trio mode. -->
        <requirement type="package" version="@TOOL_VERSION@">hifiasm</requirement>
        <requirement type="package" version="0.1">yak</requirement>
    </requirements>
    <!-- Command used to report the version of the underlying hifiasm binary. -->
    <version_command>hifiasm --version</version_command>
    <command detect_errors="exit_code">

        <![CDATA[
        ## Symlink names collected while the staging commands are emitted; they are
        ## turned into command-line arguments further down.
        #set $input_files = list()
        #set $hap1_inputs = list()
        #set $hap2_inputs = list()
        #set $hic1_inputs = list()
        #set $hic2_inputs = list()

        ## Stage the HiFi reads as input_<n>.<ext> so the datatype extension is kept in the file name.
        #for idx, read in enumerate($mode.reads):
            #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension)
            ln -s '$read' $inputfile &&
            #silent $input_files.append($inputfile)
        #end for
        #set $input_filenames = ' '.join($input_files)

        ## Stage the Hi-C mates in separate directories (HiCF = R1, HiCR = R2).
        #if str($hic_partition.hic_partition_selector) == 'set':
            mkdir HiCF HiCR &&
            #for idx, read in enumerate($hic_partition.h1):
                #set $inputfile = './HiCF/input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hic1_inputs.append($inputfile)
            #end for
            #for idx, read in enumerate($hic_partition.h2):
                #set $inputfile = './HiCR/input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hic2_inputs.append($inputfile)
            #end for
        #end if

        ## Trio mode: stage the parental reads and build one yak k-mer database per haplotype.
        #if str($mode.mode_selector) == 'trio':
            #for idx, read in enumerate($mode.hap1_reads):
                #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hap1_inputs.append($inputfile)
            #end for
            #for idx, read in enumerate($mode.hap2_reads):
                #set $inputfile = 'hap2_input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hap2_inputs.append($inputfile)
            #end for
            #set $hap1_filenames = ' '.join($hap1_inputs)
            #set $hap2_filenames = ' '.join($hap2_inputs)
            ## NOTE(review): every dataset of a haplotype is passed to a single "yak count" call.
            ## yak's usage appears to list at most two positional inputs - confirm that more than
            ## one dataset per haplotype is counted as intended.
            yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap1.yak $hap1_filenames &&
            yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap2.yak $hap2_filenames &&
        #end if

        hifiasm
        -t \${GALAXY_SLOTS:-1}
        -o output
        -f $filter_bits
        #if str($advanced_options.advanced_selector) == 'set':
            -k $advanced_options.hifiasm_kmer_length
            -w $advanced_options.window_size
            -D $advanced_options.drop_kmers
            -N $advanced_options.max_overlaps
            -r $advanced_options.correction_rounds
            ## Compare against the empty string: min_hist_cnt allows 0, which a plain truth test would drop.
            #if str($advanced_options.min_hist_cnt) != '':
                --min-hist-cnt $advanced_options.min_hist_cnt
            #end if
            --max-kocc $advanced_options.max_kocc
            #if $advanced_options.hg_size:
                --hg-size $advanced_options.hg_size
            #end if

        #end if
        #if str($assembly_options.assembly_selector) == 'set':
            -a $assembly_options.cleaning_rounds
            -z $assembly_options.adapter_length
            -m $assembly_options.pop_contigs
            -p $assembly_options.pop_unitigs
            -n $assembly_options.remove_tips
            -x $assembly_options.max_overlap
            -y $assembly_options.min_overlap
            $assembly_options.disable_post_join
            $assembly_options.ignore_error_corrected
            #if $assembly_options.hom_cov:
                --hom-cov $assembly_options.hom_cov
            #end if
        #end if
        #if str($mode.mode_selector) == 'trio':
            -1 hap1.yak
            -2 hap2.yak
            -c $mode.max_kmers
            -d $mode.min_kmers
        #end if
        #if str($purge_options.purge_selector) == 'set':
            -l $purge_options.purge_level
            -s $purge_options.similarity_threshold
            -O $purge_options.minimum_overlap
            #if $purge_options.purge_max:
                --purge-max $purge_options.purge_max
            #end if
            ## n_hap allows 0, so test for "unset" rather than truthiness.
            #if str($purge_options.n_hap) != '':
                --n-hap $purge_options.n_hap
            #end if
        #end if

        #if str($hic_partition.hic_partition_selector) == 'set':
            ## hifiasm takes each Hi-C mate as ONE comma-separated list of files; a space-joined,
            ## quoted value would be read as a single file name containing spaces.
            --h1 '${ ','.join($hic1_inputs) }'
            --h2 '${ ','.join($hic2_inputs) }'
            #if $hic_partition.seed:
                --seed $hic_partition.seed
            #end if
            #if $hic_partition.n_weight:
                --n-weight $hic_partition.n_weight
            #end if
            #if $hic_partition.n_perturb:
                --n-perturb $hic_partition.n_perturb
            #end if
            ## f_perturb allows 0, so test for "unset" rather than truthiness.
            #if str($hic_partition.f_perturb) != '':
                --f-perturb $hic_partition.f_perturb
            #end if
            --l-msjoin $hic_partition.l_msjoin
        #end if

        ## Changed the default outputs of hifiasm. Hifiasm outputs a primary assembly and two balanced haplotypes in default. Incorporated the option '--primary' to output primary assembly and alternate assembly.
        --primary
        $input_filenames
        ## Capture hifiasm's stderr only when the user asked for the log output.
        #if $log_out:
            2> output.log
        #end if
        ]]>
    </command>
    <inputs>
        <conditional name="mode">
            <!-- Selects between a plain HiFi assembly and trio binning with parental reads. -->
            <param name="mode_selector" type="select" label="Assembly mode">
                <option value="standard">Standard</option>
                <option value="trio">Trio mode</option>
            </param>
            <when value="standard">
                <expand macro="reads" />
            </when>
            <when value="trio">
                <expand macro="reads" />
                <!-- Parental reads are only consumed by "yak count" in the command block. FASTA is accepted as well as
                     FASTQ, matching the datatypes of the tool's own trio test (fasta.gz). -->
                <param name="hap1_reads" type="data" format="@FORMATS@" multiple="true" label="Haplotype 1 reads" />
                <param name="hap2_reads" type="data" format="@FORMATS@" multiple="true" label="Haplotype 2 reads" />
                <!-- The parameter names are kept for compatibility: max_kmers is passed as -c (lower bound) and
                     min_kmers as -d (upper bound), as the labels state. -->
                <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" />
                <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" />
                <!-- yak rejects k-mer lengths of 64 and above, so the upper bound is 63. -->
                <param name="yak_kmer_length" type="integer" min="0" max="63" value="31" label="Yak counter k-mer length" />
            </when>
        </conditional>
        <!-- Passed to hifiasm as -f and, in trio mode, to "yak count" as -b. -->
        <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" />       
        <conditional name="assembly_options">
            <!-- "blank" adds no assembly flags; "set" makes the command block pass every option below explicitly. -->
            <param name="assembly_selector" type="select" label="Assembly options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" />
                <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" />
                <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" />
                <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" />
                <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" />
                <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
                <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
                <!-- The two booleans render as the bare flag (-u / -i) when checked and as nothing otherwise. -->
                <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" />
                <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore  error corrected reads and overlaps saved in prefix.*.bin files.  Apart from assembly graphs, hifiasm also outputs three binary files  that  save  alloverlap information during assembly step.  With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and  do  the  assembly directly  and  quickly.  This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters." />
                <!-- Optional: the command block only adds the hom-cov flag when a value is given. -->
                <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage" />
            </when>
        </conditional>
        <conditional name="purge_options">
            <!-- "blank" adds no purging flags; "set" makes the command block pass -l, -s and -O explicitly,
                 plus the optional values when they are filled in. -->
            <param name="purge_selector" type="select" label="Options for purging duplicates">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param name="purge_level" argument="-l" type="select" label="Purge level">
                    <option value="0" selected="true">None (0)</option>
                    <option value="1">Light (1)</option>
                    <option value="2">Aggressive (2)</option>
                    <option value="3">Aggressive - high heterozygosity rate (3)</option>
                </param>
                <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" />
                <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" />
                <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" />
                <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumption of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome." />
            </when>
        </conditional>
        <conditional name="hic_partition">
            <!-- "set" enables Hi-C phasing: the command block stages the R1/R2 datasets and passes them via the
                 h1/h2 options. The output filters also key off this selector. -->
            <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads" />
                <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads" />
                <!-- Optional tuning values: each flag is only added to the command line when a value is given. -->
                <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed" />
                <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links. Increasing this may improve phasing results but takes longer" />
                <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation. Increasing this may improve phasing results but takes longer" />
                <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation. Increasing this may improve phasing results but takes longer" />
                <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/>
            </when>
        </conditional>
        <conditional name="advanced_options">
            <!-- "blank" adds no advanced flags; "set" makes the command block pass -k, -w, -D, -N, -r and
                 the max-kocc option explicitly, plus the optional values when they are filled in. -->
            <param name="advanced_selector" type="select" label="Advanced options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <!-- hifiasm requires the k-mer length to be smaller than 64. -->
                <param name="hifiasm_kmer_length" argument="-k" type="integer" min="0" max="63" value="51" label="Hifiasm k-mer length" />
                <param name="window_size" argument="-w" type="integer" min="0" value="51" label="Minimizer window size" />
                <param name="drop_kmers" argument="-D" type="float" value="5.0" label="Drop k-mers" help="K-mers that occur more than this value multiplied by the coverage will be discarded" />
                <param name="max_overlaps" argument="-N" type="integer" value="100" label="Maximum overlaps to consider" help="The software selects the larger of this value and the k-mer count multiplied by coverage" />
                <param name="correction_rounds" argument="-r" type="integer" value="3" label="Correction rounds" />
                <param argument="--min-hist-cnt" type="integer" min="0" value="" optional="true" label="Minimum count threshold" help="When analyzing the k-mer spectrum, ignore counts below this value" />
                <param argument="--max-kocc" type="integer" min="0" value="20000" label="Maximum k-mer occurrence" help="Employ k-mers occurring less than INT times to rescue repetitive overlaps" />
                <!-- Free-text size such as 100m or 3g. The sanitizer silently strips every character that is not a
                     digit or one of k/K/m/M/g/G before the value reaches the command line. -->
                <param argument="--hg-size" type="text" value="" optional="true" label="Estimated haploid genome size"
                       help="Estimated haploid genome size used for inferring read coverage. If not provided, this parameter will be inferred by hifiasm. Common suffixes are required, for example, 100m or 3g">
                    <sanitizer invalid_char="">
                        <valid initial="string.digits">
                            <add value="k" />
                            <add value="K" />
                            <add value="m" />
                            <add value="M" />
                            <add value="G" />
                            <add value="g" />
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9kKmMGg]+</validator>
                </param>
            </when>
        </conditional>
        <!-- When enabled, the command block redirects stderr to output.log, which the log_file output collects. -->
        <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> 
    </inputs>
    <outputs>
        <!-- Standard mode without Hi-C partitioning: four GFA graphs collected from files written under the "output" prefix. -->
        <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}: primary assembly contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}: alternate assembly contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <!-- Trio mode without Hi-C reads: per-haplotype contig graphs plus the raw and processed unitig graphs ("output.dip.*" files). -->
        <data name="hap1_contigs" format="gfa1" from_work_dir="output.dip.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: hap1.p_ctg contig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="hap2_contigs" format="gfa1" from_work_dir="output.dip.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: hap2.p_ctg contig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="processed_unitigs_trio" format="gfa1" from_work_dir="output.dip.p_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved processed unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <!-- Standard mode with Hi-C partitioning: graphs collected from the "output.hic.*" files.
             Output names (including the historical "hic_raw_initig") are kept unchanged so that
             existing workflows and tests keep resolving them; only the labels are corrected. -->
        <data name="hic_pcontig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C primary contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_acontig_graph" format="gfa1" from_work_dir="output.hic.a_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C alternate contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.hic.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C hap1 balanced contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.hic.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C hap2 balanced contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_raw_initig" format="gfa1" from_work_dir="output.hic.r_utg.gfa" label="${tool.name} on ${on_string}: Hi-C raw unitig">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <!-- Trio mode with Hi-C reads: the raw and processed unitig graphs ("output.hic.bench.*" files).
             NOTE(review): the output names say "hapN_contigs" although the collected files are unitig graphs;
             the names are kept as-is because renaming would break existing workflows. -->
        <data name="hap1_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.r_utg.gfa" label="${tool.name} on ${on_string}: raw unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <!-- Log output: stderr captured in output.log, only produced when "log_out" is enabled. -->
        <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: log file">
            <filter>log_out</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="4">
            <!-- Standard mode, one fasta.gz input, bloom filter disabled (filter_bits=0); the alternate contig graph is expected to be empty. -->
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Standard mode with five fasta.gz datasets supplied to the multi-valued "reads" parameter. -->
            <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out2-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test log file output: enabling log_out adds a fifth output whose content must contain the exact hifiasm command line. -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <param name="log_out" value="yes"/>
            <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_line line="[M::main] CMD: hifiasm -t 1 -o output -f 0 --primary input_0.fasta.gz"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test Hi-C reads: standard mode with Hi-C partitioning enabled produces the five "output.hic.*" graphs.
             The former "l_perturb" entry was dropped because no such parameter exists in the hic_partition
             conditional, and the second balanced-haplotype assertion now targets hap2 instead of checking hap1 twice.
             NOTE(review): the hap2 size expectation is carried over from the duplicated hap1 block - confirm it
             against a real run. -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="hic_partition">
                <param name="hic_partition_selector" value="set"/>
                <param name="h1" value="hic_1.fastq.gz"/>
                <param name="h2" value="hic_2.fastq.gz"/>
                <param name="n_weight" value="1"/>
                <param name="n_perturb" value="1"/>
                <param name="l_msjoin" value="0"/>
            </conditional>
            <output name="hic_pcontig_graph" file="hifiasm-out-hifi-p.gfa" ftype="gfa1" />
            <output name="hic_acontig_graph" file="hifiasm-out-hifi-a.gfa" ftype="gfa1" />
            <output name="hic_balanced_contig_hap1_graph" ftype="gfa1">
                <assert_contents>
                    <has_text_matching expression="^S" />
                    <has_size value="59529" delta="500"/>
                </assert_contents>
            </output>
            <output name="hic_balanced_contig_hap2_graph" ftype="gfa1">
                <assert_contents>
                    <has_text_matching expression="^S" />
                    <has_size value="59529" delta="500"/>
                </assert_contents>
            </output>
            <output name="hic_raw_initig" ftype="gfa1">
                <assert_contents>
                    <has_text_matching expression="^S" />
                    <has_size value="59522" delta="500"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test trio mode: child reads plus one dataset per parental haplotype; all four trio outputs are asserted to be empty files. -->
        <test expect_num_outputs="4">
            <param name="filter_bits" value="0"/>
            <conditional name="mode">
                <param name="mode_selector" value="trio"/>
                <param name="reads" value="child.fasta.gz"/>
                <param name="hap1_reads" value="paternal.fasta.gz"/>
                <param name="hap2_reads" value="maternal.fasta.gz"/>
                <param name="max_kmers" value="2"/>
                <param name="min_kmers" value="5"/>
            </conditional>
            <output name="raw_unitigs_trio" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
            <output name="processed_unitigs_trio" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
            <output name="hap1_contigs">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
            <output name="hap2_contigs">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test ignore-error-corrected option: enabling the boolean adds the -i flag to the hifiasm call. -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="assembly_options">
                <param name="assembly_selector" value="set"/>
                <param name="ignore_error_corrected" value="True"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out3-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out3-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out3-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test expected haplotype number: purge options enabled with n_hap=1. -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="purge_options">
                <param name="purge_selector" value="set"/>
                <param name="n_hap" value="1"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out4-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out4-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out4-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test min_hist_cnt option: advanced options enabled with min_hist_cnt=1. -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="min_hist_cnt" value="1"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out5-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out5-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out5-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph"  ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test max_kocc option: advanced options enabled with a non-default max_kocc value.
             The parameter was previously misspelled "max_kooc", so the value never reached the tool.
             NOTE(review): regenerate the hifiasm-out6-* files if the corrected value changes the graphs. -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="max_kocc" value="21000"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out6-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out6-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out6-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test hg-size option: advanced options enabled with the suffixed size value "1k". -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="hg_size" value="1k"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out7-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out7-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out7-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph"  ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test hom-cov option: assembly options enabled with an explicit homozygous coverage.
             The parameter name derived from the hom-cov argument is "hom_cov"; the previous "hom-cov" spelling
             matched no input, so the option was never exercised.
             NOTE(review): regenerate the hifiasm-out8-* files if the corrected value changes the graphs. -->
        <test expect_num_outputs="4">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <conditional name="assembly_options">
                <param name="assembly_selector" value="set"/>
                <param name="hom_cov" value="1000"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out8-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out8-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out8-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" ftype="gfa1">
                <assert_contents>
                    <has_size value="0"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**HiFiASM - a fast de novo assembler**


Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data.

----

.. class:: infomark

**Assembly mode**

- *Standard*
- *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning.

----

.. class:: infomark

**Outputs**

Non Trio assembly:

- Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors.
- Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information.
- Primary assembly contig graph : This graph collapses different haplotypes.
- Alternate assembly contig graph : This graph consists of all assemblies that are discarded in primary contig graph.


Trio assembly:

- Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information.
- Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly.
- Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly.


]]></help>
    <citations>
        <!-- DOI of the publication users are asked to cite when using this tool. -->
        <citation type="doi">10.1038/s41592-020-01056-5</citation>
    </citations>
</tool>