Mercurial > repos > iuc > hmmer_hmmsearch

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hmmbuild.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,114 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for HMMER hmmbuild: builds a profile HMM from the alignment
+  given as msafile and writes it to hmmout. The command line is assembled from
+  tokens in macros.xml; the ADDTHREADS token comes first because it sets the
+  shell variable that the CPU token passes on to hmmbuild.
+-->
+<tool id="hmmer_hmmbuild" name="hmmbuild" version="@TOOL_VERSION@+galaxy1">
+  <description>Build a profile HMM from an input multiple alignment</description>
+  <expand macro="bio_tools"/>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+hmmbuild
+
+#if $hmmname:
+    -n '$hmmname'
+#end if
+
+@FORMAT_SELECTOR@
+@MCSS@
+@ARSWS@
+@AEEWS@
+@PRIOR@
+@HSSI@
+@EVAL_CALIB@
+
+@CPU@
+@SEED@
+
+@LENGTHS@
+#if str($maxinsertlen):
+    --maxinsertlen $maxinsertlen
+#end if
+
+'$hmmout'
+'$msafile'
+  ]]></command>
+  <inputs>
+    <expand macro="input_msa" />
+    <param name="hmmname" argument="-n" type="text" optional="true" label="Name for the HMM" />
+    <expand macro="format_selector"/>
+    <expand macro="mcss"/>
+    <expand macro="arsws"/>
+    <expand macro="aeews"/>
+    <expand macro="prior"/>
+    <expand macro="hssi"/>
+    <expand macro="eval_calib_xml"/>
+    <expand macro="seed"/>
+
+    <expand macro="lengths" />
+    <param argument="--maxinsertlen" type="integer" optional="true" label="Pretend all inserts are length &lt;= n" />
+  </inputs>
+  <outputs>
+    <data name="hmmout" format="hmm3" label="HMM profile from $msafile.name"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="msafile" value="globins4.sto"/>
+      <expand macro="seed_test" />
+      <output name="hmmout" file="globins4.hmm" compare="sim_size">
+          <assert_contents>
+              <has_line_matching expression="HMMER3/f.*"/>
+              <has_line_matching expression="ALPH  amino"/>
+              <has_line_matching expression="NSEQ  4"/>
+              <has_line_matching expression="//"/>
+          </assert_contents>
+      </output>
+    </test>
+    <test>
+      <param name="msafile" value="MADE1.sto"/>
+      <param name="input_format_select" value="--dna"/>
+      <expand macro="seed_test" />
+      <output name="hmmout" file="MADE1.hmm" compare="sim_size">
+          <assert_contents>
+              <has_line_matching expression="HMMER3/f.*"/>
+              <has_line_matching expression="ALPH  DNA"/>
+              <has_line_matching expression="NSEQ  1997"/>
+              <has_line_matching expression="//"/>
+          </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+For each multiple sequence alignment in <msafile> build a profile HMM and save
+it to a new file <hmmfile out>.
+
+@HELP_PRE_OTH@
+
+@FORMAT_SELECTOR_HELP@
+@MCSS_HELP@
+@ARSWS_HELP@
+@AEEWS_HELP@
+@PRIOR_HELP@
+@HSSI_HELP@
+@EVAL_CALIB_HELP@
+@SEED_HELP@
+@LENGTHS_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hmmscan.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for HMMER hmmscan: searches the sequences in seqfile against
+  an HMM database chosen through the input_hmm_choice macro. The ADDTHREADS
+  token comes first because it sets the shell variable used by the CPU token;
+  the INPUTHMMCHOICE token then defines input_hmm_filename for the command.
+-->
+<tool id="hmmer_hmmscan" name="hmmscan" version="@TOOL_VERSION@+galaxy1">
+  <description>search protein sequence(s) against a protein profile database</description>
+  <expand macro="bio_tools"/>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+@INPUTHMMCHOICE@
+hmmscan
+
+@OFORMAT_WITH_OPTS@
+@THRESHOLDS@
+@CUT@
+@ACCEL_HEUR@
+@ADV_OPTS@
+@CPU@
+@SEED@
+
+'$input_hmm_filename'
+'$seqfile'
+> '$output'
+  ]]></command>
+  <inputs>
+    <expand macro="input_hmm_choice" />
+    <!-- todo use Galaxy features like data libraries/data tables/??? -->
+    <param name="seqfile" type="data" format="fasta" label="Sequence file"/>
+    <expand macro="oformat_with_opts_dom_pfam"/>
+    <expand macro="thresholds_xml"/>
+    <expand macro="cut"/>
+    <expand macro="accel_heur_xml"/>
+    <expand macro="adv_opts"/>
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <expand macro="output_dom_pfam" tool="HMMSCAN"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="input_hmm_conditional|input_hmm_source" value="history"/>
+      <param name="input_hmm_conditional|hmmfile" value="MADE1.hmm"/>
+      <param name="seqfile" value="dna_target.fa"/>
+      <expand macro="oformat_test" />
+      <expand macro="seed_test" />
+      <output name="output" file="MADE1.out" lines_diff="24">
+          <expand macro="assert_out" tool="hmmscan"/>
+      </output>
+      <output name="tblout" file="MADE1.out.tblout" lines_diff="14">
+          <expand macro="assert_tblout" tool="hmmscan"/>
+      </output>
+      <output name="domtblout" file="MADE1.out.domtblout" lines_diff="10">
+          <expand macro="assert_tblout" tool="hmmscan"/>
+      </output>
+      <output name="pfamtblout" file="MADE1.out.pfamtblout" lines_diff="10">
+          <expand macro="assert_tblout" tool="hmmscan"/>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+hmmscan is used to search protein sequences against collections of protein
+profiles. For each sequence in <seqfile>, use that query sequence to search the
+target database of profiles in <hmmfile>, and output ranked lists of the profiles
+with the most significant matches to the sequence.
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_HELP@
+@THRESHOLDS_HELP@
+@CUT_HELP@
+@ACCEL_HEUR_HELP@
+@ADV_OPTS_HELP@
+@SEED_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>
--- a/hmmsearch.xml	Wed Jul 21 14:06:36 2021 +0000
+++ b/hmmsearch.xml	Tue Aug 31 08:43:59 2021 +0000
@@ -1,6 +1,7 @@
 <?xml version="1.0"?>
 <tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy0">
   <description>search profile(s) against a sequence database</description>
+  <expand macro="bio_tools"/>
   <macros>
     <import>macros.xml</import>
   </macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hmmsearch.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for HMMER hmmsearch: searches the profile(s) in hmmfile
+  against the sequences in seqdb and redirects the report to output. The
+  ADDTHREADS token comes first because it sets the shell variable used by the
+  CPU token.
+-->
+<tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy1">
+  <description>search profile(s) against a sequence database</description>
+  <expand macro="bio_tools"/>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+hmmsearch
+
+@OFORMAT_WITH_OPTS@
+@THRESHOLDS@
+@CUT@
+@ACCEL_HEUR@
+@ADV_OPTS@
+@CPU@
+@SEED@
+
+'$hmmfile'
+'$seqdb'
+> '$output'
+  ]]></command>
+  <inputs>
+    <expand macro="input_hmm" />
+    <!-- todo use Galaxy features like data libraries/data tables/??? -->
+    <param name="seqdb" type="data" format="fasta" label="Sequence database to search against"/>
+    <expand macro="oformat_with_opts_dom_pfam"/>
+    <expand macro="thresholds_xml"/>
+    <expand macro="cut"/>
+    <expand macro="accel_heur_xml"/>
+    <expand macro="adv_opts"/>
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <!-- The tool token is only used to build the dataset labels. -->
+    <expand macro="output_dom_pfam" tool="HMMSEARCH"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="hmmfile" value="globins4.hmm"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <expand macro="oformat_test" />
+      <expand macro="seed_test" />
+      <output name="output" file="uniprot_globins_match.out" lines_diff="20">
+          <expand macro="assert_out" tool="hmmsearch"/>
+      </output>
+      <output name="domtblout" file="globins.domtblout" lines_diff="16">
+          <expand macro="assert_tblout" tool="hmmsearch"/>
+      </output>
+      <output name="pfamtblout" file="globins.pfamtblout" lines_diff="12">
+          <expand macro="assert_tblout" tool="hmmsearch"/>
+      </output>
+      <output name="tblout" file="globins.tblout" lines_diff="16">
+          <expand macro="assert_tblout" tool="hmmsearch"/>
+      </output>
+    </test>
+    <test expect_num_outputs="1">
+      <param name="hmmfile" value="globins4.hmm"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output" file="uniprot_globins_match.out" lines_diff="20">
+          <expand macro="assert_out" tool="hmmsearch"/>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+hmmsearch is used to search one or more profiles against a sequence database.
+For each profile in <hmmfile>, use that query profile to search the target
+database of sequences in <seqdb>, and output ranked lists of the sequences with
+the most significant matches to the profile. To build profiles from multiple
+alignments, see hmmbuild.
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_HELP@
+@THRESHOLDS_HELP@
+@CUT_HELP@
+@ACCEL_HEUR_HELP@
+@ADV_OPTS_HELP@
+@SEED_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jackhmmer.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,116 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for HMMER jackhmmer: iteratively searches the sequence(s) in
+  seqfile against seqdb, with N as the maximum number of iterations, and
+  redirects the report to output. The ADDTHREADS token comes first because it
+  sets the shell variable used by the CPU token.
+-->
+<tool id="hmmer_jackhmmer" name="jackhmmer" version="@TOOL_VERSION@+galaxy1">
+  <description>iteratively search a protein sequence against a protein database (PSIBLAST-like)</description>
+  <expand macro="bio_tools"/>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+jackhmmer
+-N $N
+@OFORMAT_WITH_OPTS@
+@HSSI@
+@THRESHOLDS@
+@ACCEL_HEUR@
+@ARSWS@
+@AEEWS@
+@PRIOR@
+
+@EVAL_CALIB@
+@ADV_OPTS@
+@CPU@
+@SEED@
+
+'$seqfile'
+'$seqdb'
+> '$output'
+  ]]></command>
+  <inputs>
+    <param name="seqfile" type="data" format="fasta" label="Protein sequence to search with"/>
+    <!-- todo use Galaxy features like data libraries/data tables/??? -->
+    <param name="N" type="integer" value="5" min="1" label="Maximum number of iterations" help="(-N)"/>
+    <param name="seqdb" type="data" format="fasta" label="Sequence Database"/>
+    <expand macro="oformat_with_opts_dom"/>
+    <expand macro="hssi"/>
+    <expand macro="thresholds_xml"/>
+    <expand macro="accel_heur_xml"/>
+    <expand macro="arsws"/>
+    <expand macro="aeews"/>
+    <expand macro="prior"/>
+    <expand macro="eval_calib_xml"/>
+    <expand macro="adv_opts"/>
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <expand macro="output_dom_pfam" tool="JACKHMMER"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="3">
+      <param name="seqfile" value="uniprot_matches.fasta"/>
+      <param name="seqdb" value="globins45.fa"/>
+      <expand macro="oformat_test" />
+      <expand macro="seed_test" />
+      <output name="output" file="jackhmmer.out" lines_diff="180">
+          <expand macro="assert_out" tool="jackhmmer"/>
+      </output>
+      <output name="domtblout" file="jackhmmer.domtblout" lines_diff="10">
+          <expand macro="assert_tblout" tool="jackhmmer"/>
+      </output>
+      <output name="tblout" file="jackhmmer.tblout" lines_diff="10">
+          <expand macro="assert_tblout" tool="jackhmmer"/>
+      </output>
+    </test>
+    <test expect_num_outputs="1">
+      <param name="seqfile" value="uniprot_matches.fasta"/>
+      <param name="seqdb" value="globins45.fa"/>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output" file="jackhmmer.out" lines_diff="180">
+          <expand macro="assert_out" tool="jackhmmer"/>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+jackhmmer iteratively searches each query sequence in <seqfile> against the
+target sequence(s) in <seqdb>. The first iteration is identical to a phmmer
+search. For the next iteration, a multiple alignment of the query together
+with all target sequences satisfying inclusion thresholds is assembled, a
+profile is constructed from this alignment (identical to using hmmbuild on the
+alignment), and profile search of the <seqdb> is done (identical to an
+hmmsearch with the profile).
+
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_HELP_NOPFAM@
+@HSSI_HELP@
+@THRESHOLDS_HELP@
+@ACCEL_HEUR_HELP@
+@ARSWS_HELP@
+@AEEWS_HELP@
+@PRIOR_HELP@
+@EVAL_CALIB_HELP@
+@ADV_OPTS_HELP@
+@SEED_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>
--- a/macros.xml	Wed Jul 21 14:06:36 2021 +0000
+++ b/macros.xml	Tue Aug 31 08:43:59 2021 +0000
@@ -6,6 +6,11 @@
       <yield/>
     </requirements>
   </xml>
+  <!-- Cross-reference macro: attaches the bio.tools identifier hmmer3 to any tool that expands it. -->
+  <xml name="bio_tools">
+      <xrefs>
+          <xref type="bio.tools">hmmer3</xref>
+      </xrefs>
+  </xml>
   <token name="@TOOL_VERSION@">3.3.2</token>
   <xml name="stdio">
     <stdio>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,1126 @@
+<?xml version="1.0"?>
+<macros>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="@TOOL_VERSION@">hmmer</requirement>
+      <yield/>
+    </requirements>
+  </xml>
+  <!-- Cross-reference macro: attaches the bio.tools identifier hmmer3 to any tool that expands it. -->
+  <xml name="bio_tools">
+      <xrefs>
+          <xref type="bio.tools">hmmer3</xref>
+      </xrefs>
+  </xml>
+  <token name="@TOOL_VERSION@">3.3.2</token>
+  <!-- Shared stdio rules: flag non-zero exit codes and output matching Error: or Exception:. -->
+  <xml name="stdio">
+    <stdio>
+      <!-- Anything other than zero is an error -->
+      <exit_code range="1:"/>
+      <exit_code range=":-1"/>
+      <!-- In case the return code has not been set properly, check stderr too -->
+      <regex match="Error:"/>
+      <regex match="Exception:"/>
+    </stdio>
+  </xml>
+  <token name="@THRESHOLDS@">
+-E $E
+--domE $domE
+
+#if str($T):
+    -T $T
+#end if
+
+#if str($domT):
+    --domT $domT
+#end if
+
+#if str($incE):
+    --incE $incE
+#end if
+
+#if str($incdomE):
+    --incdomE $incdomE
+#end if
+
+#if str($incT):
+    --incT $incT
+#end if
+
+#if str($incdomT):
+    --incdomT $incdomT
+#end if
+  </token>
+  <xml name="thresholds_xml">
+    <!-- Options controlling reporting thresholds -->
+    <param argument="-E" type="float" min="0" value="10.0" label="report sequences &lt;= this E-Value threshold in output" />
+    <param argument="--domE" type="float" min="0" value="10.0" label="report domains &lt;= this E-Value threshold in output" />
+    <param argument="-T" type="float" optional="true" label="report sequences &gt;= this score threshold in output" />
+    <param argument="--domT" type="float" optional="true" label="report domains &gt;= this score threshold in output" />
+    <!-- Options controlling inclusion (significance) thresholds -->
+    <param argument="--incE" type="float" optional="true" label="consider sequences &lt;= this E-Value threshold as significant" />
+    <param argument="--incdomE" type="float" optional="true" label="consider domains &lt;= this E-Value threshold as significant" />
+    <param argument="--incT" type="float" optional="true" label="consider sequences &gt;= this score threshold as significant" />
+    <param argument="--incdomT" type="float" optional="true" label="consider domains &gt;= this score threshold as significant" />
+  </xml>
+  <token name="@THRESHOLDS_NODOM@">
+-E $E
+
+#if str($T):
+    -T $T
+#end if
+
+#if str($incE):
+    --incE $incE
+#end if
+
+#if str($incT):
+    --incT $incT
+#end if
+  </token>
+  <xml name="thresholds_nodom">
+    <!-- Options controlling reporting thresholds -->
+    <param argument="-E" type="float" min="0" value="10.0" label="report sequences &lt;= this E-Value threshold in output" />
+    <param argument="-T" type="float" optional="true" label="report sequences &gt;= this score threshold in output" />
+    <!-- Options controlling inclusion (significance) thresholds -->
+    <param argument="--incE" type="float" optional="true" label="consider sequences &lt;= this E-Value threshold as significant" />
+    <param argument="--incT" type="float" optional="true" label="consider sequences &gt;= this score threshold as significant" />
+  </xml>
+  <token name="@ACCEL_HEUR@">
+$max
+--F1 $F1
+--F2 $F2
+--F3 $F3
+$nobias
+  </token>
+  <xml name="accel_heur_xml">
+    <!-- Options controlling acceleration heuristics -->
+    <param argument="--max" type="boolean" truevalue="--max" falsevalue="" label="Turn all heuristic filters off (less speed, more power)" />
+    <param argument="--F1" type="float" value="0.02" label="Stage 1 (MSV) threshold: promote hits w/ P &lt;= F1" />
+    <param argument="--F2" type="float" value="1e-3" label="Stage 2 (Vit) threshold: promote hits w/ P &lt;= F2" />
+    <param argument="--F3" type="float" value="1e-5" label="Stage 3 (Fwd) threshold: promote hits w/ P &lt;= F3" />
+    <param argument="--nobias" type="boolean" truevalue="--nobias" falsevalue="" label="Turn off composition bias filter" />
+  </xml>
+  <token name="@EVAL_CALIB@">
+--EmL $EmL
+--EmN $EmN
+--EvL $EvL
+--EvN $EvN
+--EfL $EfL
+--EfN $EfN
+--Eft $Eft
+  </token>
+  <xml name="eval_calib_xml">
+    <!-- Control of E-value calibration -->
+    <param argument="--EmL" type="integer" min="1" value="200" label="Length of sequences for MSV Gumbel mu fit" />
+    <param argument="--EmN" type="integer" min="1" value="200" label="Number of sequences for MSV Gumbel mu fit" />
+    <param argument="--EvL" type="integer" min="1" value="200" label="Length of sequences for Viterbi Gumbel mu fit" />
+    <param argument="--EvN" type="integer" min="1" value="200" label="Number of sequences for Viterbi Gumbel mu fit" />
+    <param argument="--EfL" type="integer" min="1" value="100" label="Length of sequences for Forward exp tail tau fit" />
+    <param argument="--EfN" type="integer" min="1" value="200" label="Number of sequences for Forward exp tail tau fit" />
+    <param argument="--Eft" type="float" min="0" max="1" value="0.04" label="tail mass for Forward exponential tail tau fit" />
+  </xml>
+  <!--
+    Command fragment for output selection. For every selected oformat value it
+    emits a long option named after the format, followed by the quoted value of
+    the template variable with that same name (looked up with getVar, falling
+    back to the string MISSING_OUTPUT plus the name). The acc, noali and
+    notextw switches are appended afterwards.
+  -->
+  <token name="@OFORMAT_WITH_OPTS@">
+#if $oformat:
+    #for o in str($oformat).split(','):
+        --$o '$getVar($o, 'MISSING_OUTPUT'+$o)'
+    #end for
+#end if
+$acc $noali $notextw
+  </token>
+  <xml name="oformat_with_opts">
+    <!-- Options directing output -->
+    <param name="oformat" type="select" multiple="true" display="checkboxes" label="Output Formats">
+      <option value="tblout" selected="true">Table of per-sequence hits (--tblout)</option>
+      <yield/>
+    </param>
+    <param argument="--acc" type="boolean" truevalue="--acc" falsevalue="" label="Prefer accessions over names in output" />
+    <param argument="--noali" type="boolean" truevalue="--noali" falsevalue="" label="Don't output alignments, so output is smaller" />
+    <param argument="--notextw" type="boolean" truevalue="--notextw" falsevalue="" label="Unlimited ASCII text output line width" />
+  </xml>
+
+  <xml name="oformat_with_opts_dom">
+    <expand macro="oformat_with_opts">
+      <option value="domtblout" selected="true">Table of per-domain hits (--domtblout)</option>
+      <yield/>
+    </expand>
+  </xml>
+
+  <xml name="oformat_with_opts_dom_pfam">
+    <expand macro="oformat_with_opts_dom">
+      <option value="pfamtblout" selected="true">Table of hits and domains in Pfam format (--pfamtblout)</option>
+    </expand>
+  </xml>
+
+  <xml name="oformat_with_opts_dfam_alisc">
+    <!-- Options directing output -->
+    <expand macro="oformat_with_opts">
+      <option value="dfamtblout" selected="true">Table of hits in Dfam format (--dfamtblout)</option>
+      <option value="aliscoresout">Scores for each position in each alignment to file (--aliscoresout)</option>
+    </expand>
+  </xml>
+
+  <!--
+    Base outputs shared by the search tools: the main text report plus the
+    tblout table, which is filtered out unless tblout is among the selected
+    oformat values. The tool token fills in the dataset labels, and callers can
+    add further data elements through the yield.
+  -->
+  <xml name="output" token_tool="">
+    <data name="output" format="txt" label="@TOOL@ on ${on_string}"/>
+    <data name="tblout" format="txt" label="@TOOL@ on ${on_string}: per-sequence hits from HMM matches">
+      <filter>oformat and 'tblout' in oformat</filter>
+    </data>
+    <yield/>
+  </xml>
+  <xml name="output_dom" token_tool="">
+    <expand macro="output" tool="@TOOL@">
+      <data name="domtblout" format="txt" label="@TOOL@ on ${on_string}: per-domain hits from HMM matches">
+        <filter>oformat and 'domtblout' in oformat</filter>
+      </data>
+    </expand>
+    <yield/>
+  </xml>
+  <xml name="output_dom_pfam" token_tool="">
+    <expand macro="output_dom" tool="@TOOL@">
+      <data name="pfamtblout" format="txt" label="@TOOL@ on ${on_string}: per-sequence/per-domain hits from HMM matches">
+        <filter>oformat and 'pfamtblout' in oformat</filter>
+      </data>
+    </expand>
+  </xml>
+  <xml name="output_dfam_alisc" token_tool="" token_ofvar="seqfile" token_invar="seqdb">
+    <expand macro="output" tool="@TOOL@">
+      <data name="dfamtblout" format="txt" label="@TOOL@ on ${on_string}: per-sequence/per-domain hits from HMM matches">
+        <filter>oformat and 'dfamtblout' in oformat</filter>
+      </data>
+      <data name="aliscoresout" format="txt" label="@TOOL@ on ${on_string}: scores for positional matches">
+        <filter>oformat and 'aliscoresout' in oformat</filter>
+      </data>
+    </expand>
+  </xml>
+
+  <xml name="assert_out" token_tool="">
+    <assert_contents>
+      <has_line_matching expression="# @TOOL@.*"/>
+      <has_line_matching expression="\[ok\]"/>
+    </assert_contents>
+  </xml>
+
+  <xml name="assert_tblout" token_tool="">
+    <assert_contents>
+      <has_line_matching expression="# Program:         @TOOL@"/>
+      <has_line_matching expression="# \[ok\]"/>
+    </assert_contents>
+  </xml>
+
+  <xml name="oformat_test">
+      <param name="notextw" value="true" />
+  </xml>
+  <token name="@HSSI@">
+#if $hssi.hssi_select == "singlemx":
+    --popen $hssi.popen
+    --pextend $hssi.pextend
+#end if
+  </token>
+  <xml name="hssi">
+    <!-- Handling single sequence inputs -->
+    <conditional name="hssi">
+      <param name="hssi_select" type="select" label="Options for handling single sequence inputs">
+        <option value="false" selected="true">Disable</option>
+        <option value="singlemx">Use substitution score matrix for single-sequence inputs</option>
+      </param>
+      <when value="false" />
+      <when value="singlemx">
+        <param argument="--popen" type="float" min="0.0" max="0.5" value="0.02" label="Gap open probability" />
+        <param argument="--pextend" type="float" min="0.0" max="1.0" value="0.4" label="Gap extend probability" />
+      </when>
+      <!-- -mx <s>      : substitution score matrix (built-in matrices, with -singlemx)-->
+      <!-- -mxfile <f>  : read substitution score matrix from file <f> (with -singlemx)-->
+    </conditional>
+  </xml>
+  <!--
+    Shell prelude: assigns GALAXY_SLOTS (1 when unset) to the shell variable
+    addthreads and then decrements it by one. The fragment ends with a shell
+    AND operator, so another command has to follow it.
+  -->
+  <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used (--cpu)
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+  <!-- Passes the shell variable addthreads as the cpu value; that variable is assigned by the ADDTHREADS token. -->
+  <token name="@CPU@">
+      --cpu \$addthreads
+  </token>
+  <token name="@SEED@">
+      --seed $seed
+  </token>
+  <!-- Integer seed parameter read by the SEED token; the form default is 42. -->
+  <xml name="seed">
+    <param argument="--seed"
+           type="integer"
+           min="0"
+           value="42"
+           label="RNG seed, 0 generates a random seed"/>
+  </xml>
+  <xml name="seed_test">
+      <param name="seed" value="4" />
+  </xml>
+  <token name="@ADV_OPTS@">
+$nonull2
+
+#if str($Z):
+    -Z $Z
+#end if
+
+#if str($domZ):
+    --domZ $domZ
+#end if
+  </token>
+  <xml name="adv_opts">
+    <!-- Other options -->
+    <param argument="--nonull2" type="boolean" truevalue="--nonull2" falsevalue="" label="Turn off biased composition score corrections" />
+    <param argument="-Z" type="integer" optional="true" label="# of comparisons done for E-value calculation" />
+    <param argument="--domZ" type="integer" optional="true" label="# of significant sequences, for domain E-value calculation" />
+  </xml>
+  <token name="@FORMAT_SELECTOR@">
+      $input_format_select
+  </token>
+  <xml name="format_selector">
+    <param name="input_format_select" type="select" label="Format of sequence and model">
+      <option value="--amino">Protein</option>
+      <option value="--dna">DNA</option>
+      <option value="--rna">RNA</option>
+    </param>
+  </xml>
+  <xml name="format_selector_noprot">
+    <param name="input_format_select" type="select" label="Format of sequence and model">
+      <option value="--dna">DNA</option>
+      <option value="--rna">RNA</option>
+    </param>
+  </xml>
+  <token name="@ARSWS@">
+$arsws.arsws_select
+
+#if $arsws.arsws_select == "--wblosum":
+    --wid $arsws.wid
+#end if
+  </token>
+  <xml name="arsws">
+    <!-- Alternative relative sequence weighting strategies -->
+    <!-- Each option value is the literal command-line flag that the ARSWS token emits; only the wblosum case adds a parameter (wid). -->
+    <conditional name="arsws">
+      <param name="arsws_select" type="select" label="Alternative relative sequence weighting strategies">
+        <option value="--wpb" selected="true">Henikoff position-based weights (--wpb)</option>
+        <option value="--wgsc">Gerstein/Sonnhammer/Chothia tree weights (--wgsc)</option>
+        <option value="--wblosum">Henikoff simple filter weights (--wblosum)</option>
+        <option value="--wnone">don't do any relative weighting; set all to 1 (--wnone)</option>
+        <option value="--wgiven">use weights as given in MSA file (--wgiven)</option>
+      </param>
+      <when value="--wpb">
+      </when>
+      <when value="--wgsc">
+      </when>
+      <when value="--wblosum">
+        <param argument="--wid" type="float" value="0.62" label="Set identity cutoff" />
+      </when>
+      <when value="--wnone">
+      </when>
+      <when value="--wgiven">
+      </when>
+    </conditional>
+  </xml>
+  <token name="@AEEWS@">
+#if $aeews.aeews_select != "":
+--$aeews.aeews_select
+    #if $aeews.aeews_select == "eent":
+        --eset $aeews.eset
+        --ere $aeews.ere
+        --esigma $aeews.esigma
+    #elif $aeews.aeews_select == "eclust":
+        --eset $aeews.eset
+        --eid $aeews.eid
+    #end if
+#end if
+  </token>
+  <xml name="aeews">
+    <!-- Alternative effective sequence weighting strategies -->
+    <conditional name="aeews">
+      <param name="aeews_select" type="select" label="Alternative effective sequence weighting strategies">
+        <option value="">Disabled</option>
+        <option value="eent">Adjust eff seq # to achieve relative entropy target (--eent)</option>
+        <option value="eclust">Eff seq # is the # of single linkage clusters (--eclust)</option>
+        <option value="enone">No effective seq # weighting: just use nseq (--enone)</option>
+      </param>
+      <when value="">
+      </when>
+      <when value="eent">
+        <param argument="--eset" type="float" value="0" label="set eff seq # for all models" />
+        <param argument="--ere" type="float" value="0" label="set minimum rel entropy/position" />
+        <param argument="--esigma" type="float" value="45" label="set sigma param" />
+      </when>
+      <when value="eclust">
+        <param argument="--eset" type="float" value="0" label="set eff seq # for all models" />
+        <param argument="--eid" type="float" min="0" max="1" value="0.62" label="set fractional identity cutoff" />
+      </when>
+      <when value="enone">
+      </when>
+    </conditional>
+  </xml>
+  <token name="@CUT@">
+$cut_ga
+$cut_nc
+$cut_tc
+  </token>
+  <!-- Boolean switches for the profile's GA, NC and TC cutoffs; an unchecked box contributes an empty string to the command. -->
+  <xml name="cut">
+    <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" label="use profile's GA gathering cutoffs to set all thresholding" />
+    <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" label="use profile's NC noise cutoffs to set all thresholding" />
+    <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" label="use profile's TC trusted cutoffs to set all thresholding" />
+  </xml>
+  <!--
+    Command fragment for model construction: emits the chosen strategy as a
+    flag, adds symfrac only for the fast strategy, and adds fragthresh only
+    when a value has been set.
+  -->
+  <token name="@MCSS@">
+--$mcs.model_construction_strategy_select
+
+#if $mcs.model_construction_strategy_select == "fast":
+    --symfrac $mcs.symfrac
+#end if
+#if str($fragthresh):
+    --fragthresh $fragthresh
+#end if
+  </token>
+  <xml name="mcss">
+    <!-- Alternative model construction strategies -->
+    <conditional name="mcs">
+      <param name="model_construction_strategy_select" type="select" label="Model Construction Strategy">
+        <option value="fast" selected="true">Assign columns with &gt;= symfrac residues as consensus (--fast)</option>
+        <option value="hand">Manual construction (requires reference annotation) (--hand)</option>
+      </param>
+      <when value="fast">
+        <param argument="--symfrac" value="0.5" type="float" label="Sets sym fraction controlling --fast construction"/>
+      </when>
+      <when value="hand"></when>
+    </conditional>
+    <param argument="--fragthresh" type="float" value="0.5" optional="true" label="Fraction of alignment length, under which sequences are excluded" help="HMMER infers fragments if the sequence length L is less than or equal to a fraction x times the alignment length in columns" />
+  </xml>
+  <token name="@PRIOR@">
+$aps_select
+  </token>
+  <xml name="prior">
+    <param name="aps_select" type="select" label="Alternative Prior Strategies">
+      <option value="" selected="true">Unspecified</option>
+      <option value="--pnone">Don't use any prior; parameters are frequencies (--pnone)</option>
+      <option value="--plaplace">Use a Laplace +1 prior (--plaplace)</option>
+    </param>
+  </xml>
+  <xml name="citation">
+    <citations>
+      <citation type="doi">10.1093/nar/gkr367</citation>
+    </citations>
+  </xml>
+  <token name="@LENGTHS@">
+#if str($w_beta):
+    --w_beta $w_beta
+#end if
+
+#if str($w_length):
+    --w_length $w_length
+#end if
+  </token>
+  <!-- Optional window-length parameters; both may be left empty, in which case the LENGTHS token emits nothing for them. -->
+  <xml name="lengths">
+    <param argument="--w_beta"
+           type="float"
+           optional="true"
+           label="Tail mass at which window length is determined"/>
+    <param argument="--w_length"
+           type="integer"
+           optional="true"
+           label="Window Length"/>
+  </xml>
+  <!--
+    Prepares the HMM database and sets the template variable
+    input_hmm_filename. For a history dataset it symlinks the file to
+    localref.hmm and runs hmmpress on the link, both chained with the shell AND
+    operator so a following command is required. Otherwise it uses the db_path
+    field of the selected index entry.
+  -->
+  <token name="@INPUTHMMCHOICE@"><![CDATA[
+#if $input_hmm_conditional.input_hmm_source == "history":
+    #set $input_hmm_filename = "localref.hmm"
+    ln -s '${input_hmm_conditional.hmmfile}' '${input_hmm_filename}' &&
+    ## "Press" database
+    hmmpress '${input_hmm_filename}' &&
+#else:
+    #set $input_hmm_filename = str($input_hmm_conditional.index.fields.db_path)
+#end if
+  ]]></token>
+  <xml name="input_hmm_choice">  <!-- inputs read by the @INPUTHMMCHOICE@ token: a built-in data table entry or a dataset from the history -->
+    <conditional name="input_hmm_conditional">
+      <param name="input_hmm_source" type="select" label="Use a built-in HMM model database or own from your history" >
+        <option value="indexed" selected="true">Use a built-in HMM model database</option>
+        <option value="history">Use a HMM database from history</option>
+      </param>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a HMM model database" help="If your database of interest is not listed, contact the Galaxy administrator">
+          <options from_data_table="hmm_database">  <!-- entries come from the hmm_database data table -->
+            <filter type="sort_by" column="2"/>
+            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+          </options>
+        </param>
+      </when>  <!-- indexed -->
+      <when value="history">
+        <param name="hmmfile" type="data" format="hmm2,hmm3" label="HMM model" />
+      </when>  <!-- history -->
+    </conditional>  <!-- input_hmm_conditional -->
+  </xml>
+  <xml name="input_hmm">  <!-- single HMM dataset input named hmmfile, with no built-in database choice -->
+    <param name="hmmfile" type="data" format="hmm2,hmm3" label="HMM model" />
+  </xml>
+  <xml name="input_msa">  <!-- multiple sequence alignment input named msafile -->
+    <param name="msafile" type="data" label="Multiple Sequence Alignment" format="stockholm,clustal,fasta"
+      help="in Stockholm, Clustal, or Fasta format. While this tool accepts fasta, please ensure that the sequences are not unaligned"/>
+  </xml>
+
+
+  <token name="@ACCEL_HEUR_HELP@"><![CDATA[
+Acceleration Heuristics (--F1, --F2, --F3)
+-------------------------------------------
+
+**MSV filter**
+
+The sequence is aligned to the profile using a specialized model that
+allows multiple high-scoring local ungapped segments to match. The
+optimal alignment score (Viterbi score) is calculated under this
+multi-segment model, hence the term MSV, for “multi-segment Viterbi”. This is
+HMMER’s main speed heuristic. The MSV score is comparable to BLAST’s sum
+score (optimal sum of ungapped alignment segments). Roughly speaking,
+MSV is comparable to skipping the heuristic word hit and hit extension
+steps of the BLAST acceleration algorithm.
+
+The MSV filter is very, very fast. In addition to avoiding indel
+calculations in the dynamic programming table, it uses reduced precision
+scores scaled to 8-bit integers, enabling acceleration via 16-way
+parallel SIMD vector instructions.
+
+The MSV score is a true log-odds likelihood ratio, so it obeys
+conjectures about the expected score distribution (Eddy, 2008) that
+allow immediate and accurate calculation of the statistical significance
+(P-value) of the MSV bit score.
+
+By default, comparisons with a P-value of ≤ 0.02 pass this filter,
+meaning that about 2% of nonhomologous sequences are expected to pass.
+You can use the --F1 option to change this threshold. For example, --F1
+<0.05> would pass 5% of the comparisons, making a search more sensitive
+but slower. Setting the threshold to ≥ 1.0 (--F1 99 for example) assures
+that all comparisons will pass. Shutting off the MSV filter may be
+worthwhile if you want to make sure you don’t miss comparisons that have
+a lot of scattered insertions and deletions. Alternatively, the --max
+option causes the MSV filter step (and all other filter steps) to be
+bypassed.
+
+The MSV bit score is calculated as a log-odds score using the null model
+for comparison. No correction for a biased composition or repetitive
+sequence is done at this stage. For comparisons involving biased
+sequences and/or profiles, more than 2% of comparisons will pass the MSV
+filter. At the end of search output, there is a line like:
+
+    Passed MSV filter: 107917 (0.020272); expected 106468.8 (0.02)
+
+which tells you how many and what fraction of comparisons passed the MSV
+filter, versus how many (and what fraction) were expected.
+
+**Viterbi filter**
+
+The sequence is now aligned to the profile using a fast Viterbi algorithm for
+optimal gapped alignment.
+
+This Viterbi implementation is specialized for speed. It is implemented in
+8-way parallel SIMD vector instructions, using reduced precision scores that
+have been scaled to 16-bit integers. Only one row of the dynamic programming
+matrix is stored, so the routine only recovers the score, not the optimal
+alignment itself. The reduced representation has limited range; local alignment
+scores will not underflow, but high scoring comparisons can overflow and return
+infinity, in which case they automatically pass the filter.
+
+The final Viterbi filter bit score is then computed using the appropriate null
+model log likelihood (by default the biased composition filter model score, or
+if the biased filter is off, just the null model score). If the P-value of this
+score passes the Viterbi filter threshold, the sequence passes on to the next
+step of the pipeline.
+
+The --F2 <x> option controls the P-value threshold for passing the Viterbi
+filter score. The default is 0.001. The --max option bypasses all filters in
+the pipeline.  At the end of a search output, you will see a line like:
+
+    Passed Vit filter: 2207  (0.00443803); expected 497.3 (0.001)
+
+which tells you how many and what fraction of comparisons passed the Viterbi
+filter, versus how many were expected.
+
+**Forward filter/parser**
+
+The sequence is now aligned to the profile using the full Forward algorithm,
+which calculates the likelihood of the target sequence given the profile,
+summed over the ensemble of all possible alignments.
+
+This is a specialized time- and memory-efficient Forward implementation called
+the “Forward parser”. It is implemented in 4-way parallel SIMD vector
+instructions, in full precision (32-bit floating point). It stores just enough
+information that, in combination with the results of the Backward parser
+(below), posterior probabilities of start and stop points of alignments
+(domains) can be calculated in the domain definition step (below), although the
+detailed alignments themselves cannot be.
+
+The Forward filter bit score is calculated by correcting this score using the
+appropriate null model log likelihood (by default the biased composition filter
+model score, or if the biased filter is off, just the null model score). If the
+P-value of this bit score passes the Forward filter threshold, the sequence
+passes on to the next step of the pipeline.
+
+The bias filter score has no further effect in the pipeline. It is only used in
+filter stages. It has no effect on final reported bit scores or P-values.
+Biased composition compensation for final bit scores is done by a more complex
+domain-specific algorithm, described below.
+
+The --F3 <x> option controls the P-value threshold for passing the Forward
+filter score. The default is 1e-5. The --max option bypasses all filters in the
+pipeline.  At the end of a search output, you will see a line like:
+
+    Passed Fwd filter: 1076 (0.00216371); expected 5.0 (1e-05)
+
+which tells you how many and what fraction of comparisons passed the Forward
+filter, versus how many were expected.
+
+**Bias Filter Options**
+
+The --max option bypasses all filters in the pipeline, including the bias
+filter.
+
+The --nobias option turns off (bypasses) the biased composition filter. The
+simple null model is used as a null hypothesis for MSV and in subsequent filter
+steps. The biased composition filter step compromises a small amount of
+sensitivity. Though it is good to have it on by default, you may want to shut
+it off if you know you will have no problem with biased composition hits.
+
+
+**Advanced Documentation**
+
+A more detailed look at the internals of the various filter pipelines was
+posted on the `developer's blog <http://cryptogenomicon.org/hmmer3-is-stubborn.html>`__.
+The information posted there may be useful to those who are struggling with
+poor-scoring sequences.
+]]></token>
+  <token name="@ADV_OPTS_HELP@"><![CDATA[
+Advanced Options
+----------------
+
+**nonull2**
+
+The bias correction can be too aggressive sometimes, causing you to miss homologs. You can turn the
+biased-composition score correction off with the --nonull2 option (and if
+you’re doing that, you may also want to set --nobias, to turn off another
+biased composition step called the bias filter, which affects which sequences
+get scored at all).
+
+**domZ**
+
+Assert that the total number of targets in your searches is <x>, for the
+purposes of per-domain conditional E-value calculations, rather than the number
+of targets that passed the reporting thresholds.
+
+**Z**
+
+Assert that the total number of targets in your searches is <x>, for the
+purposes of per-sequence E-value calculations, rather than the actual number of
+targets seen.
+]]></token>
+  <token name="@AEEWS_HELP@"><![CDATA[
+Effective Sequence Number
+-------------------------
+
+After relative weights are determined, they are normalized to sum to a total
+effective sequence number, eff nseq. This number may be the actual number of
+sequences in the alignment, but it is almost always smaller than that. The
+default entropy weighting method (--eent) reduces the effective sequence number
+to reduce the information content (relative entropy, or average expected
+score on true homologs) per consensus position. The target relative entropy is
+controlled by a two-parameter function, where the two parameters are settable
+with --ere and --esigma.
+
+**--eent**
+
+Adjust effective sequence number to achieve a specific relative entropy per
+position (see --ere). This is the default.
+
+**--eclust**
+
+Set effective sequence number to the number of single-linkage clusters at a
+specific identity threshold (see --eid). This option is not recommended; it’s
+for experiments evaluating how much better --eent is.
+
+**--enone**
+
+Turn off effective sequence number determination and just use the actual number
+of sequences. One reason you might want to do this is to try to maximize the
+relative entropy/position of your model, which may be useful for short models.
+
+**--eset**
+
+Explicitly set the effective sequence number for all models to <x>.
+
+**--ere**
+
+Set the minimum relative entropy/position target to <x>. Requires --eent. Default
+depends on the sequence alphabet. For protein sequences, it is 0.59 bits/position;
+for nucleotide sequences, it is 0.45 bits/position.
+
+**--esigma**
+
+Sets the minimum relative entropy contributed by an entire model alignment, over
+its whole length. This has the effect of making short models have higher relative
+entropy per position than --ere alone would give. The default is 45.0 bits.
+
+**--eid**
+
+Sets the fractional pairwise identity cutoff used by single linkage clustering
+with the --eclust option. The default is 0.62.
+]]></token>
+  <token name="@ARSWS_HELP@"><![CDATA[
+Options Controlling Relative Weights
+------------------------------------
+
+HMMER uses an ad hoc sequence weighting algorithm to downweight closely related
+sequences and up-weight distantly related ones. This has the effect of making
+models less biased by uneven phylogenetic representation. For example, two
+identical sequences would typically each receive half the weight that one
+sequence would. These options control which algorithm gets used.
+
+
+**--wpb**
+
+Use the Henikoff position-based sequence weighting scheme [Henikoff and
+Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
+
+**--wgsc**
+
+Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J.
+Mol. Biol. 235:1067, 1994].
+
+**--wblosum**
+
+Use the same clustering scheme that was used to weight data in calculating
+BLOSUM substitution matrices [Henikoff and Henikoff, Proc. Natl. Acad. Sci
+89:10915, 1992]. Sequences are single-linkage clustered at an identity
+threshold (default 0.62; see --wid) and within each cluster of c sequences,
+each sequence gets relative weight 1/c.
+
+**--wnone**
+
+No relative weights. All sequences are assigned uniform weight.
+
+**--wid**
+
+Sets the identity threshold used by single-linkage clustering when using
+--wblosum.  Invalid with any other weighting scheme. Default is 0.62.
+]]></token>
+  <token name="@BIAS_COMP_HELP@"><![CDATA[
+Bias Composition
+----------------
+
+The next number, the bias, is a correction term for biased sequence composition
+that has been applied to the sequence bit score. For instance, for the top hit
+MYG_PHYCA that scored 222.7 bits, the bias of 3.2 bits means that this sequence
+originally scored 225.9 bits, which was adjusted by the slight 3.2 bit
+biased-composition correction. The only time you really need to pay attention to the
+bias value is when it’s large, on the same order of magnitude as the sequence
+bit score. Sometimes (rarely) the bias correction isn’t aggressive enough, and
+allows a non-homolog to retain too much score.  Conversely, the bias correction
+can be too aggressive sometimes, causing you to miss homologs. You can turn the
+biased-composition score correction off with the --nonull2 option (and if
+you’re doing that, you may also want to set --nobias, to turn off another
+biased composition step called the bias filter, which affects which sequences
+get scored at all).
+
+]]></token>
+  <token name="@CUT_HELP@"><![CDATA[
+Options for Model-specific Score Thresholding
+---------------------------------------------
+
+Curated profile databases may define specific bit score thresholds for each
+profile, superseding any thresholding based on statistical significance alone.
+To use these options, the profile must contain the appropriate (GA, TC, and/or
+NC) optional score threshold annotation; this is picked up by hmmbuild from
+Stockholm format alignment files. Each thresholding option has two scores: the
+per-sequence threshold <x1> and the per-domain threshold <x2>. These act as if
+-T<x1> --incT<x1> --domT<x2> --incdomT<x2> has been applied specifically using
+each model’s curated thresholds.
+
+**--cut_ga**
+
+Use the GA (gathering) bit scores in the model to set per-sequence (GA1) and
+per-domain (GA2) reporting and inclusion thresholds. GA thresholds are
+generally considered to be the reliable curated thresholds defining family
+membership; for example, in Pfam, these thresholds define what gets included in
+Pfam Full alignments based on searches with Pfam Seed models.
+
+**--cut_nc**
+
+Use the NC (noise cutoff) bit score thresholds in the model to set
+per-sequence (NC1) and per-domain (NC2) reporting and inclusion thresholds. NC
+thresholds are generally considered to be the score of the highest-scoring
+known false positive.
+
+**--cut_tc**
+
+Use the TC (trusted cutoff) bit score thresholds in the model to set
+per-sequence (TC1) and per-domain (TC2) reporting and inclusion thresholds. TC
+thresholds are generally considered to be the score of the lowest-scoring known
+true positive that is above all known false positives.
+]]></token>
+  <token name="@EVAL_CALIB_HELP@"><![CDATA[
+Options Controlling H3 Parameter Estimation Methods
+---------------------------------------------------
+
+H3 uses three short random sequence simulations to estimate the location
+parameters for the expected score distributions for MSV scores, Viterbi scores,
+and Forward scores. These options allow these simulations to be modified.
+
+**--EmL**
+
+Sets the sequence length in simulation that estimates the location parameter mu
+for MSV E-values. Default is 200.
+
+**--EmN**
+
+Sets the number of sequences in simulation that estimates the location parameter
+mu for MSV E-values. Default is 200.
+
+**--EvL**
+
+Sets the sequence length in simulation that estimates the location parameter mu
+for Viterbi E-values. Default is 200.
+
+**--EvN**
+
+Sets the number of sequences in simulation that estimates the location parameter
+mu for Viterbi E-values. Default is 200.
+
+
+**--EfL**
+
+Sets the sequence length in simulation that estimates the location parameter tau
+for Forward E-values. Default is 100.
+
+**--EfN**
+
+Sets the number of sequences in simulation that estimates the location parameter
+tau for Forward E-values. Default is 200.
+
+**--Eft**
+
+Sets the tail mass fraction to fit in the simulation that estimates the location
+parameter tau for Forward E-values. Default is 0.04.
+]]></token>
+  <token name="@FORMAT_SELECTOR_HELP@"><![CDATA[
+Options for Specifying the Alphabet
+-----------------------------------
+
+The alphabet type (amino, DNA, or RNA) is autodetected by default, by looking
+at the composition of the msafile. Autodetection is normally quite reliable,
+but occasionally alphabet type may be ambiguous and autodetection can fail (for
+instance, on tiny toy alignments of just a few residues). To avoid this, or to
+increase robustness in automated analysis pipelines, you may specify the
+alphabet type of msafile with these options.
+]]></token>
+  <token name="@HSSI_HELP@"><![CDATA[
+Options Controlling Single Sequence Scoring (first Iteration)
+-------------------------------------------------------------
+
+By default, the first iteration uses a search model constructed from a single
+query sequence. This model is constructed using a standard 20x20 substitution
+matrix for residue probabilities, and two additional parameters for
+position-independent gap open and gap extend probabilities. These options allow
+the default single-sequence scoring parameters to be changed.
+
+**Gap Open (--popen)**
+
+Set the gap open probability for a single sequence query model to <x>.
+
+**Gap Extend (--pextend)**
+
+Set the gap extend probability for a single sequence query model to <x>.
+
+
+**--mx/--mxfile**
+
+These options are not currently supported
+]]></token>
+  <token name="@LENGTHS_HELP@"><![CDATA[
+Tail Mass Options
+-----------------
+
+**Window length tail mass (--w_beta)**
+
+The upper bound, W, on the length at which nhmmer expects to find an instance
+of the model is set such that the fraction of all sequences generated by the
+model with length >= W is less than <x>. The default is 1e-7.
+
+
+**Model instance length upper bound (--w_length)**
+
+Override the model instance length upper bound, W, which is otherwise
+controlled by --w_beta. It should be larger than the model length. The value of
+W is used deep in the acceleration pipeline, and modest changes are not
+expected to impact results (though larger values of W do lead to longer run
+time).
+
+]]></token>
+  <token name="@MCSS_HELP@"><![CDATA[
+Options Controlling Profile Construction
+----------------------------------------
+
+These options control how consensus columns are defined in an alignment.
+
+**--fast**
+
+Define consensus columns as those that have a fraction >= symfrac of residues
+as opposed to gaps. (See below for the --symfrac option.) This is the default.
+
+**--hand**
+
+Define consensus columns in next profile using reference annotation to the multiple
+alignment. This allows you to define any consensus columns you like.
+
+
+**--symfrac**
+
+Define the residue fraction threshold necessary to define a consensus column
+when using the --fast option. The default is 0.5. The symbol fraction in each
+column is calculated after taking relative sequence weighting into account, and
+ignoring gap characters corresponding to ends of sequence fragments (as opposed
+to internal insertions/deletions). Setting this to 0.0 means that every
+alignment column will be assigned as consensus, which may be useful in some
+cases. Setting it to 1.0 means that only columns that include 0 gaps (internal
+insertions/deletions) will be assigned as consensus.
+
+**--fragthresh**
+
+We only want to count terminal gaps as deletions if the aligned sequence is
+known to be full-length, not if it is a fragment (for instance, because only
+part of it was sequenced). HMMER uses a simple rule to infer fragments: if the
+sequence length L is less than or equal to a fraction <x> times the alignment
+length in columns, then the sequence is handled as a fragment. The default is
+0.5. Setting --fragthresh 0 will define no (nonempty) sequence as a fragment;
+you might want to do this if you know you’ve got a carefully curated alignment
+of full-length sequences. Setting --fragthresh 1 will define all sequences as
+fragments; you might want to do this if you know your alignment is entirely
+composed of fragments, such as translated short reads in metagenomic shotgun
+data.
+
+]]></token>
+  <token name="@OFORMAT_WITH_OPTS_HELP@"><![CDATA[
+Options for Controlling Output
+------------------------------
+
+**Table of hits**
+
+Save a simple tabular (space-delimited) file summarizing the per-target output, with
+one data line per homologous target model found.
+
+**Table of per-domain hits**
+
+Save a simple tabular (space-delimited) file summarizing the per-domain output,
+with one data line per homologous domain detected in a query sequence for each
+homologous model.
+
+**Table of hits and domains in Pfam Format**
+
+Save an especially succinct tabular (space-delimited) file summarizing the
+per-target output, with one data line per homologous target model found.
+]]></token>
+  <token name="@OFORMAT_WITH_OPTS_NOPFAM_HELP@"><![CDATA[
+Options for Controlling Output
+------------------------------
+
+**Table of hits**
+
+Save a simple tabular (space-delimited) file summarizing the per-target output, with
+one data line per homologous target model found.
+
+**Table of per-domain hits**
+
+Save a simple tabular (space-delimited) file summarizing the per-domain output,
+with one data line per homologous domain detected in a query sequence for each
+homologous model.
+]]></token>
+  <token name="@OFORMAT_WITH_OPTS_N_HELP@"><![CDATA[
+Options for Controlling Output
+------------------------------
+
+**Table of hits**
+
+Save a simple tabular (space-delimited) file summarizing the per-target output, with
+one data line per homologous target model found.
+
+**Table of hits (dfam)**
+
+Save a tabular (space-delimited) file summarizing the per-hit output, similar
+to --tblout but more succinct.
+
+
+**List of per-position scores for each hit (--aliscoresout)**
+
+Save to file a list of per-position scores for each hit. This is useful, for
+example, in identifying regions of high score density for use in resolving
+overlapping hits from different models.
+
+]]></token>
+  <token name="@PRIOR_HELP@"><![CDATA[
+Options Controlling Priors
+--------------------------
+
+By default, weighted counts are converted to mean posterior probability
+parameter estimates using mixture Dirichlet priors. Default mixture Dirichlet
+prior parameters for protein models and for nucleic acid (RNA and DNA) models
+are built in. The following options allow you to override the default priors.
+
+**No priors (--pnone)**
+
+Don’t use any priors. Probability parameters will simply be the observed
+frequencies, after relative sequence weighting.
+
+**Laplace +1 prior**
+
+Use a Laplace +1 prior in place of the default mixture Dirichlet prior.
+]]></token>
+  <token name="@SEED_HELP@"><![CDATA[
+Random Seeding
+--------------
+
+Seed the random number generator with <n>, an integer >= 0. If <n> is nonzero,
+any stochastic simulations will be reproducible; the same command will give the
+same results. If <n> is 0, the random number generator is seeded arbitrarily,
+and stochastic simulations will vary from run to run of the same command.
+
+]]></token>
+  <token name="@THRESHOLDS_HELP@"><![CDATA[
+Options for Reporting Thresholds
+--------------------------------
+
+Reporting thresholds control which hits are reported in output files (the main
+output, --tblout, and --domtblout).
+
+**E-value (-E)**
+
+In the per-target output, report target profiles with an E-value of <= <x>. The
+default is 10.0, meaning that on average, about 10 false positives will be
+reported per query, so you can see the top of the noise and decide for yourself
+if it’s really noise.
+
+**Bit score (-T)**
+
+Instead of thresholding per-profile output on E-value, instead report target profiles
+with a bit score of >= <x>.
+
+**domain E-value (--domE)**
+
+In the per-domain output, for target profiles that have already satisfied the
+per-profile reporting threshold, report individual domains with a conditional
+E-value of <= <x>. The default is 10.0. A conditional E-value means the
+expected number of additional false positive domains in the smaller search
+space of those comparisons that already satisfied the per-profile reporting
+threshold (and thus must have at least one homologous domain already).
+
+**domain Bit scores (--domT)**
+
+Instead of thresholding per-domain output on E-value, instead report domains
+with a bit score of >= <x>.
+
+Options for Inclusion Thresholds
+--------------------------------
+
+Inclusion thresholds are stricter than reporting thresholds. Inclusion
+thresholds control which hits are considered to be reliable enough to be
+included in an output alignment or a subsequent search round. In hmmscan, which
+does not have any alignment output (like hmmsearch or phmmer) nor any iterative
+search steps (like jackhmmer), inclusion thresholds have little effect. They
+only affect what domains get marked as significant (!) or questionable (?) in
+domain output.
+
+**E-value of per target inclusion threshold**
+
+Use an E-value of <= <x> as the per-target inclusion threshold. The default is
+0.01, meaning that on average, about 1 false positive would be expected in
+every 100 searches with different query sequences.
+
+**Bit score of per target inclusion threshold**
+
+Instead of using E-values for setting the inclusion threshold, instead use a
+bit score of >= <x> as the per-target inclusion threshold. It would be unusual
+to use bit score thresholds with hmmscan, because you don’t expect a single
+score threshold to work for different profiles; different profiles have
+slightly different expected score distributions.
+
+**domain E-value per target inclusion threshold**
+
+Use a conditional E-value of <= <x> as the per-domain inclusion threshold, in
+targets that have already satisfied the overall per-target inclusion threshold.
+
+**domain Bit score per target inclusion threshold**
+
+Instead of using E-values, instead use a bit score of >= <x> as the per-domain
+inclusion threshold. As with --incT above, it would be unusual to use a single
+bit score threshold in hmmscan.
+
+]]></token>
+  <token name="@THRESHOLDS_NODOM_HELP@"><![CDATA[
+Options for Reporting Thresholds
+--------------------------------
+
+Reporting thresholds control which hits are reported in output files (the main
+output, --tblout, and --domtblout).
+
+**E-value (-E)**
+
+In the per-target output, report target profiles with an E-value of <= <x>. The
+default is 10.0, meaning that on average, about 10 false positives will be
+reported per query, so you can see the top of the noise and decide for yourself
+if it’s really noise.
+
+**Bit score (-T)**
+
+Instead of thresholding per-profile output on E-value, instead report target profiles
+with a bit score of >= <x>.
+
+Options for Inclusion Thresholds
+--------------------------------
+
+Inclusion thresholds are stricter than reporting thresholds. Inclusion
+thresholds control which hits are considered to be reliable enough to be
+included in an output alignment or a subsequent search round. In hmmscan, which
+does not have any alignment output (like hmmsearch or phmmer) nor any iterative
+search steps (like jackhmmer), inclusion thresholds have little effect. They
+only affect what domains get marked as significant (!) or questionable (?) in
+domain output.
+
+**E-value of per target inclusion threshold**
+
+Use an E-value of <= <x> as the per-target inclusion threshold. The default is
+0.01, meaning that on average, about 1 false positive would be expected in
+every 100 searches with different query sequences.
+
+**Bit score of per target inclusion threshold**
+
+Instead of using E-values for setting the inclusion threshold, instead use a
+bit score of >= <x> as the per-target inclusion threshold. It would be unusual
+to use bit score thresholds with hmmscan, because you don’t expect a single
+score threshold to work for different profiles; different profiles have
+slightly different expected score distributions.
+
+]]></token>
+  <token name="@ATTRIBUTION@"><![CDATA[
+
+Attribution
+-----------
+
+This Galaxy tool relies on HMMER3_.
+Internally the software is cited as:
+
+::
+
+    # hmmscan :: search sequence(s) against a profile database
+    # HMMER 3.1 (February 2013); http://hmmer.org/
+    # Copyright (C) 2011 Howard Hughes Medical Institute.
+    # Freely distributed under the GNU General Public License (GPLv3).
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+The wrappers were written by the IUC and are licensed under Apache2_. The
+documentation is copied from the HMMER3 documentation.
+
+.. _Apache2: http://www.apache.org/licenses/LICENSE-2.0
+.. _HMMER3: http://hmmer.org/
+
+
+  ]]></token>
+  <token name="@HELP_PRE@"><![CDATA[
+
+What it does
+============
+  ]]></token>
+  <token name="@HELP_PRE_OTH@"><![CDATA[
+Options
+=======
+  ]]></token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nhmmer.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<!-- Galaxy wrapper for nhmmer: runs nhmmer on '$hmmfile' against '$seqfile' and
+     redirects standard output to '$output'.
+     NOTE(review): the merge conflict that was left in this file is resolved by
+     combining both sides: the "+galaxy1" version suffix and the @ADDTHREADS@
+     prologue from HEAD, plus the bio_tools expand from the incoming change.
+     Confirm that macros.xml defines the bio_tools macro and the @ADDTHREADS@ token. -->
+<tool id="hmmer_nhmmer" name="nhmmer" version="@TOOL_VERSION@+galaxy1">
+  <description>search a DNA model or alignment against a DNA database (BLASTN-like)</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="bio_tools"/>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+nhmmer
+
+@OFORMAT_WITH_OPTS@
+@HSSI@
+@THRESHOLDS_NODOM@
+@CUT@
+@ACCEL_HEUR@
+@FORMAT_SELECTOR@
+@ADV_OPTS@
+@LENGTHS@
+@CPU@
+@SEED@
+
+'$hmmfile'
+'$seqfile'
+> '$output'
+  ]]></command>
+  <inputs>
+    <expand macro="input_hmm" />
+    <param name="seqfile" type="data" format="fasta" label="Target sequence file"/>
+    <expand macro="oformat_with_opts_dfam_alisc"/>
+    <expand macro="hssi"/>
+    <expand macro="thresholds_nodom"/>
+    <expand macro="cut" />
+    <expand macro="accel_heur_xml"/>
+    <expand macro="format_selector_noprot"/>
+    <expand macro="adv_opts"/>
+    <expand macro="lengths"/>
+    <!-- TODO: block_length toponly bottomonly -->
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <expand macro="output_dfam_alisc" tool="NHMMER"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="hmmfile" value="MADE1.hmm"/>
+      <param name="seqfile" value="dna_target.fa"/>
+      <expand macro="oformat_test" />
+      <param name="oformat" value="tblout,dfamtblout,aliscoresout"/>
+      <expand macro="seed_test" />
+      <output name="output" file="nhmmer.out" lines_diff="16">
+        <expand macro="assert_out" tool="nhmmer"/>
+      </output>
+      <output name="tblout" file="nhmmer.out.tblout" lines_diff="12">
+        <expand macro="assert_tblout" tool="nhmmer"/>
+      </output>
+      <output name="aliscoresout" file="nhmmer.out.aliscoresout" lines_diff="10"/>
+      <output name="dfamtblout" file="nhmmer.out.dfamtblout" lines_diff="10">
+        <assert_contents>
+          <has_line_matching expression="# hit scores"/>
+        </assert_contents>
+      </output>
+    </test>
+    <test expect_num_outputs="1">
+      <param name="hmmfile" value="MADE1.hmm"/>
+      <param name="seqfile" value="dna_target.fa"/>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output" file="nhmmer.out" lines_diff="16">
+        <expand macro="assert_out" tool="nhmmer"/>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+nhmmer is used to search one or more nucleotide queries against a nucleotide
+sequence database. For each query in <queryfile>, use that query to search the
+target database of sequences in <seqdb>, and output a ranked list of the hits
+with the most significant matches to the query. A query may be either a profile
+model built using hmmbuild, a sequence alignment, or a single sequence.
+Sequence based queries can be in a number of formats (see --qformat), and can
+typically be autodetected. Note that only Stockholm format supports the use of
+multiple sequence-based queries.
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_N_HELP@
+@HSSI_HELP@
+@THRESHOLDS_NODOM_HELP@
+@CUT_HELP@
+@ACCEL_HEUR_HELP@
+@FORMAT_SELECTOR_HELP@
+@ADV_OPTS_HELP@
+@LENGTHS_HELP@
+@SEED_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nhmmscan.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,109 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for nhmmscan: the command runs nhmmscan with the selected
+  options on the HMM input and the FASTA sequence file, redirecting standard
+  output to the "output" dataset.
+  Resolved from a merge of HEAD with c37d72558 (add more bio.tool IDs): keeps
+  HEAD's version suffix and ADDTHREADS token and adds the bio_tools expansion.
+-->
+<tool id="hmmer_nhmmscan" name="nhmmscan" version="@TOOL_VERSION@+galaxy1">
+  <description>search DNA sequence(s) against a DNA profile database</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="bio_tools"/>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+@INPUTHMMCHOICE@
+nhmmscan
+
+@OFORMAT_WITH_OPTS@
+@THRESHOLDS_NODOM@
+@CUT@
+@ACCEL_HEUR@
+--B1 $B1
+--B2 $B2
+--B3 $B3
+
+@ADV_OPTS@
+@LENGTHS@
+@CPU@
+@SEED@
+
+'$input_hmm_filename'
+'$seqfile'
+> '$output'
+  ]]></command>
+  <inputs>
+    <expand macro="input_hmm_choice" />
+    <!-- todo use Galaxy features like data libraries/data tables/??? -->
+    <param name="seqfile" type="data" format="fasta" label="Sequence file"/>
+    <expand macro="oformat_with_opts_dfam_alisc"/>
+    <expand macro="thresholds_nodom"/>
+    <expand macro="cut"/>
+    <expand macro="accel_heur_xml"/>
+
+    <param argument="--B1" type="integer" value="110" label="window length for biased-composition modifier (MSV)" />
+    <param argument="--B2" type="integer" value="240" label="window length for biased-composition modifier (Vit)" />
+    <param argument="--B3" type="integer" value="1000" label="window length for biased-composition modifier (Fwd)" />
+
+    <expand macro="adv_opts"/>
+    <expand macro="lengths"/>
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <expand macro="output_dfam_alisc" tool="NHMMSCAN"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="3">
+      <param name="input_hmm_conditional|input_hmm_source" value="history"/>
+      <param name="input_hmm_conditional|hmmfile" value="MADE1.hmm"/>
+      <param name="seqfile" value="dna_target.fa"/>
+      <expand macro="oformat_test" />
+      <expand macro="seed_test" />
+      <output name="output" file="MADE1.nhmmscan_out" lines_diff="12">
+          <expand macro="assert_out" tool="nhmmscan"/>
+      </output>
+      <output name="tblout" file="MADE1.nhmmscan_out.tblout" lines_diff="10">
+          <!-- nhmmscan reports as hmmscan https://github.com/EddyRivasLab/hmmer/issues/190 -->
+          <expand macro="assert_tblout" tool="hmmscan"/>
+      </output>
+      <output name="dfamtblout" file="MADE1.nhmmscan_out.dfamtblout">
+          <assert_contents>
+              <has_line_matching expression="# hit scores"/>
+          </assert_contents>
+      </output>
+      <!-- not tested because of https://github.com/EddyRivasLab/hmmer/issues/190: <output name="aliscoresout" file="MADE1.nhmmscan_out.aliscoresout" lines_diff="10" /> -->
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+nhmmscan is used to search nucleotide sequences against collections of
+nucleotide profiles. For each sequence in <seqfile>, use that query sequence to
+search the target database of profiles in <hmmfile>, and output ranked lists of
+the profiles with the most significant matches to the sequence.
+
+The <seqfile> may contain more than one query sequence. It can be in FASTA
+format, or several other common sequence file formats (genbank, embl, and
+uniprot, among others), or in alignment file formats (stockholm, aligned fasta,
+and others). See the --qformat option for a complete list.
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_N_HELP@
+@THRESHOLDS_NODOM_HELP@
+@CUT_HELP@
+@ACCEL_HEUR_HELP@
+@BIAS_COMP_HELP@
+@ADV_OPTS_HELP@
+@LENGTHS_HELP@
+@SEED_HELP@
+
+
+@ATTRIBUTION@
+]]></help>
+  <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phmmer.xml.orig	Tue Aug 31 08:43:59 2021 +0000
@@ -0,0 +1,103 @@
+<?xml version="1.0"?>
+<!--
+  Galaxy wrapper for phmmer: the command runs phmmer with the selected
+  options on the FASTA query file and the FASTA sequence database,
+  redirecting standard output to the "output" dataset.
+  Resolved from a merge of HEAD with c37d72558 (add more bio.tool IDs): keeps
+  HEAD's version suffix and ADDTHREADS token and adds the bio_tools expansion.
+-->
+<tool id="hmmer_phmmer" name="phmmer" version="@TOOL_VERSION@+galaxy1">
+  <description>search a protein sequence against a protein database (BLASTP-like)</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="bio_tools"/>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <command><![CDATA[
+@ADDTHREADS@
+phmmer
+
+@OFORMAT_WITH_OPTS@
+@HSSI@
+@THRESHOLDS@
+@ACCEL_HEUR@
+@EVAL_CALIB@
+@ADV_OPTS@
+
+@CPU@
+@SEED@
+
+'$seqfile'
+'$seqdb'
+> '$output'
+  ]]></command>
+  <inputs>
+    <param name="seqfile" type="data" format="fasta" label="Protein sequence to search with"/>
+    <!-- todo use Galaxy features like data libraries/data tables/??? -->
+    <param name="seqdb" type="data" format="fasta" label="Sequence Database"/>
+    <expand macro="oformat_with_opts_dom_pfam"/>
+    <expand macro="hssi"/>
+    <expand macro="thresholds_xml"/>
+    <expand macro="accel_heur_xml"/>
+    <expand macro="eval_calib_xml"/>
+    <expand macro="adv_opts"/>
+    <expand macro="seed"/>
+  </inputs>
+  <outputs>
+    <expand macro="output_dom_pfam" tool="PHMMER"/>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="seqfile" value="globins45.fa"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <expand macro="oformat_test" />
+      <expand macro="seed_test" />
+      <!-- Lines diff is high due to a line of cpu/timing information for EACH sequence -->
+      <output name="output" file="phmmer.out" lines_diff="200">
+          <expand macro="assert_out" tool="phmmer"/>
+      </output>
+      <output name="domtblout" file="phmmer.domtblout" lines_diff="12">
+          <expand macro="assert_tblout" tool="phmmer"/>
+      </output>
+      <output name="pfamtblout" file="phmmer.pfamtblout" lines_diff="12">
+          <expand macro="assert_tblout" tool="phmmer"/>
+      </output>
+      <output name="tblout" file="phmmer.tblout" lines_diff="12">
+          <expand macro="assert_tblout" tool="phmmer"/>
+      </output>
+    </test>
+    <test expect_num_outputs="1">
+      <param name="seqfile" value="globins45.fa"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output" file="phmmer.out" lines_diff="200">
+          <expand macro="assert_out" tool="phmmer"/>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+@HELP_PRE@
+
+phmmer is used to search one or more query protein sequences against a protein
+sequence database.  For each query sequence in <seqfile>, use that sequence to
+search the target database of sequences in <seqdb>, and output ranked lists of
+the sequences with the most significant matches to the query.
+
+
+@HELP_PRE_OTH@
+
+@OFORMAT_WITH_OPTS_HELP@
+@HSSI_HELP@
+@THRESHOLDS_HELP@
+@ACCEL_HEUR_HELP@
+@EVAL_CALIB_HELP@
+@ADV_OPTS_HELP@
+@SEED_HELP@
+
+@ATTRIBUTION@
+  ]]></help>
+  <expand macro="citation"/>
+</tool>