annotate shinefind.xml @ 0:d5c3354c166d draft default tip

Uploaded
author cpt_testbed
date Fri, 29 Apr 2022 10:33:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
1 <tool id="edu.tamu.cpt.genbank.shinefind" name="Shine Find" version="21.1.0.0">
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
2 <description>Identify shine-dalgarno sequences</description>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
3 <macros>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
4 <import>macros.xml</import>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
5 <import>cpt-macros.xml</import>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
6 </macros>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
7 <expand macro="requirements"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
8 <command detect_errors="aggressive"><![CDATA[
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
9 @GENOME_SELECTOR_PRE@
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
10 ## Script and dataset paths are single-quoted so paths containing spaces or shell metacharacters reach the script as one argument each.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
11 python '$__tool_directory__/shinefind.py'
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
12
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
13 @GENOME_SELECTOR@
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
14 '$gff3_data'
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
15
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
16 --table_output '$default_output'
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
17 --gff3_output '$gff3_output'
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
18
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
19 --lookahead_min $lookahead_min
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
20 --lookahead_max $lookahead_max
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
21 $add
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
22 $top_only
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
23
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
24
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
25 > '$stdout'
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
26
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
27 ]]></command>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
28 <inputs>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
29 <expand macro="genome_selector" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
30 <expand macro="gff3_input" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
31
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
32 <param label="Minimum number of bases upstream of CDS for gap (--lookahead_min)" name="lookahead_min" type="integer" value="3"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
33 <param label="Maximum number of bases upstream of CDS for gap (--lookahead_max)" name="lookahead_max" type="integer" value="17"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
34 <param checked="true" label="Automatically add RBSs to input GFF3" name="add"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
35 type="boolean" truevalue="--add" falsevalue="" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
36 <param checked="true" label="Only report best hits (--top_only)"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
37 name="top_only" type="boolean" falsevalue="" truevalue="--top_only"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
38 </inputs>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
39 <outputs>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
40 <data format="tabular" name="default_output" label="ShineFind RBS list from $gff3_data.name"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
41 <data format="gff3" name="gff3_output" label="ShineFind GFF3 RBSs from $gff3_data.name"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
42 <data format="gff3" name="stdout" label="$gff3_data.name with RBSs"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
43 </outputs>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
44 <tests>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
45 <test>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
46 <param name="genome_fasta" value="Miro_ShineFindIn.fa" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
47 <param name="reference_genome_source" value="history" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
48 <param name="gff3_data" value="Miro_ShineFindIn.gff3" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
49 <param name="lookahead_max" value="15" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
50 <param name="lookahead_min" value="5" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
51 <param name="add" value="True"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
52 <param name="top_only" value="False" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
53 <output name="default_output" file="Miro_ShineFindOut.tbl" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
54 <output name="gff3_output" file="Miro_ShineFindOut1.gff3" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
55 <output name="stdout" file="Miro_ShineFindOut2.gff3" />
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
56 </test>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
57 </tests>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
58 <help><![CDATA[
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
59 **What it does**
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
60
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
61 From an input GFF3 and FASTA file, the upstream sequence within user-specified bounds is
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
62 extracted for all CDS features. Input CDS features with an RBS are not re-analyzed. For
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
63 the remaining CDS features, the upstream sequence is searched against the following
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
64 possible Shine-Dalgarno sequences:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
65 - AGGAGGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
66 - GGAGGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
67 - AGGAGG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
68 - AGGAG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
69 - GAGGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
70 - GGAGG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
71 - AGGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
72 - GGGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
73 - GAGG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
74 - GGGG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
75 - AGGA
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
76 - GGAG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
77 - GGA
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
78 - GAG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
79 - AGG
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
80 - GGT
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
81
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
82 **Gap Min/Max Settings**
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
83 In previous versions of this tool, the min/max was set for the window to find a Shine-Dalgarno sequence, but
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
84 currently we specify the minimum and maximum GAP between the end of the SD and the start of the closest CDS.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
85
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
86 By default, only the first (closest to consensus) hit in the list is returned in the GFF3. In the case of a tie, the hit with the smaller gap is selected.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
87 By selecting ‘No’ for ‘Only report best hits’, **all hits** to the possible SD list will
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
88 be added to the GFF3 file.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
89
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
90 **March 10, 2021 Update**
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
91 Prior to this date, when using the "Only report best hits" option, the algorithm would pick the "first" one (moving downstream) found in the case of tied sequence length. After this date, it reports the sequence with the smaller gap between the END of the SD sequence and the START of the CDS.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
92
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
93 ]]></help>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
94 <expand macro="citations" >
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
95 <expand macro="sl-citations-clm"/>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
96 </expand>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
97 </tool>
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
98