Mercurial > repos > iuc > teloscope
comparison teloscope.xml @ 0:be2c72b9798b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/teloscope commit 91ae0a450a15bf0a37c017b57831d40e2cea8dbc
| author | iuc |
|---|---|
| date | Thu, 27 Nov 2025 08:52:19 +0000 |
| parents | |
| children | 089c6e8122c8 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:be2c72b9798b |
|---|---|
| 1 <tool id="teloscope" name="Teloscope" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Assembly telomere annotation</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <version_command>teloscope -v</version_command> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 mkdir -p output && | |
| 10 teloscope | |
| 11 --input-sequence '$input_sequence' | |
| 12 --output output | |
| 13 --canonical '$canonical' | |
| 14 --patterns '$patterns' | |
| 15 --window '$window' | |
| 16 --step '$step' | |
| 17 --threads "\${GALAXY_SLOTS:-2}" | |
| 18 --terminal-limit '$terminal_limit' | |
| 19 --max-match-distance '$max_match_distance' | |
| 20 --max-block-distance '$max_block_distance' | |
| 21 --min-block-length '$min_block_length' | |
| 22 --min-block-density '$min_block_density' | |
| 23 ## Boolean switches follow (each expands to its flag or to nothing). Stdout is the summary report; it is written to a constant file name because a dataset display name may contain spaces or shell metacharacters and must not be interpolated unquoted into the shell. | |
| 24 $out_win_repeats | |
| 25 $out_gc | |
| 26 $out_entropy | |
| 27 $out_matches | |
| 28 $out_its | |
| 29 $ultra_fast | |
| 30 $verbose | |
| 31 > output/teloscope.telo.report | |
| 32 ]]></command> | |
| 33 <inputs> | |
| 34 <param argument="--input-sequence" type="data" format="fasta,fasta.gz" label="Input assembly"/> | |
| 35 <param argument="--canonical" type="text" value="TTAGGG" label="Canonical telomeric pattern"> | |
| 36 <sanitizer> <!-- Whitespace is removed from the allowed set. Tab/CR/LF are written as character references: a backslash sequence in an attribute value is two literal characters, so it would have removed the letters t, r and n instead. --> | |
| 37 <valid initial="string.printable"> | |
| 38 <remove value=" "/> | |
| 39 <remove value="&#9;"/> | |
| 40 <remove value="&#13;"/> | |
| 41 <remove value="&#10;"/> | |
| 42 </valid> | |
| 43 </sanitizer> | |
| 44 <validator type="regex" message="Canonical must contain only A,C,G,T.">^[ACGTacgt]+$</validator> | |
| 45 </param> | |
| 46 <!-- Comma-separated search motifs; the validator restricts them to IUPAC nucleotide codes. --> | |
| 47 <param argument="--patterns" type="text" value="TTAGGG,CCCTAA" | |
| 48 label="Patterns to explore (comma-separated), IUPAC allowed"> | |
| 49 <validator type="regex" | |
| 50 message="Use IUPAC letters ACGTRYSWKMBDHVN; comma-separated.">^[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+(?:,[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+)*$</validator> | |
| 51 </param> | |
| 52 <!-- Numeric window and block-merging settings; allowed ranges are given by the min/max attributes. --> | |
| 53 <param argument="--window" type="integer" min="100" value="1000" label="Sliding window size"/> | |
| 54 <param argument="--step" type="integer" min="100" value="500" label="Sliding window step"/> | |
| 55 <param argument="--terminal-limit" type="integer" min="10000" value="50000" label="Terminal limit (bp) in contigs"/> | |
| 56 <param argument="--max-match-distance" type="integer" min="0" value="50" label="Maximum distance (bp) for merging matches"/> | |
| 57 <param argument="--max-block-distance" type="integer" min="0" value="200" label="Maximum block distance for merging"/> | |
| 58 <param argument="--min-block-length" type="integer" min="0" value="500" label="Minimum block length"/> | |
| 59 <param argument="--min-block-density" type="float" min="0" max="1" value="0.5" label="Minimum block density (0–1)"/> | |
| 60 <!-- Boolean switches: each emits its command-line flag when checked and an empty string otherwise. --> | |
| 61 <param argument="--out-win-repeats" type="boolean" truevalue="--out-win-repeats" falsevalue="" checked="false" label="Window repeat counts"/> | |
| 62 <param argument="--out-gc" type="boolean" truevalue="--out-gc" falsevalue="" checked="false" label="Window GC"/> | |
| 63 <param argument="--out-entropy" type="boolean" truevalue="--out-entropy" falsevalue="" checked="false" label="Window Shannon entropy"/> | |
| 64 <param argument="--out-matches" type="boolean" truevalue="--out-matches" falsevalue="" checked="false" label="Canonical/NonCanonical Matches"/> | |
| 65 <param argument="--out-its" type="boolean" truevalue="--out-its" falsevalue="" checked="false" label="Interstitial telomeres (ITSs)"/> | |
| 66 <param argument="--ultra-fast" type="boolean" truevalue="--ultra-fast" falsevalue="" checked="true" label="Ultra-fast mode (terminal regions only)"/> | |
| 67 <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="Verbose output"/> | |
| 68 </inputs> | |
| 69 | |
| 70 <outputs> | |
| 71 <!-- BASIC OUTFILES: declared without a filter, so they are always expected. --> | |
| 72 <data name="terminal_telomeres" format="bed" from_work_dir="output/*_terminal_telomeres.bed" label="${tool.name} on ${on_string}: Terminal telomeres"/> | |
| 73 <data name="telo_report" format="tabular" from_work_dir="output/*.telo.report" label="${tool.name} on ${on_string}: Summary report"/> | |
| 74 <!-- NOTE(review): every from_work_dir here uses a wildcard; confirm the targeted Galaxy profile resolves patterns rather than literal paths. --> | |
| 75 <!-- OPTIONAL OUTFILES: each is created only when its filter expression over the boolean inputs is true. --> | |
| 76 <data name="interstitial_telomeres" format="bed" from_work_dir="output/*_interstitial_telomeres.bed" label="${tool.name} on ${on_string}: Interstitial telomeres"> | |
| 77 <filter>out_its</filter> | |
| 78 </data> | |
| 79 <data name="canonical_matches" format="bed" from_work_dir="output/*_canonical_matches.bed" label="${tool.name} on ${on_string}: Canonical matches"> | |
| 80 <filter>out_matches</filter> | |
| 81 </data> | |
| 82 <data name="noncanonical_matches" format="bed" from_work_dir="output/*_noncanonical_matches.bed" label="${tool.name} on ${on_string}: Noncanonical matches"> | |
| 83 <filter>out_matches</filter> | |
| 84 </data> | |
| 85 <data name="window_metrics" format="bedgraph" from_work_dir="output/*_window_metrics.bedgraph" label="${tool.name} on ${on_string}: Window metrics"> | |
| 86 <filter>out_gc or out_entropy or out_win_repeats</filter> | |
| 87 </data> | |
| 88 </outputs> | |
| 89 | |
| 90 <tests> | |
| 91 <!-- 1) Default run: ultra-fast is checked by default and no optional output is enabled, so only the two unfiltered outputs (terminal telomeres BED + summary report) are expected. --> | |
| 92 <test expect_num_outputs="2"> | |
| 93 <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/> | |
| 94 <param name="canonical" value="TTAGGG"/> | |
| 95 <param name="patterns" value="TTAGGG,CCCTAA"/> | |
| 96 <output name="terminal_telomeres"> | |
| 97 <assert_contents> | |
| 98 <!-- at least one BED-like line (match whole line to allow extra cols) --> | |
| 99 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/> | |
| 100 </assert_contents> | |
| 101 </output> | |
| 102 <output name="telo_report"> | |
| 103 <assert_contents> | |
| 104 <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/> | |
| 105 <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/> | |
| 106 <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/> | |
| 107 <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/> | |
| 108 <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/> | |
| 109 </assert_contents> | |
| 110 </output> | |
| 111 </test> | |
| 112 | |
| 113 <!-- 2) Genome-wide run: every optional output switch is enabled and ultra-fast is turned off, so all six outputs (two basic + four filtered) are expected. --> | |
| 114 <test expect_num_outputs="6"> | |
| 115 <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/> | |
| 116 <param name="canonical" value="TTAGGG"/> | |
| 117 <param name="patterns" value="TTAGGG,CCCTAA"/> | |
| 118 <param name="out_gc" value="true"/> | |
| 119 <param name="out_entropy" value="true"/> | |
| 120 <param name="out_win_repeats" value="true"/> | |
| 121 <param name="out_matches" value="true"/> | |
| 122 <param name="out_its" value="true"/> | |
| 123 <param name="ultra_fast" value="false"/> | |
| 124 | |
| 125 <output name="terminal_telomeres"> | |
| 126 <assert_contents> | |
| 127 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/> | |
| 128 </assert_contents> | |
| 129 </output> | |
| 130 <output name="interstitial_telomeres"> | |
| 131 <assert_contents> | |
| 132 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/> | |
| 133 </assert_contents> | |
| 134 </output> | |
| 135 <output name="canonical_matches"> | |
| 136 <assert_contents> | |
| 137 <!-- ensure at least one CCCTAA match line --> | |
| 138 <has_line_matching expression="^\S+\t\d+\t\d+\tCCCTAA(?:\s|$)"/> | |
| 139 </assert_contents> | |
| 140 </output> | |
| 141 <output name="noncanonical_matches"> | |
| 142 <assert_contents> | |
| 143 <!-- expect an empty file --> | |
| 144 <has_n_lines n="0"/> | |
| 145 </assert_contents> | |
| 146 </output> | |
| 147 <output name="window_metrics"> | |
| 148 <assert_contents> | |
| 149 <has_text text="track type=bedGraph"/> | |
| 150 <!-- first data window line --> | |
| 151 <has_line_matching expression="^\S+\t0\t1000\t.*"/> | |
| 152 </assert_contents> | |
| 153 </output> | |
| 154 <output name="telo_report"> | |
| 155 <assert_contents> | |
| 156 <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/> | |
| 157 <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/> | |
| 158 <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/> | |
| 159 <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/> | |
| 160 <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/> | |
| 161 </assert_contents> | |
| 162 </output> | |
| 163 </test> | |
| 164 </tests> | |
| 165 | |
| 166 <help><![CDATA[ | |
| 167 Description: | |
| 168 Teloscope is a tool for telomere annotation in genome assemblies. | |
| 169 It scans for user-specified telomeric repeat patterns across assembly paths, contigs and windows. | |
| 170 Teloscope annotates terminal and interstitial telomeres, canonical/noncanonical matches and genome-wide metrics such as GC content, Shannon entropy, and repeat counts. | |
| 171 It generates a detailed telomere summary report for paths, telomere statistics, and chromosome labels to assess telomere completeness. | |
| 172 Teloscope can be used for both complete and fragmented assemblies, providing valuable information for manual genome curation and analysis. | |
| 173 | |
| 174 Usage: | |
| 175 Default (ultra-fast) scans terminal regions and reports terminal telomeres + a summary report. | |
| 176 * ${input_sequence.name}_terminal_telomeres.bed | |
| 177 * ${input_sequence.name}.telo.report | |
| 178 | |
| 179 Enabling window/match options (-g -e -r -m -i) performs a genome-wide scan and produces: | |
| 180 * ${input_sequence.name}_terminal_telomeres.bed | |
| 181 * ${input_sequence.name}_interstitial_telomeres.bed | |
| 182 * ${input_sequence.name}_canonical_matches.bed | |
| 183 * ${input_sequence.name}_noncanonical_matches.bed | |
| 184 * ${input_sequence.name}_window_metrics.bedgraph | |
| 185 * ${input_sequence.name}.telo.report | |
| 186 | |
| 187 Key parameters: | |
| 188 - -c / --canonical: Canonical repeat (default TTAGGG). This is the vertebrate telomeric motif found at chromosome ends; it binds the shelterin complex to form a telomere. | |
| 189 - -p / --patterns: Variant patterns (comma-separated). These are additional telomeric repeat motifs to search for besides the canonical repeat, including other variants that can be part of telomeres. | |
| 190 - -w / -s: window size / step (defaults 1000/500) | |
| 191 - -u / --ultra-fast: terminal scan only (default true); disabled automatically when -g/-e/-r/-m/-i are used. | |
| 192 ]]></help> | |
| 193 | |
| 194 <expand macro="citations"/> | |
| 195 </tool> |
