comparison iit_store.xml @ 0:10e3476429b5 draft

Uploaded
author jjohnson
date Fri, 05 Oct 2012 13:51:49 -0400
parents
children 74391fc6e3f2
comparison
equal deleted inserted replaced
-1:000000000000 0:10e3476429b5
1 <tool id="gmap_iit_store" name="GMAP IIT" version="2.0.0">
2 <description>Create a map store for known genes or SNPs</description>
3 <requirements>
4 <requirement type="binary">iit_store</requirement>
5 </requirements>
6 <version_string>iit_store --version</version_string>
7 <command interpreter="command"> /bin/bash $shscript 2> $log </command>
8 <inputs>
9 <!-- Input data -->
10 <conditional name="map">
11 <param name="type" type="select" label="Make map for" >
12 <option value="genes">Introns and Splice sites</option>
13 <option value="snps">SNPs</option>
14 <option value="gmap">GMAP Annotation</option>
15 </param>
16 <when value="genes">
17 <conditional name="src">
18 <param name="src_format" type="select" label="Add splice and intron info from" >
19 <option value="refGeneTable">refGenes table from UCSC table browser</option>
20 <option value="gtf">GTF</option>
21 <option value="gff3">GFF3</option>
22 </param>
23 <when value="refGeneTable">
24 <param name="genes" type="data" format="tabular" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
25 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
26 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
27 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0."/>
28 </param>
29 </when>
30 <when value="gtf">
31 <param name="genes" type="data" format="gtf" label="Genes as GTF" help="" />
32 </when>
33 <when value="gff3">
34 <param name="genes" type="data" format="gff3" label="Genes in GFF3 format" help="" />
35 </when>
36 </conditional>
37 <param name="maps" type="select" display="checkboxes" multiple="true" force_select="true" label="IIT maps to create" >
38 <option value="splicesites" selected="true">splicesites.iit</option>
39 <option value="introns" selected="false">introns.iit</option>
40 </param>
41 </when>
42 <when value="snps">
43 <conditional name="src">
44 <param name="src_format" type="select" label="Add SNP info from" >
45 <option value="snpTable">UCSC SNP Table</option>
46 <option value="snpFile">GMAP SNP File</option>
47 </param>
48 <when value="snpTable">
49 <param name="snps" type="data" format="tabular" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
50 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
51 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
52 <option value="1" selected="true">1 (High)</option>
53 <option value="2">2 (Medium)</option>
54 <option value="3">3 (All)</option>
55 </param>
56 </when>
57 <when value="snpFile">
58 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
59 help="Format (3 columns):&lt;B&gt;
60 &lt;br&gt;>rs62211261 21:14379270 CG
61 &lt;br&gt;>rs62211262 21:14379281 CG
62 &lt;/B&gt;
63 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
64 identifier (which may have duplicates). Then there should be the
65 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
66 the first character of a chromosome is number 1.) Finally, there
67 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
68 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
69 If one of these two letters does not match the allele in the reference
70 sequence, that SNP will be ignored in subsequent processing as a probable error.
71 The N stands for any other allele." />
72 </when>
73 </conditional>
74 </when>
75 <when value="gmap">
76 <param name="annotation" type="data" format="gmap_annotation" label="GMAP mapfile"
77 help="Format (2 or 3 columns): &lt;B&gt;
78 &lt;br&gt;>label coords optional_tag
79 &lt;br&gt;optional_annotation (which may be zero, one, or multiple lines)
80 &lt;/B&gt;
81 &lt;br&gt;Each line must start with a &gt; character, then be followed by an identifier (which may have duplicates).
82 &lt;br&gt;Then there should be the chromosomal coordinate range. (Coordinates are all 1-based, so the first character of a chromosome is number 1.)
83 &lt;br&gt;The coords should be of the form
84 &lt;br&gt; chr:position
85 &lt;br&gt; chr:startposition..endposition
86 &lt;br&gt;The term chr:position is equivalent to chr:position..position.
87 &lt;br&gt;If you want to indicate that the interval is on the minus strand or reverse direction, then endposition may be less than startposition.
88 " />
89 </when>
90 </conditional>
91 </inputs>
92 <outputs> <!-- One log dataset plus one IIT dataset per map selected in the "map" conditional. -->
93 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/> <!-- Receives the script's stderr: the command redirects "2> $log". -->
94 <data format="splicesites.iit" name="splicesites_iit" label="${tool.name} on ${on_string} splicesites.iit"> <!-- Kept only for gene maps with "splicesites" checked. -->
95 <filter>(map['type'] == 'genes' and 'splicesites' in map['maps'])</filter>
96 </data>
97 <data format="introns.iit" name="introns_iit" label="${tool.name} on ${on_string} introns.iit"> <!-- Kept only for gene maps with "introns" checked. -->
98 <filter>(map['type'] == 'genes' and 'introns' in map['maps'])</filter>
99 </data>
100 <data format="snps.iit" name="snps_iit" label="${tool.name} on ${on_string} snps.iit"> <!-- Kept only when the map type is "snps". -->
101 <filter>(map['type'] == 'snps')</filter>
102 </data>
103 <data format="iit" name="map_iit" label="${tool.name} on ${on_string} map.iit"> <!-- Kept only when the map type is "gmap". -->
104 <filter>(map['type'] == 'gmap')</filter>
105 </data>
106 </outputs>
107 <configfiles>
108 <configfile name="shscript">
109 #!/bin/bash
110 ## Cheetah template for the bash script run by the tool command; one branch per map type (genes, snps, gmap).
111 ## gt, ad and ep hold shell metacharacters built with chr(), presumably to keep them out of the XML source.
112 #set $catcmd = 'cat'
113 #set $gt = chr(62)
114 #set $ad = chr(38)
115 #set $ep = chr(33)
116 #set $toerr = ''.join([$gt,$ad,'2'])
117 #if $map.type == 'genes':
118 if [ $ep -e $map.src.genes ]; then echo "$map.src.genes does not exist" $toerr; exit 1; fi
119 if [ $ep -s $map.src.genes ]; then echo "$map.src.genes is empty" $toerr; exit 2; fi
120 #if $map.src.src_format == 'refGeneTable':
121 #if 'splicesites' in str($map.maps).split(','):
122 $catcmd $map.src.genes | psl_splicesites -s $map.src.col_skip | iit_store -o $splicesites_iit
123 #end if
124 #if 'introns' in str($map.maps).split(','):
125 $catcmd $map.src.genes | psl_introns -s $map.src.col_skip | iit_store -o $introns_iit
126 #end if
127 #elif $map.src.src_format == 'gtf':
128 #if 'splicesites' in str($map.maps).split(','):
129 $catcmd $map.src.genes | gtf_splicesites | iit_store -o $splicesites_iit
130 #end if
131 #if 'introns' in str($map.maps).split(','):
132 $catcmd $map.src.genes | gtf_introns | iit_store -o $introns_iit
133 #end if
134 #elif $map.src.src_format == 'gff3':
135 #if 'splicesites' in str($map.maps).split(','):
136 $catcmd $map.src.genes | gff3_splicesites | iit_store -o $splicesites_iit
137 #end if
138 #if 'introns' in str($map.maps).split(','):
139 $catcmd $map.src.genes | gff3_introns | iit_store -o $introns_iit
140 #end if
141 #end if
142 #elif $map.type == 'snps':
143 if [ $ep -s $map.src.snps ]; then echo "$map.src.snps is empty" $toerr; exit 2; fi
144 #if $map.src.src_format == 'snpFile':
145 $catcmd $map.src.snps | iit_store -o $snps_iit
146 #elif $map.src.snpsex.__str__ != 'None':
147 $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight -e $map.src.snpsex | iit_store -o $snps_iit
148 #else:
149 $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight | iit_store -o $snps_iit
150 #end if
151 #else:
152 $catcmd $map.annotation | iit_store -o $map_iit
153 #end if
154 </configfile>
155 </configfiles>
156
157 <tests>
158 </tests>
159
160 <help>
161
162
163 **iit_store**
164
165 GMAP IIT creates an Interval Index Tree map of known splice sites, introns, or SNPs (it uses iit_store described in the GMAP documentation). The maps can be used in GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). Maps are typically used for known splice sites, introns, or SNPs.
166
167 You will want to read the README_
168
169 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
170
171 .. _GMAP: http://research-pub.gene.com/gmap/
172 .. _GSNAP: http://research-pub.gene.com/gmap/
173 .. _README: http://research-pub.gene.com/gmap/src/README
174 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
175
176
177 **inputs**
178
179 </help>
180 </tool>
181