0
|
1 <tool id="gmap_build" name="GMAP Build" version="2.0.0">
|
|
2 <description>a database genome index for GMAP and GSNAP</description>
|
|
3 <requirements>
|
|
4 <requirement type="binary">gmap_build</requirement>
|
|
5 </requirements>
|
|
6 <version_string>gmap --version</version_string>
|
|
7 <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command> <!-- stderr is duplicated onto the job's original stdout first; then stdout alone is redirected into $output -->
|
|
8 <inputs>
|
|
9 <!-- Name for this gmapdb -->
|
|
10 <param name="refname" type="text" label="Name you want to give this gmap database" help="">
|
|
11 <validator type="empty_field" message="A database name is required."/>
|
|
12 </param>
|
|
13 <!-- Input data -->
|
|
14 <repeat name="inputs" title="Reference Sequence" min="1">
|
|
15 <param name="input" type="data" format="fasta" label="reference sequence fasta" />
|
|
16 </repeat>
|
|
17
|
|
18 <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">
|
|
19 <option value="12">12</option>
|
|
20 <option value="13">13</option>
|
|
21 <option value="14">14</option>
|
|
22 <option value="15" selected="true">15</option>
|
|
23 </param>
|
|
24 <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
|
|
25 <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
|
|
26 <conditional name="splicesite">
|
|
27 <param name="splice_source" type="select" label="Add splice and intron info from" >
|
|
28 <option value="none"></option>
|
|
29 <option value="refGeneTable">refGenes table from UCSC table browser</option>
|
|
30 <option value="gtf">GTF</option>
|
|
31 <option value="gff3">GFF3</option>
|
|
32 </param>
|
|
33 <when value="none"/>
|
|
34 <when value="refGeneTable">
|
|
35 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
|
|
36 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
|
|
37 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
|
|
38 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0"/>
|
|
39 </param>
|
|
40
|
|
41 </when>
|
|
42 <when value="gtf">
|
|
43 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
|
|
44 </when>
|
|
45 <when value="gff3">
|
|
46 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
|
|
47 </when>
|
|
48 </conditional>
|
|
49 <conditional name="dbsnp">
|
|
50 <param name="snp_source" type="select" label="Add SNP info from" >
|
|
51 <option value="none"></option>
|
|
52 <option value="snpTable">UCSC SNP Table</option>
|
|
53 <option value="snpFile">GMAP SNP File</option>
|
|
54 </param>
|
|
55 <when value="none"/>
|
|
56 <when value="snpTable">
|
|
57 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
|
|
58 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
|
|
59 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
|
|
60 <option value="1" selected="true">1 (High)</option>
|
|
61 <option value="2">2 (Medium)</option>
|
|
62 <option value="3">3 (All)</option>
|
|
63 </param>
|
|
64 </when>
|
|
65 <when value="snpFile">
|
|
66 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
|
|
67 help="Format (3 columns):
|
|
68 &lt;br&gt;&gt;rs62211261 21:14379270 CG
|
|
69 &lt;br&gt;&gt;rs62211262 21:14379281 CG
|
|
70 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
|
|
71 identifier (which may have duplicates). Then there should be the
|
|
72 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
|
|
73 the first character of a chromosome is number 1.) Finally, there
|
|
74 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
|
|
75 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
|
|
76 If the one of these two letters does not match the allele in the reference
|
|
77 sequence, that SNP will be ignored in subsequent processing as a probable error.
|
|
78 The N stands for any other allele." />
|
|
79 </when>
|
|
80 </conditional>
|
|
81 </inputs>
|
|
82 <outputs>
|
|
83 <!--
|
|
84 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
|
|
85 -->
|
|
86 <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
|
|
87 </outputs>
|
|
88 <configfiles>
|
|
89 <configfile name="shscript">
|
|
90 #!/bin/bash
|
|
91 #set $ds = chr(36)
|
|
92 #set $gt = chr(62)
|
|
93 #set $lt = chr(60)
|
|
94 #set $ad = chr(38)
|
|
95 ## #set $ref_files = ''
|
|
96 ## #for $i in $inputs:
|
|
97 ## #set $ref_files = $ref_files $i.input
|
|
98 ## #end for
|
|
99 ## echo $ref_files
|
|
100 #import os.path
|
|
101 #set $gmapdb = $output.extra_files_path
|
|
102 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
|
|
103 mkdir -p $gmapdb
|
|
104 ## export GMAPDB required for cmetindex and atoiindex
|
|
105 export GMAPDB=$gmapdb
|
|
106 #for $k in $kmer.__str__.split(','):
|
|
107 gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for#
|
|
108 #end for
|
|
109 get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /'
|
|
110 echo "kmers: " $kmer
|
|
111 #if $splicesite.splice_source == 'refGeneTable':
|
|
112 #if $splicesite.refGenes.__str__ != 'None':
|
|
113 cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites')
|
|
114 cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns')
|
|
115 #end if
|
|
116 #elif $splicesite.splice_source == 'gtf':
|
|
117 #if $splicesite.gtfGenes.__str__ != 'None':
|
|
118 cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
|
|
119 cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns')
|
|
120 #end if
|
|
121 #elif $splicesite.splice_source == 'gff3':
|
|
122 #if $splicesite.gff3Genes.__str__ != 'None':
|
|
123 cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
|
|
124 cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns')
|
|
125 #end if
|
|
126 #end if
|
|
127 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
|
|
128 #if $dbsnp.snp_source == 'snpTable':
|
|
129 #if $dbsnp.snpsex.__str__ != 'None':
|
|
130 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps')
|
|
131 #else:
|
|
132 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps')
|
|
133 #end if
|
|
134 #else:
|
|
135 cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps')
|
|
136 #end if
|
|
137 snpindex -d $refname -v snps
|
|
138 echo "snpindex" -d $refname -v snps
|
|
139 #end if
|
|
140 #if $cmetindex.__str__ == 'yes':
|
|
141 cmetindex -d $refname
|
|
142 echo "cmetindex" -d $refname
|
|
143 #end if
|
|
144 #if $atoiindex.__str__ == 'yes':
|
|
145 atoiindex -d $refname
|
|
146 echo "atoiindex" -d $refname
|
|
147 #end if
|
|
148 get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /'
|
|
149 </configfile>
|
|
150 </configfiles>
|
|
151
|
|
152 <tests>
|
|
153 </tests>
|
|
154
|
|
155 <help>
|
|
156
|
|
157
|
|
158 **GMAP Build**
|
|
159
|
|
160 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
|
|
161
|
|
162 You will want to read the README_
|
|
163
|
|
164 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
|
|
165
|
|
166 .. _GMAP: http://research-pub.gene.com/gmap/
|
|
167 .. _GSNAP: http://research-pub.gene.com/gmap/
|
|
168 .. _README: http://research-pub.gene.com/gmap/src/README
|
|
169 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
|
|
170
|
|
171
|
|
172 </help>
|
|
173 </tool>
|
|
174
|