comparison gmap_build.xml @ 0:10e3476429b5 draft

Uploaded
author jjohnson
date Fri, 05 Oct 2012 13:51:49 -0400
parents
children 74391fc6e3f2
comparison
equal deleted inserted replaced
-1:000000000000 0:10e3476429b5
1 <tool id="gmap_build" name="GMAP Build" version="2.0.0">
2 <description>a database genome index for GMAP and GSNAP</description>
3 <requirements> <!-- external executables the tool depends on -->
4 <requirement type="binary">gmap_build</requirement>
5 </requirements>
6 <version_string>gmap --version</version_string> <!-- NOTE(review): the version is queried from gmap although the declared requirement is gmap_build; confirm both are always installed together -->
7 <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command> <!-- runs the generated shscript; stderr is duplicated onto the job's stdout before stdout is redirected, so only the script's stdout is written to $output (the previous "2>1" wrote stderr to a file named "1") -->
8 <inputs>
9 <!-- Database name: passed to the GMAP programs as -d and used as a directory name under the gmapdb -->
10 <param type="text" name="refname" help="" label="Name you want to give this gmap database">
11 <validator message="A database name is required." type="empty_field"/>
12 </param>
13 <!-- One or more reference FASTA datasets; every one of them is passed to each gmap_build call -->
14 <repeat min="1" title="Reference Sequence" name="inputs">
15 <param type="data" name="input" label="reference sequence fasta" format="fasta"/>
16 </repeat>
17
18 <param type="select" name="kmer" label="kmer size" help="" force_select="true" multiple="true"> <!-- the script runs gmap_build once per selected value -->
19 <option value="12">12</option>
20 <option value="13">13</option>
21 <option value="14">14</option>
22 <option selected="true" value="15">15</option>
23 </param>
24 <param type="boolean" name="cmetindex" label="Create cmetindex to process reads from bisulfite-treated DNA" truevalue="yes" falsevalue="no" checked="true"/> <!-- the script runs cmetindex only when this renders as "yes" -->
25 <param type="boolean" name="atoiindex" label="Create atoiindex to process reads under RNA-editing tolerance" truevalue="yes" falsevalue="no" checked="true"/> <!-- the script runs atoiindex only when this renders as "yes" -->
26 <conditional name="splicesite"> <!-- optional gene annotation from which the script builds the 'splicesites' and 'introns' maps -->
27 <param name="splice_source" type="select" label="Add splice and intron info from" >
28 <option value="none"></option>
29 <option value="refGeneTable">refGenes table from UCSC table browser</option>
30 <option value="gtf">GTF</option>
31 <option value="gff3">GFF3</option>
32 </param>
33 <when value="none"/>
34 <when value="refGeneTable"> <!-- converted with psl_splicesites and psl_introns, which receive col_skip as -s -->
35 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
36 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
37 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
38 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0"/>
39 </param>
40
41 </when>
42 <when value="gtf"> <!-- converted with gtf_splicesites and gtf_introns -->
43 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
44 </when>
45 <when value="gff3"> <!-- converted with gff3_splicesites and gff3_introns -->
46 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
47 </when>
48 </conditional>
49 <conditional name="dbsnp"> <!-- optional SNP source; when a SNP dataset is supplied the script stores it as the 'snps' map and then runs snpindex -->
50 <param name="snp_source" type="select" label="Add SNP info from" >
51 <option value="none"></option>
52 <option value="snpTable">UCSC SNP Table</option>
53 <option value="snpFile">GMAP SNP File</option>
54 </param>
55 <when value="none"/>
56 <when value="snpTable"> <!-- converted with dbsnp_iit: -w receives weight, -e the exceptions table when one is given -->
57 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
58 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
59 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
60 <option value="1" selected="true">1 (High)</option>
61 <option value="2">2 (Medium)</option>
62 <option value="3">3 (All)</option>
63 </param>
64 </when>
65 <when value="snpFile"> <!-- the file is piped to iit_store unchanged -->
66 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
67 help="Format (3 columns):
68 &lt;br&gt;>rs62211261 21:14379270 CG
69 &lt;br&gt;>rs62211262 21:14379281 CG
70 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
71 identifier (which may have duplicates). Then there should be the
72 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
73 the first character of a chromosome is number 1.) Finally, there
74 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
75 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
76 If one of these two letters does not match the allele in the reference
77 sequence, that SNP will be ignored in subsequent processing as a probable error.
78 The N stands for any other allele." />
79 </when>
80 </conditional>
81 </inputs>
82 <outputs> <!-- a single gmapdb dataset; the index files themselves are written to its extra_files_path by the script -->
83 <!-- disabled separate log output:
84 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
85 -->
86 <data name="output" format="gmapdb" label="${tool.name} on ${on_string} gmapdb ${refname}"/>
87 </outputs>
88 <configfiles> <!-- shscript is a Cheetah-templated bash script. It runs gmap_build once per selected kmer, optionally stores splice-site/intron and SNP maps with iit_store under (gmapdb)/(refname)/(refname).maps, optionally runs snpindex, cmetindex and atoiindex, and prints summaries with get-genome. The ds, gt, lt and ad variables it sets are not referenced anywhere in the script. -->
89 <configfile name="shscript">
90 #!/bin/bash
91 #set $ds = chr(36)
92 #set $gt = chr(62)
93 #set $lt = chr(60)
94 #set $ad = chr(38)
95 ## (disabled draft, not executed: collected the reference paths into one variable) #set $ref_files = ''
96 ## #for $i in $inputs:
97 ## #set $ref_files = $ref_files $i.input
98 ## #end for
99 ## echo $ref_files
100 #import os.path
101 #set $gmapdb = $output.extra_files_path
102 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
103 mkdir -p $gmapdb
104 ## Export GMAPDB: required for cmetindex and atoiindex, which (like snpindex) are invoked below with only -d and no -D; presumably they locate the database directory through this variable - TODO confirm
105 export GMAPDB=$gmapdb
106 #for $k in $kmer.__str__.split(','):
107 gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for#
108 #end for
109 get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /'
110 echo "kmers: " $kmer
111 #if $splicesite.splice_source == 'refGeneTable':
112 #if $splicesite.refGenes.__str__ != 'None':
113 cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites')
114 cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns')
115 #end if
116 #elif $splicesite.splice_source == 'gtf':
117 #if $splicesite.gtfGenes.__str__ != 'None':
118 cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
119 cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns')
120 #end if
121 #elif $splicesite.splice_source == 'gff3':
122 #if $splicesite.gff3Genes.__str__ != 'None':
123 cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
124 cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns')
125 #end if
126 #end if
127 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
128 #if $dbsnp.snp_source == 'snpTable':
129 #if $dbsnp.snpsex.__str__ != 'None':
130 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps')
131 #else:
132 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps')
133 #end if
134 #else:
135 cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps')
136 #end if
137 snpindex -d $refname -v snps
138 echo "snpindex" -d $refname -v snps
139 #end if
140 #if $cmetindex.__str__ == 'yes':
141 cmetindex -d $refname
142 echo "cmetindex" -d $refname
143 #end if
144 #if $atoiindex.__str__ == 'yes':
145 atoiindex -d $refname
146 echo "atoiindex" -d $refname
147 #end if
148 get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /'
149 </configfile>
150 </configfiles>
151
152 <tests> <!-- empty: no functional tests are defined for this tool -->
153 </tests>
154
155 <help>
156
157
158 **GMAP Build**
159
160 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
161
162 You will want to read the README_
163
164 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
165
166 .. _GMAP: http://research-pub.gene.com/gmap/
167 .. _GSNAP: http://research-pub.gene.com/gmap/
168 .. _README: http://research-pub.gene.com/gmap/src/README
169 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
170
171
172 </help>
173 </tool>
174