annotate gmap_build.xml @ 0:10e3476429b5 draft

Uploaded
author jjohnson
date Fri, 05 Oct 2012 13:51:49 -0400
parents
children 74391fc6e3f2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="gmap_build" name="GMAP Build" version="2.0.0">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
2 <description>a database genome index for GMAP and GSNAP</description>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
4 <requirement type="binary">gmap_build</requirement>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
6 <version_string>gmap --version</version_string>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
7 <!-- Run the generated script: its stdout is written to the output dataset; stderr is merged into the job's stdout (2>&amp;1) rather than into a stray file named "1". --> <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
8 <inputs>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
9 <!-- Name for this gmapdb -->
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
10 <param name="refname" type="text" label="Name you want to give this gmap database" help="">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
11 <validator type="empty_field" message="A database name is required."/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
12 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
13 <!-- Input data -->
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
14 <repeat name="inputs" title="Reference Sequence" min="1">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
15 <param name="input" type="data" format="fasta" label="reference sequence fasta" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
16 </repeat>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
17
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
18 <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
19 <option value="12">12</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
20 <option value="13">13</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
21 <option value="14">14</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
22 <option value="15" selected="true">15</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
23 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
24 <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
25 <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
26 <conditional name="splicesite">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
27 <param name="splice_source" type="select" label="Add splice and intron info from" >
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
28 <option value="none"></option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
29 <option value="refGeneTable">refGenes table from UCSC table browser</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
30 <option value="gtf">GTF</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
31 <option value="gff3">GFF3</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
32 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
33 <when value="none"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
34 <when value="refGeneTable">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
35 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
36 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
37 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
38 <!-- Reject negative skip counts. --> <validator type="in_range" message="The number of columns to skip must be >= 0." min="0"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
39 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
40
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
41 </when>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
42 <when value="gtf">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
43 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
44 </when>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
45 <when value="gff3">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
46 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
47 </when>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
48 </conditional>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
49 <conditional name="dbsnp">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
50 <param name="snp_source" type="select" label="Add SNP info from" >
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
51 <option value="none"></option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
52 <option value="snpTable">UCSC SNP Table</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
53 <option value="snpFile">GMAP SNP File</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
54 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
55 <when value="none"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
56 <when value="snpTable">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
57 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
58 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
59 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
60 <option value="1" selected="true">1 (High)</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
61 <option value="2">2 (Medium)</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
62 <option value="3">3 (All)</option>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
63 </param>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
64 </when>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
65 <when value="snpFile">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
66 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
67 help="Format (3 columns):
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
68 &lt;br&gt;>rs62211261 21:14379270 CG
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
69 &lt;br&gt;>rs62211262 21:14379281 CG
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
70 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
71 identifier (which may have duplicates). Then there should be the
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
72 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
73 the first character of a chromosome is number 1.) Finally, there
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
74 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
75 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
76 If one of these two letters does not match the allele in the reference
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
77 sequence, that SNP will be ignored in subsequent processing as a probable error.
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
78 The N stands for any other allele." />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
79 </when>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
80 </conditional>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
81 </inputs>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
82 <outputs>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
83 <!--
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
84 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
85 -->
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
86 <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
87 </outputs>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
88 <configfiles>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
89 <configfile name="shscript">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
90 #!/bin/bash
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
91 #set $ds = chr(36)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
92 #set $gt = chr(62)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
93 #set $lt = chr(60)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
94 #set $ad = chr(38)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
95 ## #set $ref_files = ''
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
96 ## #for $i in $inputs:
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
97 ## #set $ref_files = $ref_files $i.input
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
98 ## #end for
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
99 ## echo $ref_files
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
100 #import os.path
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
101 #set $gmapdb = $output.extra_files_path
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
102 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
103 mkdir -p $gmapdb
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
104 ## export GMAPDB required for cmetindex and atoiindex
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
105 export GMAPDB=$gmapdb
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
106 #for $k in $kmer.__str__.split(','):
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
107 gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for#
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
108 #end for
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
109 get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /'
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
110 echo "kmers: " $kmer
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
111 #if $splicesite.splice_source == 'refGeneTable':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
112 #if $splicesite.refGenes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
113 cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
114 cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
115 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
116 #elif $splicesite.splice_source == 'gtf':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
117 #if $splicesite.gtfGenes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
118 cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
119 cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
120 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
121 #elif $splicesite.splice_source == 'gff3':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
122 #if $splicesite.gff3Genes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
123 cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
124 cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
125 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
126 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
127 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
128 #if $dbsnp.snp_source == 'snpTable':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
129 #if $dbsnp.snpsex.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
130 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
131 #else:
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
132 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
133 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
134 #else:
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
135 cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
136 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
137 snpindex -d $refname -v snps
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
138 echo "snpindex" -d $refname -v snps
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
139 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
140 #if $cmetindex.__str__ == 'yes':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
141 cmetindex -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
142 echo "cmetindex" -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
143 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
144 #if $atoiindex.__str__ == 'yes':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
145 atoiindex -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
146 echo "atoiindex" -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
147 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
148 get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /'
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
149 </configfile>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
150 </configfiles>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
151
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
152 <tests>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
153 </tests>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
154
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
155 <help>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
156
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
157
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
158 **GMAP Build**
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
159
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
160 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
161
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
162 You will want to read the README_
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
163
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
164 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
165
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
166 .. _GMAP: http://research-pub.gene.com/gmap/
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
167 .. _GSNAP: http://research-pub.gene.com/gmap/
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
168 .. _README: http://research-pub.gene.com/gmap/src/README
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
169 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
170
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
171
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
172 </help>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
173 </tool>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
174