annotate gmap_build.xml @ 3:488e9d642566 draft

GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author peterjc
date Wed, 28 Sep 2016 10:47:28 -0400
parents f6ba0f12cca2
children 14561eb803a5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
1 <tool id="gmap_build" name="GMAP Build" version="3.0.1">
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
2 <description>a database genome index for GMAP and GSNAP</description>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
4 <requirement type="package" version="2013-05-09">gmap</requirement>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
6 <version_command>gmap --version</version_command>
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
7 <command interpreter="command"> /bin/bash $shscript > $output </command>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
8 <configfiles>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
9 <configfile name="shscript">
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
10 #!/bin/bash
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
11 #set $ds = chr(36)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
12 #set $gt = chr(62)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
13 #set $lt = chr(60)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
14 #set $ad = chr(38)
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
15 ## #set $ref_files = ''
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
16 ## #for $i in $inputs:
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
17 ## #set $ref_files = $ref_files $i.input
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
18 ## #end for
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
19 ## echo $ref_files
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
20 #set circular = ""
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
21 #if $circular_chroms.__str__.strip() != '':
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
22 #set circular = ('').join([' -c ','"', $circular_chroms.__str__,'"'])
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
23 #end if
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
24 #import os.path
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
25 #set $gmapdb = $output.extra_files_path
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
26 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
27 mkdir -p $gmapdb
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
28 ## export GMAPDB required for cmetindex and atoiindex
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
29 export GMAPDB=$gmapdb
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
30 #if $kmer:
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
31 #for $k in $kmer.__str__.split(','):
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
32 gmap_build -D $gmapdb -d $refname -s $sort $circular -k $k #for i in $inputs# ${i.input}#end for#
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
33 #end for
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
34 #else:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
35 gmap_build -D $gmapdb -d $refname -s $sort $circular #for i in $inputs# ${i.input}#end for#
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
36 #end if
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
37 get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /'
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
38 echo "kmers: " $kmer
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
39 #if $splicesite.splice_source == 'refGeneTable':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
40 #if $splicesite.refGenes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
41 cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
42 cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
43 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
44 #elif $splicesite.splice_source == 'gtf':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
45 #if $splicesite.gtfGenes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
46 cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
47 cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
48 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
49 #elif $splicesite.splice_source == 'gff3':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
50 #if $splicesite.gff3Genes.__str__ != 'None':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
51 cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
52 cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns')
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
53 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
54 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
55 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
56 #if $dbsnp.snp_source == 'snpTable':
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
57 #if $dbsnp.snpsex.__str__ != 'None':
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
58 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps')
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
59 #else:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
60 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps')
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
61 #end if
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
62 #elif $dbsnp.snp_source == 'vcfFile':
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
63 #if $dbsnp.vcf_version and len($dbsnp.vcf_version.__str__.strip()) > 0:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
64 cat $dbsnp.snps | vcf_iit -v $dbsnp.vcf_version.__str__.strip() | iit_store -o $os.path.join($mapsdir,'snps')
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
65 #else:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
66 cat $dbsnp.snps | vcf_iit | iit_store -o $os.path.join($mapsdir,'snps')
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
67 #end if
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
68 #else:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
69 cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps')
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
70 #end if
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
71 snpindex -d $refname -v snps
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
72 echo "snpindex" -d $refname -v snps
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
73 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
74 #if $cmetindex.__str__ == 'yes':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
75 cmetindex -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
76 echo "cmetindex" -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
77 #end if
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
78 #if $atoiindex.__str__ == 'yes':
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
79 atoiindex -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
80 echo "atoiindex" -d $refname
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
81 #end if
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
82 get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /'
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
83 </configfile>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
84 </configfiles>
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
85 <inputs>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
86 <!-- Name for this gmapdb -->
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
87 <param name="refname" type="text" label="Name you want to give this gmap database" help="">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
88 <validator type="empty_field" message="A database name is required."/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
89 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
90 <!-- Input data -->
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
91 <repeat name="inputs" title="Reference Sequence" min="1">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
92 <param name="input" type="data" format="fasta" label="reference sequence fasta" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
93 </repeat>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
94
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
95 <param name="circular_chroms" type="text" value="" optional="true" label="Names of circular chromosomes"
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
96 help="A comma-separated list of chromosome names; GSNAP and GMAP will be allowed to align reads across the ends of these chromosomes">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
97 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
98
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
99 <param name="sort" type="select" label="Sort chromosomes" help="">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
100 <option value="none">none - use chromosomes as found in FASTA file(s)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
101 <option value="alpha">alpha - sort chromosomes alphabetically (chr10 before chr 1)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
102 <option value="numeric-alpha">numeric-alpha - chr1, chr1U, chr2, chrM, chrU, chrX, chrY</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
103 <option value="chrom">chrom - chr1, chr2, chrM, chrX, chrY, chr1U, chrU</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
104 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
105
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
106 <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
107 <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
108 <conditional name="splicesite">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
109 <param name="splice_source" type="select" label="Add splice and intron info from" >
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
110 <option value="none"></option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
111 <option value="refGeneTable">refGenes table from UCSC table browser</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
112 <option value="gtf">GTF</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
113 <option value="gff3">GFF3</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
114 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
115 <when value="none"/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
116 <when value="refGeneTable">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
117 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
118 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
119 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
120 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0."/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
121 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
122
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
123 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
124 <when value="gtf">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
125 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
126 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
127 <when value="gff3">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
128 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
129 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
130 </conditional>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
131 <conditional name="dbsnp">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
132 <param name="snp_source" type="select" label="Add SNP info from" >
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
133 <option value="none"></option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
134 <option value="snpTable">UCSC SNP Table</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
135 <option value="snpFile">GMAP SNP File</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
136 <option value="vcfFile">VCF File</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
137 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
138 <when value="none"/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
139 <when value="snpTable">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
140 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
141 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
142 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
143 <option value="1" selected="true">1 (High)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
144 <option value="2">2 (Medium)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
145 <option value="3">3 (All)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
146 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
147 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
148 <when value="snpFile">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
149 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
150 help="Format (3 columns):
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
151 &lt;br&gt;>rs62211261 21:14379270 CG
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
152 &lt;br&gt;>rs62211262 21:14379281 CG
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
153 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
154 identifier (which may have duplicates). Then there should be the
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
155 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
156 the first character of a chromosome is number 1.) Finally, there
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
157 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
158 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
159 If neither of these two letters matches the allele in the reference
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
160 sequence, that SNP will be ignored in subsequent processing as a probable error.
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
161 The N stands for any other allele." />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
162 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
163 <when value="vcfFile">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
164 <param name="snps" type="data" format="vcf" optional="true" label="VCF SNPs file"
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
165 help="Example: ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
166 The VCF file contains multiple versions of dbSNP, so if you want a
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
167 particular version, such as 135, enter it in the dbSNP version field below. The vcf_iit program tries to pick
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
168 a subset of SNPs that somewhat parallel
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
169 the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
170 that have been validated (marked in the VCF file as &#34;VLD&#34;) or have a
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
171 submitter link-out (&#34;SLO&#34;). Otherwise, it excludes SNPs that are
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
172 individual genotypes (&#34;GNO&#34;). If none of these conditions hold, then
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
173 the SNP is allowed. "/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
174 <param name="vcf_version" type="text" value="" optional="true" label="dbSNP version"
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
175 help="The VCF file contains multiple versions of dbSNP; to use a particular version, enter its number here (for example, 135). Leave blank to run vcf_iit without the -v option."/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
176 </when>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
177 </conditional>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
178
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
179 <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="Use smaller values when building indexes on machines with limited RAM">
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
180 <option value="12">12 (64MB RAM)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
181 <option value="13">13 (256MB RAM)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
182 <option value="14">14 (1GB RAM)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
183 <option value="15" selected="true">15 (4GB RAM)</option>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
184 </param>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
185 </inputs>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
186 <stdio>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
187 <exit_code range="1" level="fatal" description="Error running gmap_build" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
188 </stdio>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
189 <outputs>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
190 <!--
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
191 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
192 -->
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
193 <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
194 </outputs>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
195 <tests>
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
196 </tests>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
197 <help>
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
198 **GMAP Build**
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
199
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
200 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
201
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
202 You will want to read the README_
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
203
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
204 Publication_ citation: Thomas D. Wu, Colin K. Watanabe. Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
205
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
206 .. _GMAP: http://research-pub.gene.com/gmap/
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
207 .. _GSNAP: http://research-pub.gene.com/gmap/
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
208 .. _README: http://research-pub.gene.com/gmap/src/README
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
209 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
210
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
211
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
212 **circular chromosomes**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
213
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
214 Finally, you can provide information to gmap_build that certain
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
215 chromosomes are circular, with the -c or -\-circular flag. The value
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
216 for these flags is a list of chromosomes, separated by commas. The
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
217 gmap_build program will then allow GSNAP and GMAP to align reads
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
218 across the ends of the chromosome. For example, the mitochondrial
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
219 genome in human beings is circular.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
220
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
221
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
222 **Detecting known and novel splice sites in GSNAP**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
223
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
224 GSNAP can detect splice junctions in individual reads.
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
225 GSNAP allows for known splicing at two levels: at the level of known
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
226 splice sites and at the level of known introns. At the site level,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
227 GSNAP finds splicing between arbitrary combinations of donor and
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
228 acceptor splice sites, meaning that it can find alternative splicing
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
229 events. At the intron level, GSNAP finds splicing only between the
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
230 set of given donor-acceptor pairs, so it is constrained not to find
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
231 alternative splicing events, only introns included in the given list.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
232 For most purposes, I would recommend using known splice sites, rather
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
233 than known introns, unless you are certain that all alternative
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
234 splicing events are known and are represented in your file.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
235
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
236 Splice site files can be generated from a GTF file
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
237 or from the refGene table from UCSC.
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
238
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
239
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
240 **SNP-tolerant alignment in GSNAP**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
241
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
242 GSNAP has the ability to align to a reference space of all possible
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
243 major and minor alleles in a set of known SNPs provided by the user.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
244
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
245
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
246 Process known SNP data, either from older dbSNP files or from newer
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
247 files in VCF format. The older dbSNP files can be obtained from UCSC,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
248 either from the Galaxy UCSC table browser or downloaded:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
249
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
250 ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
251
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
252 For versions before snp132, you may also want to exclude exceptions,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
253 which will require this file:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
254
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
255 ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
256
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
257 The option "-w weight" makes use of the dbSNP item weight, a value
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
258 from 1 to 3, where lower weight means higher confidence. Items will
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
259 be included if the item weight is the given value weight or less.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
260 The default value of -w is 1, which is the criterion UCSC uses to
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
261 build its ambiguous version of the genome. To allow all item weights,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
262 specify "-w 3".
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
263
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
264 The more recent SNP data are provided in VCF format, and can be
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
265 retrieved like this:
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
266
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
267 ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
268
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
269 The VCF file contains multiple versions of dbSNP, so if you want a
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
270 particular version, such as 135, you would use the flag "-v 135". The
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
271 vcf_iit program tries to pick a subset of SNPs that somewhat parallel
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
272 the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
273 that have been validated (marked in the VCF file as "VLD") or have a
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
274 submitter link-out ("SLO"). Otherwise, it excludes SNPs that are
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
275 individual genotypes ("GNO"). If none of these conditions hold, then
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
276 the SNP is allowed. These rules might not be the best ones; I made
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
277 them up by trying to compare version 135 of the VCF data with
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
278 version 135 of the UCSC dbSNP data.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
279
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
280 **Alignment of reads from bisulfite-treated DNA in GSNAP**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
281
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
282 GSNAP has the ability to align reads from bisulfite-treated DNA, which
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
283 converts unmethylated cytosines to uracils that appear as thymines in
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
284 reads. GSNAP is able to identify genomic-T to read-C mismatches,
2
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
285 if a cmetindex is generated.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
286
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
287 **RNA-editing tolerance in GSNAP**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
288
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
289 Just as GSNAP has a program cmetindex and a mode called "cmet" for
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
290 tolerance to C-to-T changes, it can be tolerant to A-to-G changes
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
291 using the program atoiindex and a mode called "atoi". This mode is
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
292 designed to facilitate alignments that are tolerant to RNA editing
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
293 where A's are converted to I's, which appear as G's to a sequencer.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
294
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
295 To process reads under RNA-editing tolerance, you will first need to
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
296 create the atoi index.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
297
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
298
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
299
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
300 **K-mer size**
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
301
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
302 You can control the k-mer size
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
303 for the genomic index with the -k flag, which can range from 12 to 15.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
304 The default value for -k is 15, but this requires your machine to have
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
305 4 GB of RAM to build the indices. If you do not have 4 GB of RAM,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
306 then you will need to reduce the value of -k or find another machine.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
307 Here are the RAM requirements for building various indices::
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
308
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
309 k-mer of 12: 64 MB
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
310 k-mer of 13: 256 MB
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
311 k-mer of 14: 1 GB
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
312 k-mer of 15: 4 GB
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
313
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
314 These are the RAM requirements for building indices, but not to run
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
315 the GMAP/GSNAP programs once the indices are built, because the
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
316 genomic indices are compressed. For example, the genomic index for a
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
317 k-mer of 15 gives a gammaptrs file of 64 MB and an offsetscomp file of
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
318 about 350 MB, much smaller than the 4 GB that would otherwise be
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
319 required. Therefore, you may want to build your genomic index on a
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
320 computer with sufficient RAM, and distribute that index to be used by
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
321 computers with less RAM.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
322
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
323 The amount of compression can be controlled using the -b or -\-basesize
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
324 parameter to gmap_build. By default, the value for k-mer size is 15,
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
325 and the value for basesize is 12. If you select a different value for
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
326 k-mer size, then basesize is made by default to be equal to that k-mer
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
327 size.
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
328
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
329 If you want to build your genomic databases with more than one k-mer
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
330 size, you can re-run gmap_build with different values of -k. This
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
331 will overwrite only the identical files from the previous runs. You
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
332 can then choose the k-mer size at run-time by using the -k flag for
f6ba0f12cca2 Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents: 1
diff changeset
333 either GMAP or GSNAP.
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
334 </help>
3
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
335 <citations>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
336 <citation type="doi">10.1093/bioinformatics/bti310</citation>
488e9d642566 GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents: 2
diff changeset
337 </citations>
0
10e3476429b5 Uploaded
jjohnson
parents:
diff changeset
338 </tool>