Mercurial > repos > jjohnson > gmap
annotate gmap_build.xml @ 3:488e9d642566 draft
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author | peterjc |
---|---|
date | Wed, 28 Sep 2016 10:47:28 -0400 |
parents | f6ba0f12cca2 |
children | 14561eb803a5 |
rev | line source |
---|---|
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
1 <tool id="gmap_build" name="GMAP Build" version="3.0.1"> |
0 | 2 <description>a database genome index for GMAP and GSNAP</description> |
3 <requirements> | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
4 <requirement type="package" version="2013-05-09">gmap</requirement> |
0 | 5 </requirements> |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
6 <version_command>gmap --version</version_command> |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
7 <command interpreter="command"> /bin/bash $shscript > $output </command> |
0 | 8 <configfiles> |
9 <configfile name="shscript"> | |
10 #!/bin/bash | |
11 #set $ds = chr(36) | |
12 #set $gt = chr(62) | |
13 #set $lt = chr(60) | |
14 #set $ad = chr(38) | |
15 ## #set $ref_files = '' | |
16 ## #for $i in $inputs: | |
17 ## #set $ref_files = $ref_files $i.input | |
18 ## #end for | |
19 ## echo $ref_files | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
20 #set circular = "" |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
21 #if $circular_chroms.__str__.strip() != '': |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
22 #set circular = ('').join([' -c ','"', $circular_chroms.__str__,'"']) |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
23 #end if |
0 | 24 #import os.path |
25 #set $gmapdb = $output.extra_files_path | |
26 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps') | |
27 mkdir -p $gmapdb | |
28 ## export GMAPDB required for cmetindex and atoiindex | |
29 export GMAPDB=$gmapdb | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
30 #if $kmer: |
0 | 31 #for $k in $kmer.__str__.split(','): |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
32 gmap_build -D $gmapdb -d $refname -s $sort $circular -k $k #for i in $inputs# ${i.input}#end for# |
0 | 33 #end for |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
34 #else: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
35 gmap_build -D $gmapdb -d $refname -s $sort $circular #for i in $inputs# ${i.input}#end for# |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
36 #end if |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
37 get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /' |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
38 echo "kmers: " $kmer |
0 | 39 #if $splicesite.splice_source == 'refGeneTable': |
40 #if $splicesite.refGenes.__str__ != 'None': | |
41 cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites') | |
42 cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns') | |
43 #end if | |
44 #elif $splicesite.splice_source == 'gtf': | |
45 #if $splicesite.gtfGenes.__str__ != 'None': | |
46 cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites') | |
47 cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns') | |
48 #end if | |
49 #elif $splicesite.splice_source == 'gff3': | |
50 #if $splicesite.gff3Genes.__str__ != 'None': | |
51 cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites') | |
52 cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns') | |
53 #end if | |
54 #end if | |
55 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None': | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
56 #if $dbsnp.snp_source == 'snpTable': |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
57 #if $dbsnp.snpsex.__str__ != 'None': |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
58 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps') |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
59 #else: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
60 cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps') |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
61 #end if |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
62 #elif $dbsnp.snp_source == 'vcfFile': |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
63 #if $dbsnp.vcf_version and len($dbsnp.vcf_version.__str__.strip()) > 0: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
64 cat $dbsnp.snps | vcf_iit -v $dbsnp.vcf_version.__str__.strip() | iit_store -o $os.path.join($mapsdir,'snps') |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
65 #else: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
66 cat $dbsnp.snps | vcf_iit | iit_store -o $os.path.join($mapsdir,'snps') |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
67 #end if |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
68 #else: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
69 cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps') |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
70 #end if |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
71 snpindex -d $refname -v snps |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
72 echo "snpindex" -d $refname -v snps |
0 | 73 #end if |
74 #if $cmetindex.__str__ == 'yes': | |
75 cmetindex -d $refname | |
76 echo "cmetindex" -d $refname | |
77 #end if | |
78 #if $atoiindex.__str__ == 'yes': | |
79 atoiindex -d $refname | |
80 echo "atoiindex" -d $refname | |
81 #end if | |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
82 get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /' |
0 | 83 </configfile> |
84 </configfiles> | |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
85 <inputs> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
86 <!-- Name for this gmapdb --> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
87 <param name="refname" type="text" label="Name you want to give this gmap database" help=""> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
88 <validator type="empty_field" message="A database name is required."/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
89 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
90 <!-- Input data --> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
91 <repeat name="inputs" title="Reference Sequence" min="1"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
92 <param name="input" type="data" format="fasta" label="reference sequence fasta" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
93 </repeat> |
0 | 94 |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
95 <param name="circular_chroms" type="text" value="" optional="true" label="Names of circular chromosomes" |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
96 help="A comma-separated list of chromosome names; GSNAP and GMAP will be allowed to align reads across the ends of these chromosomes"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
97 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
98 |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
99 <param name="sort" type="select" label="Sort chromosomes" help=""> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
100 <option value="none">none - use chromosomes as found in FASTA file(s)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
101 <option value="alpha">alpha - sort chromosomes alphabetically (chr10 before chr2)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
102 <option value="numeric-alpha">numeric-alpha - chr1, chr1U, chr2, chrM, chrU, chrX, chrY</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
103 <option value="chrom">chrom - chr1, chr2, chrM, chrX, chrY, chr1U, chrU</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
104 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
105 |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
106 <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
107 <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
108 <conditional name="splicesite"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
109 <param name="splice_source" type="select" label="Add splice and intron info from" > |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
110 <option value="none"></option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
111 <option value="refGeneTable">refGenes table from UCSC table browser</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
112 <option value="gtf">GTF</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
113 <option value="gff3">GFF3</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
114 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
115 <when value="none"/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
116 <when value="refGeneTable"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
117 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
118 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)" |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
119 help="Note that alignment tracks in UCSC sometimes have an extra column on the left."> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
120 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0."/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
121 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
122 |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
123 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
124 <when value="gtf"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
125 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
126 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
127 <when value="gff3"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
128 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
129 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
130 </conditional> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
131 <conditional name="dbsnp"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
132 <param name="snp_source" type="select" label="Add SNP info from" > |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
133 <option value="none"></option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
134 <option value="snpTable">UCSC SNP Table</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
135 <option value="snpFile">GMAP SNP File</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
136 <option value="vcfFile">VCF File</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
137 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
138 <when value="none"/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
139 <when value="snpTable"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
140 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
141 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
142 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help=""> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
143 <option value="1" selected="true">1 (High)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
144 <option value="2">2 (Medium)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
145 <option value="3">3 (All)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
146 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
147 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
148 <when value="snpFile"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
149 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file" |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
150 help="Format (3 columns): |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
151 <br>>rs62211261 21:14379270 CG |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
152 <br>>rs62211262 21:14379281 CG |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
153 <br>Each line must start with a > character, then be followed by an |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
154 identifier (which may have duplicates). Then there should be the |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
155 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
156 the first character of a chromosome is number 1.) Finally, there |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
157 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN) |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
158 <br>These alleles must correspond to the possible nucleotides on the plus strand of the genome. |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
159 If neither of these two letters matches the allele in the reference |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
160 sequence, that SNP will be ignored in subsequent processing as a probable error. |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
161 The N stands for any other allele." /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
162 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
163 <when value="vcfFile"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
164 <param name="snps" type="data" format="vcf" optional="true" label="VCF SNPs file" |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
165 help="Example: ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
166 The VCF file contains multiple versions of dbSNP, so if you want a |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
167 particular version, such as 135, enter it in the dbSNP version field below. The vcf_iit program tries to pick |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
168 a subset of SNPs that somewhat parallel |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
169 the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
170 that have been validated (marked in the VCF file as "VLD") or have a |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
171 submitter link-out ("SLO"). Otherwise, it excludes SNPs that are |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
172 individual genotypes ("GNO"). If none of these conditions hold, then |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
173 the SNP is allowed. "/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
174 <param name="vcf_version" type="text" value="" optional="true" label="dbSNP version" |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
175 help="The VCF file contains multiple versions of dbSNP; to use a particular version, such as 135, enter it here. Leave blank to run vcf_iit without a version (-v) option."/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
176 </when> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
177 </conditional> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
178 |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
179 <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="Use smaller values when building indexes on machines with limited RAM"> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
180 <option value="12">12 (64MB RAM)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
181 <option value="13">13 (256MB RAM)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
182 <option value="14">14 (1GB RAM)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
183 <option value="15" selected="true">15 (4GB RAM)</option> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
184 </param> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
185 </inputs> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
186 <stdio> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
187 <exit_code range="1" level="fatal" description="Error running gmap_build" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
188 </stdio> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
189 <outputs> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
190 <!-- |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
191 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
192 --> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
193 <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" /> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
194 </outputs> |
0 | 195 <tests> |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
196 </tests> |
0 | 197 <help> |
198 **GMAP Build** | |
199 | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
200 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, get-genome, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, vcf_iit, snpindex, cmetindex, and atoiindex.) |
0 | 201 |
202 You will want to read the README_ | |
203 | |
204 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310 | |
205 | |
206 .. _GMAP: http://research-pub.gene.com/gmap/ | |
207 .. _GSNAP: http://research-pub.gene.com/gmap/ | |
208 .. _README: http://research-pub.gene.com/gmap/src/README | |
209 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859 | |
210 | |
211 | |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
212 **circular chromosomes** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
213 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
214 You can provide information to gmap_build that certain |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
215 chromosomes are circular, with the -c or -\-circular flag. The value |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
216 for these flags is a list of chromosomes, separated by commas. The |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
217 gmap_build program will then allow GSNAP and GMAP to align reads |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
218 across the ends of the chromosome. For example, the mitochondrial |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
219 genome in human beings is circular. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
220 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
221 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
222 **Detecting known and novel splice sites in GSNAP** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
223 |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
224 GSNAP can detect splice junctions in individual reads. |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
225 GSNAP allows for known splicing at two levels: at the level of known |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
226 splice sites and at the level of known introns. At the site level, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
227 GSNAP finds splicing between arbitrary combinations of donor and |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
228 acceptor splice sites, meaning that it can find alternative splicing |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
229 events. At the intron level, GSNAP finds splicing only between the |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
230 set of given donor-acceptor pairs, so it is constrained not to find |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
231 alternative splicing events, only introns included in the given list. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
232 For most purposes, I would recommend using known splice sites, rather |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
233 than known introns, unless you are certain that all alternative |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
234 splicing events are known and represented in your file. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
235 |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
236 Splice site files can be generated from a GTF file |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
237 or from the refGene table from UCSC. |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
238 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
239 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
240 **SNP-tolerant alignment in GSNAP** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
241 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
242 GSNAP has the ability to align to a reference space of all possible |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
243 major and minor alleles in a set of known SNPs provided by the user. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
244 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
245 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
246 Process known SNP data, either from older dbSNP files or from newer |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
247 files in VCF format. The older dbSNP files can be obtained from UCSC, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
248 either from the Galaxy UCSC table browser or downloaded: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
249 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
250 ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
251 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
252 For versions before snp132, you may also want to exclude exceptions, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
253 which will require this file: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
254 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
255 ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
256 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
257 The option "-w weight" makes use of the dbSNP item weight, a value |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
258 from 1 to 3, where lower weight means higher confidence. Items will |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
259 be included if the item weight is the given value weight or less. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
260 The default value of -w is 1, which is the criterion UCSC uses to |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
261 build its ambiguous version of the genome. To allow all item weights, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
262 specify "-w 3". |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
263 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
264 The more recent SNP data are provided in VCF format, and can be |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
265 retrieved like this: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
266 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
267 ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
268 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
269 The VCF file contains multiple versions of dbSNP, so if you want a |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
270 particular version, such as 135, you would use the flag "-v 135". The |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
271 vcf_iit program tries to pick a subset of SNPs that somewhat parallel |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
272 the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
273 that have been validated (marked in the VCF file as "VLD") or have a |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
274 submitter link-out ("SLO"). Otherwise, it excludes SNPs that are |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
275 individual genotypes ("GNO"). If none of these conditions hold, then |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
276 the SNP is allowed. These rules might not be the best ones; I made |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
277 them up by trying to compare version 135 of the VCF data with |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
278 version 135 of the UCSC dbSNP data. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
279 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
280 **Alignment of reads from bisulfite-treated DNA in GSNAP** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
281 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
282 GSNAP has the ability to align reads from bisulfite-treated DNA, which |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
283 converts unmethylated cytosines to uracils that appear as thymines in |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
284 reads. GSNAP is able to identify genomic-C to read-T mismatches, |
2
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
285 if a cmetindex is generated. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
286 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
287 **RNA-editing tolerance in GSNAP** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
288 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
289 Just as GSNAP has a program cmetindex and a mode called "cmet" for |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
290 tolerance to C-to-T changes, it can be tolerant to A-to-G changes |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
291 using the program atoiindex and a mode called "atoi". This mode is |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
292 designed to facilitate alignments that are tolerant to RNA editing |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
293 where A's are converted to I's, which appear as G's to a sequencer. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
294 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
295 To process reads under RNA-editing tolerance, you will first need to |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
296 create the atoi index. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
297 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
298 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
299 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
300 **K-mer size** |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
301 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
302 You can control the k-mer size |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
303 for the genomic index with the -k flag, which can range from 12 to 15. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
304 The default value for -k is 15, but this requires your machine to have |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
305 4 GB of RAM to build the indices. If you do not have 4 GB of RAM, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
306 then you will need to reduce the value of -k or find another machine. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
307 Here are the RAM requirements for building various indices:: |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
308 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
309 k-mer of 12: 64 MB |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
310 k-mer of 13: 256 MB |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
311 k-mer of 14: 1 GB |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
312 k-mer of 15: 4 GB |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
313 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
314 These are the RAM requirements for building indices, but not to run |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
315 the GMAP/GSNAP programs once the indices are built, because the |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
316 genomic indices are compressed. For example, the genomic index for a |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
317 k-mer of 15 gives a gammaptrs file of 64 MB and an offsetscomp file of |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
318 about 350 MB, much smaller than the 4 GB that would otherwise be |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
319 required. Therefore, you may want to build your genomic index on a |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
320 computer with sufficient RAM, and distribute that index to be used by |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
321 computers with less RAM. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
322 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
323 The amount of compression can be controlled using the -b or -\-basesize |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
324 parameter to gmap_build. By default, the value for k-mer size is 15, |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
325 and the value for basesize is 12. If you select a different value for |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
326 k-mer size, then basesize is made by default to be equal to that k-mer |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
327 size. |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
328 |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
329 If you want to build your genomic databases with more than one k-mer |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
330 size, you can re-run gmap_build with different values of -k. This |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
331 will overwrite only the identical files from the previous runs. You |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
332 can then choose the k-mer size at run-time by using the -k flag for |
f6ba0f12cca2
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
peterjc
parents:
1
diff
changeset
|
333 either GMAP or GSNAP. |
0 | 334 </help> |
3
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
335 <citations> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
336 <citation type="doi">10.1093/bioinformatics/bti310</citation> |
488e9d642566
GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
peterjc
parents:
2
diff
changeset
|
337 </citations> |
0 | 338 </tool> |