<tool id="rsem_prepare_reference" name="RSEM prepare reference" version="1.1.17">
  <!--
    Galaxy wrapper around the `rsem-prepare-reference` command.

    The job writes the chosen reference name into the primary output dataset
    (`reference_file`) and then runs rsem-prepare-reference from inside that
    dataset's files directory, so the files it produces are stored alongside
    the dataset. Standard output of rsem-prepare-reference is redirected to
    `<reference_name>.log` in the same directory.
  -->
  <description></description>
  <requirements>
    <requirement type="package" version="1.2.28">rsem</requirement>
    <requirement type="package" version="1.1.2">bowtie</requirement>
    <requirement type="package" version="2.2.6">bowtie2</requirement>
  </requirements>
  <!--
    The command is a Cheetah template. It is wrapped in CDATA so the shell
    operators (&&, >) do not have to be XML-escaped.
    * `mkdir -p` keeps the && chain alive if the files directory already exists.
    * Dataset paths are single-quoted so unusual characters in a path cannot
      split the argument.
    * `no_polya_subset` is optional, so its flag is only emitted when a dataset
      was actually supplied.
    * `reference_name` is restricted to word characters by its validator and is
      therefore left unquoted.
  -->
  <command><![CDATA[
    echo $reference_name " " | tee '$reference_file' &&
    mkdir -p '$reference_file.files_path' &&
    cd '$reference_file.files_path' &&
    rsem-prepare-reference
    #if $polya.polya_use == 'add':
      #if $polya.polya_length:
        --polyA-length $polya.polya_length
      #end if
    #elif $polya.polya_use == 'subset':
      #if $polya.no_polya_subset:
        --no-polyA-subset '$polya.no_polya_subset'
      #end if
      #if $polya.polya_length:
        --polyA-length $polya.polya_length
      #end if
    #elif $polya.polya_use == 'none':
      --no-polyA
    #end if
    $ntog
    #if $transcript_to_gene_map:
      --transcript-to-gene-map '$transcript_to_gene_map'
    #end if
    #if $reference.ref_type == 'transcripts':
      '$reference.reference_fasta_file'
    #else:
      --gtf '$reference.gtf'
      '$reference.reference_fasta_file'
    #end if
    $reference_name
    > ${reference_name}.log
  ]]></command>
  <inputs>
    <!-- Reference source: either a transcript fasta, or a genome fasta plus a GTF annotation. -->
    <conditional name="reference">
      <param name="ref_type" type="select" label="Reference transcript source">
        <option value="transcripts">transcript fasta</option>
        <option value="genomic">reference genome and gtf</option>
      </param>
      <when value="transcripts">
        <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file"
               help="The files should contain the sequences of transcripts."/>
      </when>
      <when value="genomic">
        <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file"
               help="The file should contain the sequence of an entire genome."/>
        <param name="gtf" type="data" format="gtf" label="gtf"
               help="extract transcript reference sequences using the gene annotations specified in this GTF"/>
      </when>
    </conditional>
    <param name="transcript_to_gene_map" type="data" format="tabular" optional="true" label="Map of gene ids to transcript (isoform) ids">
      <help>
        Each line should be of the form: gene_id transcript_id ( with the two fields separated by a tab character )
        The map can be obtained from the UCSC table browser
        group: Genes and Gene Prediction Tracks
        table: knownIsoforms
        Without a map:
        If a reference genome and gtf is used, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file.
        Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.
      </help>
    </param>
    <!-- The validator keeps the name safe to use unquoted as a shell word and as a file-name prefix. -->
    <param name="reference_name" type="text" value="rsem_ref_name" label="reference name">
      <help>A one word name for this RSEM reference containing only letters, digits, and underscore characters</help>
      <validator type="regex" message="Use only letters, digits, and underscore characters">^\w+$</validator>
    </param>
    <!-- Poly(A) handling: add to all transcripts, add to all but a listed subset, or add to none. -->
    <conditional name="polya">
      <param name="polya_use" type="select" label="PolyA ">
        <option value="add" selected="true">Add poly(A) tails to all transcripts</option>
        <option value="subset">Exclude poly(A) tails from selected transcripts</option>
        <option value="none">Do not add poly(A) tails to any transcripts</option>
      </param>
      <when value="add">
        <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
          <validator type="in_range" message="must be positive " min="1"/>
        </param>
      </when>
      <when value="subset">
        <param name="no_polya_subset" type="data" format="tabular" optional="true" label="List of transcript IDs (one per line) that should not have polyA tails added."/>
        <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
          <validator type="in_range" message="must be positive " min="1"/>
        </param>
      </when>
      <when value="none"/>
    </conditional>
    <!-- When checked, the literal flag in `truevalue` is inserted into the command line; otherwise nothing is. -->
    <param name="ntog" type="boolean" truevalue="--no-ntog" falsevalue="" checked="false" label="Disable the conversion of 'N' characters to 'G' characters in the reference sequences" help="Bowtie uses the automatic N to G conversion to align against all positions in the reference."/>
  </inputs>
  <!-- Any exit code of 1 or greater marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Error Running RSEM" />
  </stdio>
  <outputs>
    <data format="rsem_ref" name="reference_file" label="RSEM ${reference_name} reference"/>
  </outputs>
  <tests>
    <test>
      <param name="ref_type" value="genomic"/>
      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
      <param name="gtf" value="ref.gtf" ftype="gtf"/>
      <param name="reference_name" value="ref"/>
      <!-- The output is referenced by its declared name (reference_file), not by its datatype. -->
      <output name="reference_file">
        <assert_contents>
          <has_text text="ref.grp" />
        </assert_contents>
      </output>
    </test>
  </tests>
  <help>

RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/

NAME
rsem-prepare-reference

SYNOPSIS
rsem-prepare-reference [options] reference_fasta_file(s) reference_name

DESCRIPTION
The rsem-prepare-reference program extracts/preprocesses the reference sequences and builds Bowtie indices using default parameters.
This program is used in conjunction with the 'rsem-calculate-expression' program.

INPUTS
A fasta file of transcripts
or
A genome sequence fasta file and a GTF gene annotation file. (When using UCSC data, include the related knownIsoforms.txt)

  </help>
</tool>