annotate ncPRO-QC.xml @ 2:1027a330d606 draft default tip

Uploaded
author jbrayet
date Thu, 29 Oct 2015 10:25:06 -0400
parents 0c34e0bef7d3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
1 <tool id="ncPRO-QC" name="Alignment and QC">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
2 <description>of sRNA-seq data</description>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
3 <requirements>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
4 <requirement type="package" version="0.1">docker_nc_pro_seq</requirement>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
5 </requirements>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
6 <command interpreter="bash">ncPRO-QC.sh
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
7 #for $i in $input_conditional.sampleNumber.samples
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
8 -i ${i.input}
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
9 #end for
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
10 #for $i in $input_conditional.sampleNumber.samples
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
11 -s ${i.sampleName}
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
12 #end for
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
13 -t $input_conditional.input_type
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
14 -n $projectName
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
15 -g $genome
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
16 -f $Rfam
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
17 -l $outlog
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
18 -r $report
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
19 -h $outhtml
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
20 -p $outpdf
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
21 #if $input_conditional.input_type == "fastq"
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
22 -a $input_conditional.mapping
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
23 #if $input_conditional.sampleNumber.numberOfSample == "1"
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
24 -o $outbam_0
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
25 #end if
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
26 #if $input_conditional.sampleNumber.numberOfSample == "2"
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
27 -o $outbam_1 -o $outbam_2
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
28 #end if
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
29 #if $input_conditional.sampleNumber.numberOfSample == "3"
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
30 -o $outbam_3 -o $outbam_4 -o $outbam_5
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
31 #end if
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
32 #if $input_conditional.sampleNumber.numberOfSample == "4"
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
33 -o $outbam_6 -o $outbam_7 -o $outbam_8 -o $outbam_9
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
34 #end if
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
35 #end if
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
36 </command>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
37 <inputs>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
38 <param name="projectName" type="text" value="Project_1" size="20" label="Give a project name" >
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
39 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
40 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
41 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
42 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
43 <conditional name="input_conditional">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
44 <param name="input_type" type="select" label="Select your input file format" help="Raw datafile (fastq) or aligned file (BAM) are allowed. Different treatment will be performed according to the data type.">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
45 <option value="fastq" selected="true">fastq</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
46 <option value="bam">bam</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
47 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
48 <when value="fastq">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
49 <conditional name="sampleNumber">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
50 <param name="numberOfSample" type="select" label="Number of sample(s)">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
51 <option value="1" selected="true">1</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
52 <option value="2">2</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
53 <option value="3">3</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
54 <option value="4">4</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
55 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
56 <when value="1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
57 <repeat name="samples" title="Sample Name" min="1" max="1" default="1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
58 <param name="sampleName" type="text" value="input" size="30" label="Name">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
59 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
60 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
61 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
62 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
63 <param name="input" type="data" format="fastq" label="Raw Input file"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
64 </repeat>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
65 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
66 <when value="2">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
67 <repeat name="samples" title="Sample Name" min="2" max="2" default="2">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
68 <param name="sampleName" type="text" value="input" size="30" label="Name">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
69 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
70 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
71 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
72 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
73 <param name="input" type="data" format="fastq" label="Raw Input file"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
74 </repeat>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
75 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
76 <when value="3">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
77 <repeat name="samples" title="Sample Name" min="3" max="3" default="3">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
78 <param name="sampleName" type="text" value="input" size="30" label="Name">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
79 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
80 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
81 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
82 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
83 <param name="input" type="data" format="fastq" label="Raw Input file"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
84 </repeat>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
85 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
86 <when value="4">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
87 <repeat name="samples" title="Sample Name" min="4" max="4" default="4">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
88 <param name="sampleName" type="text" value="input" size="30" label="Name">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
89 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
90 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
91 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
92 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
93 <param name="input" type="data" format="fastq" label="Raw Input file"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
94 </repeat>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
95 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
96 </conditional>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
97 <param name="mapping" type="boolean" value="False" truevalue="True" falsevalue="False" label="Run Alignment" help="ncPRO-seq proposes to align the reads on a reference genome using the Bowtie aligner"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
98 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
99 <when value="bam">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
100 <repeat name="samples" title="Sample Name" min="1" max="4" default="1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
101 <param name="sampleName" type="text" value="input" size="30" label="Name">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
102 <sanitizer invalid_char="">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
103 <valid initial="string.letters,string.digits"><add value="_"/></valid>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
104 </sanitizer>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
105 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
106 <param name="input" type="data" format="bam" label="Input file"/>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
107 </repeat>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
108 </when>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
109 </conditional>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
110 <param name="genome" type="select" label="Select a reference genome">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
111 <option value="mm9">mm9</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
112 <option value="hg19">hg19</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
113 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
114 <param name="Rfam" type="boolean" value="False" truevalue="True" falsevalue="False" label="Generate the annotation overview using the RFAM and RepeatMasker database (only for aligned data)" />
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
115 <!--<param name="Rmsk" type="boolean" value="False" truevalue="True" falsevalue="False" label="Rmsk overview" />-->
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
116 <param name="report" type="select" label="Select your report format" >
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
117 <option value="all" selected="True">html and pdf</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
118 <option value="html">html</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
119 <option value="pdf">pdf</option>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
120 </param>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
121 </inputs>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
122 <outputs>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
123 <data format="bam" name="outbam_0" label="ncPRO mapped file">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
124 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '1'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
125 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
126 <data format="bam" name="outbam_1" label="ncPRO mapped file 1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
127 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '2'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
128 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
129 <data format="bam" name="outbam_2" label="ncPRO mapped file 2">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
130 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '2'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
131 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
132 <data format="bam" name="outbam_3" label="ncPRO mapped file 1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
133 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '3'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
134 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
135 <data format="bam" name="outbam_4" label="ncPRO mapped file 2">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
136 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '3'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
137 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
138 <data format="bam" name="outbam_5" label="ncPRO mapped file 3">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
139 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '3'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
140 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
141 <data format="bam" name="outbam_6" label="ncPRO mapped file 1">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
142 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '4'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
143 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
144 <data format="bam" name="outbam_7" label="ncPRO mapped file 2">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
145 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '4'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
146 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
147 <data format="bam" name="outbam_8" label="ncPRO mapped file 3">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
148 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '4'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
149 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
150 <data format="bam" name="outbam_9" label="ncPRO mapped file 4">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
151 <filter>((input_conditional['input_type'] == 'fastq') and (input_conditional['mapping'] == True) and (input_conditional['sampleNumber']['numberOfSample'] == '4'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
152 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
153 <data format="html" name="outhtml" label="ncPRO html report">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
154 <filter>((report == 'all') or (report == 'html'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
155 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
156 <data format="pdf" name="outpdf" label="ncPRO pdf report">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
157 <filter>((report == 'all') or (report == 'pdf'))</filter>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
158 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
159 <data format="txt" name="outlog" label="ncPRO log">
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
160 </data>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
161 </outputs>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
162 <help>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
163
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
164 **What does ncPRO-seq do?**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
165
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
166 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
167
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
168 ncPRO-seq is a tool for annotation and profiling of ncRNAs from smallRNA sequencing data. It aims to interrogate and perform detailed analysis on small RNAs derived from annotated non-coding regions in miRBase, Rfam and repeatMasker, and regions defined by users. A command line version and an online version are available at http://ncpro.curie.fr.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
169 If you use the ncPRO-seq tool for your analysis, please cite the following paper:
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
170 Chen C., Servant N., Toedling J., Sarazin A., Marchais A., Duvernois-Berthet E., Cognat V., Colot V., Voinnet O., Heard E., Ciaudo C. and Barillot E. (2012) ncPRO-seq: a tool for annotation and profiling analysis of ncRNAs from small RNA-seq. Bioinformatics. 28(23):3147-9.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
171
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
172 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
173
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
174 **Input Formats**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
175
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
176 Raw data files (fastq) or aligned files (BAM) are allowed. In all cases, ncPRO-seq will perform a quality control of your data.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
177
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
178 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
179
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
180 **Quality Control of raw data**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
181
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
182 -Base Composition Information
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
183
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
184 Display the proportion of each base position for which each of the four normal DNA bases has been called (or GC content). If you see strong biases which change in different bases then this usually indicates an overrepresented sequence which is contaminating your library. A bias which is consistent across all bases either indicates that the original library was sequence biased, or that there was a systematic problem during the sequencing of the library.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
185
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
186 -Quality Score
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
187
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
188 This view presents the quality values across all bases at each position in the FastQ file.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
189 The y-axis on the graph shows the mean quality scores. The higher the score the better the base call. The quality of calls on most platforms will degrade as the run progresses, so it is common to see base calls falling into the orange area towards the end of a read.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
190 We usually consider data with a mean quality higher than 20 to be of good quality.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
191
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
192 -Reads Length Distribution
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
193
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
194 The insert size distribution is the most important quality control in sRNA-seq data. ncPRO-seq provides two types of information, i.e. the abundant versus the distinct reads length distribution. The abundant distribution considers all reads as they are described in the fastq file. The distinct distribution merges all duplicated sequences into one. This view usually decreases the importance of miRNAs to highlight other population-based ncRNAs.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
195
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
196 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
197
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
198 **Reads Alignment**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
199
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
200 In case of raw data, ncPRO-seq proposes to align them on a reference genome using the Bowtie aligner. A default alignment is performed to return the best read alignment with a few mismatches allowed (--best --strata -e 50 --nomaqround). Up to 20 locations for a given read are allowed (-a -m 20) in order to deal with ncRNAs repeated on the genome.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
201
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
202 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
203
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
204 **Quality Control of aligned data**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
205
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
206 -Mapping statistics
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
207
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
208 The proportions of reads with unique or multiple mapping sites in the genome, and of unmapped reads, are plotted. For sRNA-seq data, we usually expect to have a large proportion of unique hits.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
209
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
210 -Annotation overview
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
211
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
212 The reads annotation family is the most general overview, and counts the reads based on the following annotations: coding genes, ncRNAs from Rfam, smallRNAs from repeated regions, rRNAs, piRNAs from piRBase and precursor miRNAs from miRBase.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
213
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
214 -miRNA reads proportion (miRBase)
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
215
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
216 A dedicated plot is available for pre-miRNAs. In this step, abundant reads mapped in mature miRNA regions are counted, and plotted as the proportion of all mapped reads in the genome. The annotation file of mature miRNA is generated using files from miRBase. Each miRNA count is calculated using the intersection of the reads alignment with the precursor position.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
217 In a classical sRNA-seq experiment, we usually expect to have a high level of miRNAs (around 70%). This information can be used as a quality control for mammals. If a small proportion of miRNAs is observed, it means that another population of ncRNA predominates. This can be real biological information, or a contamination (tRNA, rRNA, etc.).
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
218
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
219 ------
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
220
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
221 **RFAM and RepeatMasker annotation overview**
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
222
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
223 After alignment, ncPRO-seq can give a first overview of your data annotation, by overlapping the aligned reads with the known annotations from the RFAM or RepeatMasker database.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
224
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
225 -ncRNA annotation (RFAM)
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
226
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
227 To compare the read expression in different repeat/Rfam families, we count the number of abundant reads in each family and plot the relative proportion.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
228 We catalogue non-coding RNA genes in Rfam annotation into five big classes: tRNA, rRNA, snRNA, snoRNA and others. Note that miRNA annotations are excluded in the Rfam noncoding RNA analyses to be replaced by the miRBase annotation.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
229
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
230 -Repeats annotation (RepeatMasker)
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
231
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
232 ncPRO-seq uses repeat annotations from the RepeatMasker database. We classify different repeats based on the name of the repeat family.
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
233
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
234 </help>
0c34e0bef7d3 Uploaded
jbrayet
parents:
diff changeset
235 </tool>