2
|
1 #!/bin/bash
|
|
2
|
|
3 # Copyleft ↄ⃝ 2012 Institut Curie
|
|
4 # Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant(Institut Curie) 2012 - 2015
|
|
5 # Contact: bioinfo.ncproseq@curie.fr
|
|
6 # This software is distributed without any guarantee under the terms of the GNU General
|
|
7 # Public License, either Version 2, June 1991 or Version 3, June 2007.
|
|
8
|
|
9 while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:q:d:" optionName; do
|
|
10 case "$optionName" in
|
|
11
|
|
12 i) INPUT+="$OPTARG,";;
|
|
13 s) SAMPLENAME+="$OPTARG,";;
|
|
14 n) PROJECTNAME="$OPTARG";;
|
|
15 g) GENOME="$OPTARG";;
|
|
16 m) RMSK="$OPTARG";;
|
|
17 o) BAM_OUT+="$OPTARG,";;
|
|
18 f) RFAM="$OPTARG";;
|
|
19 r) REPORT="$OPTARG";;
|
|
20 h) HTML_REPORT="$OPTARG";;
|
|
21 p) PDF_REPORT="$OPTARG";;
|
|
22 l) LOG_FILE="$OPTARG";;
|
|
23 t) INPUT_TYPE="$OPTARG";;
|
|
24 a) ALIGNMENT="$OPTARG";;
|
|
25 q) FASTQ_FORMAT+="$OPTARG,";;
|
|
26 d) ROOT_DIR="$OPTARG";;
|
|
27
|
|
28 esac
|
|
29 done
|
|
30
|
|
31 ##### ncPRO-seq annotation - Galaxy #####
|
|
32
|
|
33 GENOME_2=`echo $GENOME | cut -d"_" -f2`
|
|
34
|
|
35 databasePath=$ROOT_DIR/database/files
|
|
36
|
|
37 mkdir -p $databasePath/ncproseqAnnotation
|
|
38 mkdir -p $databasePath/ncproseqAnnotation/annotation
|
|
39 annotationPath=$databasePath/ncproseqAnnotation/annotation
|
|
40 [ ! -d $annotationPath/$GENOME_2 ] && wget http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz -P $annotationPath -q && cd $annotationPath && tar -zxf $GENOME.tar.gz && rm -rf $GENOME.tar.gz
|
|
41
|
|
42 #########
|
|
43
|
|
44
|
|
45 sampleArray=(${INPUT//,/ })
|
|
46 nameArray=(${SAMPLENAME//,/ })
|
|
47 bamArray=(${BAM_OUT//,/ })
|
|
48
|
|
49 if [[ $REPORT == "pdf" ]];then
|
|
50 OUTPUT_PATH_DIR=`dirname $LOG_FILE`
|
|
51 OUTPUT_PATH_NAME=`basename $LOG_FILE .dat`
|
|
52 else
|
|
53 OUTPUT_PATH_DIR=`dirname $HTML_REPORT`
|
|
54 OUTPUT_PATH_NAME=`basename $HTML_REPORT .dat`
|
|
55 fi
|
|
56
|
|
57 OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"
|
|
58
|
|
59
|
|
60
|
|
61 #ME
|
|
62 mkdir -p $OUTPUT_PATH
|
|
63
|
|
64 chmod 777 -R $OUTPUT_PATH
|
|
65
|
|
66 #VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3`
|
|
67 #VERSION=`echo $VERSION | cut -d"_" -f2`
|
|
68
|
|
69 #DEBUG_MODE
|
|
70
|
|
71 DEBUG_MODE="on"
|
|
72 DEBUG="/dev/null"
|
|
73
|
|
74 if [[ $DEBUG_MODE == "on" ]];then
|
|
75
|
|
76 DEBUG="$OUTPUT_PATH/ncPRO-QC.debug"
|
|
77
|
|
78 fi
|
|
79
|
|
80 #Deploy ncPRO directories structure
|
|
81
|
|
82 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o $OUTPUT_PATH > $DEBUG
|
|
83
|
|
84 echo "$INPUT" >> $DEBUG
|
|
85 echo "$SAMPLENAME" >> $DEBUG
|
|
86 echo "$PROJECTNAME" >> $DEBUG
|
|
87 echo "$GENOME_2" >> $DEBUG
|
|
88 echo "$RMSK" >> $DEBUG
|
|
89 echo "$BAM_OUT" >> $DEBUG
|
|
90 echo "$RFAM" >> $DEBUG
|
|
91 echo "$REPORT" >> $DEBUG
|
|
92 echo "$HTML_REPORT" >> $DEBUG
|
|
93 echo "$PDF_REPORT" >> $DEBUG
|
|
94 echo "$LOG_FILE" >> $DEBUG
|
|
95 echo "$INPUT_TYPE" >> $DEBUG
|
|
96 echo "$ALIGNMENT" >> $DEBUG
|
|
97
|
|
98 echo "$sampleArray" >> $DEBUG
|
|
99 echo "$nameArray" >> $DEBUG
|
|
100 echo "$bamArray" >> $DEBUG
|
|
101
|
|
102 #Go to working directory
|
|
103
|
|
104 cp -rf /usr/curie_ngs/ncproseq_v1.6.5/annotation/*.item $annotationPath
|
|
105 cp -rf /usr/curie_ngs/ncproseq_v1.6.5/annotation/*_items.txt $annotationPath
|
|
106
|
|
107 if [[ ! -L "$databasePath/ncproseqBowtieIndexes" && ! -d "$databasePath/ncproseqBowtieIndexes" ]]
|
|
108 then
|
|
109 mkdir -p $databasePath/ncproseqBowtieIndexes
|
|
110 fi
|
|
111
|
|
112 cd /usr/curie_ngs
|
|
113 rm -rf bowtie_indexes
|
|
114 ln -s $databasePath/ncproseqBowtieIndexes bowtie_indexes
|
|
115
|
|
116 cd $OUTPUT_PATH
|
|
117
|
|
118 rm annotation
|
|
119
|
|
120 ln -s $annotationPath annotation
|
|
121
|
|
122 rm manuals
|
|
123
|
|
124 #Create symbolic link to input
|
|
125 #********************************************************************************* NEW: for BAM files, check if reads in input are grouped or not and change cmd accordignly ***********
|
|
126 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
127
|
|
128 count=0
|
|
129 for i in ${sampleArray[*]}
|
|
130 do
|
|
131 ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.fastq
|
|
132 count=$(( $count + 1 ))
|
|
133 done
|
|
134 fi
|
|
135
|
|
136 if [[ $INPUT_TYPE == "bam" ]];then
|
|
137
|
|
138 count=0
|
|
139 for i in ${sampleArray[*]}
|
|
140 do
|
|
141 ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam
|
|
142 count=$(( $count + 1 ))
|
|
143
|
|
144 #check if grouped
|
|
145 RG=`samtools view $i | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'`
|
|
146 done
|
|
147 fi
|
|
148
|
|
149 #Edit config-ncrna.txt ##### A REFAIRE ####
|
|
150 CONFIG_FILE=config-ncrna.txt
|
|
151
|
|
152 sed -i "s:^BOWTIE_GENOME_REFERENCE =.*$:BOWTIE_GENOME_REFERENCE = $GENOME_2:g" $CONFIG_FILE
|
|
153 sed -i "s:^ORGANISM.*$:ORGANISM = $GENOME_2:g" $CONFIG_FILE
|
|
154
|
|
155 sed -i "/N_CPU/c\N_CPU = 6" $CONFIG_FILE #****** Make sure this value matches universe.ini files
|
|
156 sed -i "s/test_Curie/$PROJECTNAME/g" $CONFIG_FILE
|
|
157 sed -i "s:^FASTQ_FORMAT =.*$:FASTQ_FORMAT = $FASTQ_FORMAT:g" $CONFIG_FILE
|
|
158
|
|
159
|
|
160 if [[ -f "$annotationPath/$GENOME_2/cluster_pirna.gff" ]]
|
|
161 then
|
|
162 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/cluster_pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff"
|
|
163 else
|
|
164 if [[ -f "$annotationPath/$GENOME_2/pirna.gff" ]]
|
|
165 then
|
|
166 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff"
|
|
167 else
|
|
168 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff"
|
|
169 fi
|
|
170 fi
|
|
171
|
|
172 sed -i "s:^ANNO_CATALOG.*$:ANNO_CATALOG = $ANNO_CATALOG:g" $CONFIG_FILE
|
|
173
|
|
174 #Build command line
|
|
175
|
|
176 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
177
|
|
178 if [[ $ALIGNMENT == "True" ]]; then
|
|
179
|
|
180
|
|
181 COMMAND_LINE="-c $CONFIG_FILE -s processRead -s mapGenome -s mapGenomeStat -s mapAnnOverview"
|
|
182
|
|
183 if [[ $RFAM == "True" ]];then
|
|
184
|
|
185 COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
|
|
186
|
|
187 fi
|
|
188
|
|
189 else
|
|
190
|
|
191 COMMAND_LINE="-c $CONFIG_FILE -s processRead"
|
|
192
|
|
193 fi
|
|
194
|
|
195 fi
|
|
196
|
|
197
|
|
198
|
|
199 #### NEW if BAM already grouped, omit [ -s processBam ] + put input.bam in /bowtie_results
|
|
200 if [[ $INPUT_TYPE == "bam" ]];then
|
|
201
|
|
202 if [[ $RG = 0 ]]; then #if bam file is NOT grouped
|
|
203
|
|
204 COMMAND_LINE="-c $CONFIG_FILE -s processBam -s mapGenomeStat -s mapAnnOverview"
|
|
205
|
|
206 else
|
|
207
|
|
208 count=0
|
|
209 for i in ${sampleArray[*]}
|
|
210 do
|
|
211 ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam
|
|
212 count=$(( $count + 1 ))
|
|
213 done
|
|
214 COMMAND_LINE="-c $CONFIG_FILE -s mapGenomeStat -s mapAnnOverview"
|
|
215 fi
|
|
216
|
|
217
|
|
218 if [[ $RFAM == "True" ]];then
|
|
219
|
|
220 COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
|
|
221
|
|
222 fi
|
|
223
|
|
224 fi
|
|
225
|
|
226 #************************* new
|
|
227
|
|
228 ##### Function to create HTML report in Galaxy ######
|
|
229
|
|
230 function createHtmlReport
|
|
231 {
|
|
232
|
|
233 # galaxy part :
|
|
234
|
|
235 #Reformat html output
|
|
236
|
|
237 tr '>' '\n' < ${OUTPUT_PATH}/report.html | sed -ne "s@.*<img src='\([^']*\)'.*@\1@p" -e 's@.*<img src="\([^"]*\)".*@\1@p' > ${OUTPUT_PATH}/img_list.txt
|
|
238
|
|
239 NC_LOGO=`head -1 ${OUTPUT_PATH}/img_list.txt`
|
|
240
|
|
241 #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"`
|
|
242
|
|
243 i=1
|
|
244
|
|
245 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
246
|
|
247 echo "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>QUALITY CONTROL</u></b></p>" > ${HTML_REPORT}
|
|
248 fi
|
|
249
|
|
250 if [[ $INPUT_TYPE == "bam" ]];then
|
|
251
|
|
252 echo "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>DATA MAPPING</u></b></p>" > $HTML_REPORT
|
|
253 fi
|
|
254
|
|
255 while read line
|
|
256 do
|
|
257
|
|
258 if [[ $line != "" ]];then
|
|
259
|
|
260 if [[ $i == "3" ]];then
|
|
261
|
|
262 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
263
|
|
264 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
265 echo "<p align=center><b>Base Composition Information</b></p><table align=center><tr><td align=center><a href=$ahref><img src=$line></a></td>" >> $HTML_REPORT
|
|
266 fi
|
|
267
|
|
268 if [[ $INPUT_TYPE == "bam" ]];then
|
|
269 echo "<p align=center><b>Mapping proportions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
270 fi
|
|
271
|
|
272 fi
|
|
273
|
|
274 if [[ $i == "4" ]];then
|
|
275
|
|
276 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
277
|
|
278 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
279 echo "<td align=center><a href=$ahref><img src=$line></td></a></td><table>" >> $HTML_REPORT
|
|
280 fi
|
|
281
|
|
282 if [[ $INPUT_TYPE == "bam" ]];then
|
|
283 echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
284 fi
|
|
285
|
|
286 fi
|
|
287
|
|
288 if [[ $i == "5" ]];then
|
|
289
|
|
290 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
291
|
|
292 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
293 echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
294 fi
|
|
295
|
|
296 if [[ $INPUT_TYPE == "bam" ]];then
|
|
297 echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
298 fi
|
|
299
|
|
300 fi
|
|
301
|
|
302 if [[ $i == "6" ]];then
|
|
303
|
|
304 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
305
|
|
306 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
307 echo "<hr width=500><p align=center><b>Quality Score</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
308 fi
|
|
309
|
|
310 if [[ $INPUT_TYPE == "bam" ]];then
|
|
311 echo "<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b><p align=center><b>Reads Annotation Overview</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
312 fi
|
|
313
|
|
314 fi
|
|
315
|
|
316 if [[ $i == "7" ]];then
|
|
317
|
|
318 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
319 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
320 echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
321 fi
|
|
322 if [[ $INPUT_TYPE == "bam" ]];then
|
|
323
|
|
324 if [[ $RFAM == "True" ]];then
|
|
325 echo "<p align=center><b>Precursor miRNAs Annotation</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
326 else
|
|
327 break
|
|
328 fi
|
|
329 fi
|
|
330 fi
|
|
331
|
|
332 if [[ $i == "8" ]];then
|
|
333
|
|
334 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
335 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
336
|
|
337 if [[ $ALIGNMENT == "True" ]]; then
|
|
338
|
|
339 echo "<hr size=20><p align=center style=font-size:25px;><b><u>DATA MAPPING</u></b></p><p align=center><b>Mapping proportions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
340 else
|
|
341 break
|
|
342 fi
|
|
343 fi
|
|
344 if [[ $INPUT_TYPE == "bam" ]];then
|
|
345
|
|
346 echo "<p align=center><b>Annotation of ncRNAs from RFAM</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
347 fi
|
|
348 fi
|
|
349
|
|
350 if [[ $i == "9" ]];then
|
|
351
|
|
352 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
353 if [[ $INPUT_TYPE == "fastq" ]];then
|
|
354 echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
355 fi
|
|
356
|
|
357 if [[ $INPUT_TYPE == "bam" ]];then
|
|
358
|
|
359 echo "<p align=center><b>Annotation of Repetitive Regions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
360 break
|
|
361 fi
|
|
362
|
|
363 fi
|
|
364
|
|
365 if [[ $i == "10" ]];then
|
|
366
|
|
367 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
368
|
|
369 echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
370
|
|
371 fi
|
|
372
|
|
373 if [[ $i == "11" ]];then
|
|
374
|
|
375 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
376
|
|
377 echo "<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b><p align=center><b>Reads Annotation Overview</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
378
|
|
379 fi
|
|
380
|
|
381 if [[ $RFAM == "True" ]];then
|
|
382
|
|
383 if [[ $i == "12" ]];then
|
|
384
|
|
385 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
386
|
|
387 echo "<p align=center><b>Precursor miRNAs Annotation</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
388
|
|
389 fi
|
|
390
|
|
391 if [[ $i == "13" ]];then
|
|
392
|
|
393 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
394
|
|
395 echo "<p align=center><b>Annotation of ncRNAs from RFAM</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
396
|
|
397 fi
|
|
398
|
|
399 if [[ $i == "14" ]];then
|
|
400
|
|
401 ahref=`sed "s/html\/thumb/pic/" <<< $line`
|
|
402
|
|
403 echo "<p align=center><b>Annotation of Repetitive Regions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT
|
|
404
|
|
405 fi
|
|
406
|
|
407 fi
|
|
408
|
|
409 i=$(( $i + 1 ))
|
|
410
|
|
411 fi
|
|
412
|
|
413
|
|
414 done < ${OUTPUT_PATH}/img_list.txt
|
|
415
|
|
416 rm ${OUTPUT_PATH}/img_list.txt
|
|
417
|
|
418
|
|
419
|
|
420 }
|
|
421
|
|
422
|
|
423 #Launch ncPRO analysis
|
|
424
|
|
425 #FAIRE une boucle pour -s hrml_builder ou -s pdf_builder
|
|
426
|
|
427 if [[ $REPORT == "all" ]];then
|
|
428
|
|
429
|
|
430 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s html_builder -s pdf_builder>> $DEBUG
|
|
431
|
|
432 createHtmlReport
|
|
433
|
|
434 cp ${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf $PDF_REPORT
|
|
435
|
|
436 fi
|
|
437
|
|
438 if [[ $REPORT == "pdf" ]];then
|
|
439
|
|
440
|
|
441 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s pdf_builder>> $DEBUG
|
|
442
|
|
443 cp ${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf $PDF_REPORT
|
|
444
|
|
445
|
|
446 fi
|
|
447
|
|
448 if [[ $REPORT == "html" ]];then
|
|
449
|
|
450
|
|
451 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s html_builder>> $DEBUG
|
|
452
|
|
453 createHtmlReport
|
|
454
|
|
455 fi
|
|
456
|
|
457 #Galaxy output handling
|
|
458
|
|
459 cp ${OUTPUT_PATH}/pipeline.log $LOG_FILE
|
|
460
|
|
461 if [[ $ALIGNMENT == "True" ]];then
|
|
462
|
|
463 count=0
|
|
464 for i in ${bamArray[*]}
|
|
465 do
|
|
466 cp ${OUTPUT_PATH}/bowtie_results/${nameArray[count]/_/.}_$GENOME_2.bam $i
|
|
467 count=$(( $count + 1 ))
|
|
468 done
|
|
469 fi
|