#!/bin/bash
#
# Wrapper around ncPRO-seq: parses the options below, prepares a working
# directory, links the input files into it, edits config-ncrna.txt, runs the
# selected pipeline steps and copies the results to the requested locations.
#
# I added the 'me' lines
# Assumption: a user can't begin with a fastq file already grouped!
#
# Options (every option takes a value):
#   -i  input file; repeatable, values are accumulated in INPUT
#   -s  sample name; repeatable, accumulated in SAMPLENAME (same order as -i)
#   -n  project name (replaces "test_Curie" in the configuration file)
#   -g  genome name (replaces "mm9"/"hg19" in the configuration file)
#   -m  RMSK value (only written to the debug trace by this script)
#   -o  destination of a resulting BAM file; repeatable, accumulated in BAM_OUT
#   -f  "True" adds the overviewRfam/overviewRmsk steps
#   -r  report kind: "all", "pdf" or "html"
#   -h  destination of the HTML report
#   -p  destination of the PDF report
#   -l  destination of the pipeline log
#   -t  input type: "fastq" or "bam"
#   -a  "True" runs the alignment steps (fastq input) and exports the BAM files

#######################################
# Parses the wrapper options into global variables.
# Repeatable options (-i, -s, -o) are appended as "value," so each of INPUT,
# SAMPLENAME and BAM_OUT ends up as a comma-terminated list.
# Globals:   INPUT SAMPLENAME PROJECTNAME GENOME RMSK BAM_OUT RFAM REPORT
#            HTML_REPORT PDF_REPORT LOG_FILE INPUT_TYPE ALIGNMENT (written)
# Arguments: the script's command-line arguments
#######################################
parse_options() {
  local optionName
  local OPTIND=1 # always start from the first argument of this call

  while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:" optionName; do
    case "$optionName" in
      i) INPUT+="$OPTARG," ;;
      s) SAMPLENAME+="$OPTARG," ;;
      n) PROJECTNAME="$OPTARG" ;;
      g) GENOME="$OPTARG" ;;
      m) RMSK="$OPTARG" ;;
      o) BAM_OUT+="$OPTARG," ;;
      f) RFAM="$OPTARG" ;;
      r) REPORT="$OPTARG" ;;
      h) HTML_REPORT="$OPTARG" ;;
      p) PDF_REPORT="$OPTARG" ;;
      l) LOG_FILE="$OPTARG" ;;
      t) INPUT_TYPE="$OPTARG" ;;
      a) ALIGNMENT="$OPTARG" ;;
      # Unknown options: getopts already reports them on stderr; parsing
      # simply continues with the next option.
    esac
  done
}

parse_options "$@"
# Split the comma-terminated option lists into arrays (one element per value).
# Pathname expansion is switched off while splitting so that a value holding
# glob characters (* ? [) is kept literally instead of being expanded.
set -f
sampleArray=(${INPUT//,/ })
nameArray=(${SAMPLENAME//,/ })
bamArray=(${BAM_OUT//,/ })
set +f

# The working directory is "<name>_files", created next to a reference output
# file: the log file for a pdf-only report, the HTML report otherwise. A
# trailing ".dat" is stripped from the reference file name.
if [[ "$REPORT" == "pdf" ]]; then
  reference_output=$LOG_FILE
else
  reference_output=$HTML_REPORT
fi

# Without a reference file the path would collapse to "/_files"; stop instead.
if [[ -z "$reference_output" ]]; then
  echo "ERROR: no report/log destination given, cannot derive the working directory" >&2
  exit 1
fi

OUTPUT_PATH_DIR=$(dirname -- "$reference_output")
OUTPUT_PATH_NAME=$(basename -- "$reference_output" .dat)

OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"

#ME
if ! mkdir -p -- "$OUTPUT_PATH"; then
  echo "ERROR: cannot create working directory '$OUTPUT_PATH'" >&2
  exit 1
fi

# NOTE(review): this makes the whole tree world-writable; kept as in the
# original, confirm that it is really required.
chmod -R 777 -- "$OUTPUT_PATH"

#VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3`
#VERSION=`echo $VERSION | cut -d"_" -f2`
# DEBUG_MODE
# "on" writes the debug trace to a file inside the working directory; any
# other value discards it (/dev/null).
DEBUG_MODE="on"
DEBUG="/dev/null"

if [[ "$DEBUG_MODE" == "on" ]]; then
  DEBUG="$OUTPUT_PATH/ncPRO-QC.debug"
fi

# Deploy ncPRO directories structure.
# This redirection truncates the debug file; everything after it appends.
/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"

# Record every option value, one per line, followed by the three arrays.
# "${array[*]}" prints all elements (a bare "$array" only shows element 0).
{
  printf '%s\n' \
    "$INPUT" \
    "$SAMPLENAME" \
    "$PROJECTNAME" \
    "$GENOME" \
    "$RMSK" \
    "$BAM_OUT" \
    "$RFAM" \
    "$REPORT" \
    "$HTML_REPORT" \
    "$PDF_REPORT" \
    "$LOG_FILE" \
    "$INPUT_TYPE" \
    "$ALIGNMENT"

  printf '%s\n' \
    "${sampleArray[*]}" \
    "${nameArray[*]}" \
    "${bamArray[*]}"
} >> "$DEBUG"
# Go to working directory.
# The rm/ln commands below use relative names, so stop if the directory
# cannot be entered (an empty value would otherwise leave us somewhere else).
if [[ -z "$OUTPUT_PATH" ]] || ! cd -- "$OUTPUT_PATH"; then
  echo "ERROR: cannot change to working directory '$OUTPUT_PATH'" >&2
  exit 1
fi

# Replace the local "annotation" entry with a link to the shared annotation
# tree and remove "manuals" (both presumably created by ncPRO-deploy).
rm annotation

ln -s /bioinfo/local/curie/ngs-data-analysis/annotation .

rm manuals

# Create symbolic links to the inputs: rawdata/<sample name>.<input type>.
# NEW: for BAM files, check whether the reads are already grouped and change
# the command line accordingly (see the command-line construction below).
case "$INPUT_TYPE" in
  fastq | bam)
    for sample_index in "${!sampleArray[@]}"; do
      sample_file=${sampleArray[sample_index]}

      ln -s "$sample_file" \
        "${OUTPUT_PATH}/rawdata/${nameArray[sample_index]}.${INPUT_TYPE}"

      if [[ "$INPUT_TYPE" == "bam" ]]; then
        # Check if grouped: RG is 1 when every read name looks like
        # "<digits>_<digits>", 0 as soon as one read name does not.
        # NOTE(review): RG is overwritten for each file, so only the last BAM
        # decides; an empty/unreadable BAM also yields 1 - confirm intent.
        RG=$(samtools view "$sample_file" \
          | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}')
      fi
    done
    ;;
esac
# Edit config-ncrna.txt
CONFIG_FILE=config-ncrna.txt

# Prints $1 with the characters that are special in a sed replacement
# (backslash, slash, ampersand) escaped, so the value is inserted literally.
_sed_replacement_escape() {
  printf '%s' "$1" | sed 's|[\\/&]|\\&|g'
}

#######################################
# Adapts the configuration file (in the current directory) to this run.
# Globals:   CONFIG_FILE, GENOME, PROJECTNAME (read)
# Returns:   1 when the configuration file does not exist
#######################################
edit_ncpro_config() {
  local genome_text project_text

  if [[ ! -f "$CONFIG_FILE" ]]; then
    echo "ERROR: configuration file '$CONFIG_FILE' not found" >&2
    return 1
  fi

  genome_text=$(_sed_replacement_escape "$GENOME")
  project_text=$(_sed_replacement_escape "$PROJECTNAME")

  # Both default genome names are replaced in a single pass, so a genome name
  # that itself contains "hg19" is not substituted a second time.
  sed -i "s/mm9\|hg19/${genome_text}/g" "$CONFIG_FILE"

  #****** Make sure this value matches universe.ini files
  sed -i '/N_CPU/c\N_CPU = 6' "$CONFIG_FILE"

  sed -i "s/test_Curie/${project_text}/g" "$CONFIG_FILE"
}

edit_ncpro_config
# Build command line

#######################################
# Prints the ncPRO-seq option string ("-c <config> -s <step> ...") for a run.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - input type ("fastq" or "bam")
#            $2 - alignment flag ("True" enables the mapping steps for fastq)
#            $3 - RFAM flag ("True" adds overviewRfam and overviewRmsk)
#            $4 - grouping flag for BAM input ("0" = reads NOT grouped)
# Outputs:   the option string on stdout; nothing for another input type
# Returns:   1 for an input type other than fastq/bam
#######################################
ncpro_step_options() {
  local input_type=$1
  local alignment=$2
  local rfam=$3
  local grouped=$4
  local steps

  case "$input_type" in
    fastq)
      steps="-c $CONFIG_FILE -s processRead"
      if [[ "$alignment" == "True" ]]; then
        steps+=" -s mapGenome -s mapGenomeStat -s mapAnnOverview"
        # The RFAM overviews are only added together with the alignment.
        if [[ "$rfam" == "True" ]]; then
          steps+=" -s overviewRfam -s overviewRmsk"
        fi
      fi
      ;;
    bam)
      steps="-c $CONFIG_FILE"
      # Reads that are not grouped yet need the processBam step first.
      if [[ "$grouped" == "0" ]]; then
        steps+=" -s processBam"
      fi
      steps+=" -s mapGenomeStat -s mapAnnOverview"
      if [[ "$rfam" == "True" ]]; then
        steps+=" -s overviewRfam -s overviewRmsk"
      fi
      ;;
    *)
      return 1
      ;;
  esac

  printf '%s\n' "$steps"
}

if [[ "$INPUT_TYPE" == "fastq" || "$INPUT_TYPE" == "bam" ]]; then

  #### NEW: if the BAM is already grouped, omit [ -s processBam ]
  if [[ "$INPUT_TYPE" == "bam" && "$RG" != "0" ]]; then
    # Make sure every input is linked in rawdata; links that already exist
    # are left alone (re-creating them only produced "File exists" errors).
    # NOTE(review): the original comment says the grouped BAM should be put
    # in /bowtie_results, but the code has always linked into rawdata -
    # confirm which location ncPRO-seq expects.
    for sample_index in "${!sampleArray[@]}"; do
      raw_link="${OUTPUT_PATH}/rawdata/${nameArray[sample_index]}.bam"
      if [[ ! -e "$raw_link" && ! -L "$raw_link" ]]; then
        ln -s "${sampleArray[sample_index]}" "$raw_link"
      fi
    done
  fi

  COMMAND_LINE=$(ncpro_step_options "$INPUT_TYPE" "$ALIGNMENT" "$RFAM" "$RG")
fi

#************************* new
##### Function to create HTML report in Galaxy ######

# Appends one "title + linked thumbnail" section to $HTML_REPORT.
#   $1 - markup written before the title paragraph (may be empty)
#   $2 - section title
#   $3 - link target (full-size picture)
#   $4 - thumbnail image source
_ncpro_report_section() {
  printf '%s\n' \
    "$1<p align=center><b>$2</b></p><p align=center><a href=$3><img src=$4></a></p>" \
    >> "$HTML_REPORT"
}

#######################################
# Rebuilds the HTML report from the images referenced by
# ${OUTPUT_PATH}/report.html.
# The <img src=...> values are extracted in document order; the first one is
# used as the logo and images 3 and up are laid out as titled sections whose
# meaning depends on the input type and on their position in the list.
# Each thumbnail links to the same path with "html/thumb" replaced by "pic".
# Globals:   OUTPUT_PATH, HTML_REPORT, INPUT_TYPE, RFAM, ALIGNMENT (read)
#            NC_LOGO (written)
# Outputs:   overwrites $HTML_REPORT (header) and appends the sections;
#            uses and removes ${OUTPUT_PATH}/img_list.txt
#######################################
createHtmlReport() {
  local img_list="${OUTPUT_PATH}/img_list.txt"
  local thumb_dir="html/thumb"
  local rule="<hr width=500>"
  local overview_head="<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b>"
  local mapping_head="<hr size=20><p align=center style=font-size:25px;><b><u>DATA MAPPING</u></b></p>"
  local thumb full
  local position=1

  # galaxy part :
  # Reformat html output: put one tag per line, then keep the src value of
  # every <img> tag (single- or double-quoted).
  tr '>' '\n' < "${OUTPUT_PATH}/report.html" \
    | sed -ne "s@.*<img src='\([^']*\)'.*@\1@p" -e 's@.*<img src="\([^"]*\)".*@\1@p' \
    > "$img_list"

  NC_LOGO=$(head -n 1 "$img_list")

  #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"`

  # Page header: logo plus the title of the first chapter.
  case "$INPUT_TYPE" in
    fastq)
      printf '%s\n' "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>QUALITY CONTROL</u></b></p>" > "$HTML_REPORT"
      ;;
    bam)
      printf '%s\n' "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>DATA MAPPING</u></b></p>" > "$HTML_REPORT"
      ;;
  esac

  while read -r thumb; do
    # Empty lines do not count as images.
    [[ -n "$thumb" ]] || continue

    full=${thumb/"$thumb_dir"/pic}

    case "$position:$INPUT_TYPE" in
      3:fastq)
        # Left cell of the two-column base composition table.
        printf '%s\n' "<p align=center><b>Base Composition Information</b></p><table align=center><tr><td align=center><a href=$full><img src=$thumb></a></td>" >> "$HTML_REPORT"
        ;;
      3:bam)
        _ncpro_report_section "" "Mapping proportions" "$full" "$thumb"
        ;;
      4:fastq)
        # Right cell; closes the link, cell, row and table opened above.
        printf '%s\n' "<td align=center><a href=$full><img src=$thumb></a></td></tr></table>" >> "$HTML_REPORT"
        ;;
      4:bam)
        _ncpro_report_section "$rule" "Distinct Reads Length Distribution" "$full" "$thumb"
        ;;
      5:fastq)
        _ncpro_report_section "$rule" "Distinct Reads Length Distribution" "$full" "$thumb"
        ;;
      5:bam)
        _ncpro_report_section "$rule" "Abundant Reads Length Distribution" "$full" "$thumb"
        ;;
      6:fastq)
        _ncpro_report_section "$rule" "Quality Score" "$full" "$thumb"
        ;;
      6:bam)
        _ncpro_report_section "$overview_head" "Reads Annotation Overview" "$full" "$thumb"
        ;;
      7:fastq)
        _ncpro_report_section "$rule" "Abundant Reads Length Distribution" "$full" "$thumb"
        ;;
      7:bam)
        # Without the RFAM steps the BAM report ends here.
        if [[ "$RFAM" == "True" ]]; then
          _ncpro_report_section "" "Precursor miRNAs Annotation" "$full" "$thumb"
        else
          break
        fi
        ;;
      8:fastq)
        # Without alignment the fastq report ends here.
        if [[ "$ALIGNMENT" == "True" ]]; then
          _ncpro_report_section "$mapping_head" "Mapping proportions" "$full" "$thumb"
        else
          break
        fi
        ;;
      8:bam)
        _ncpro_report_section "" "Annotation of ncRNAs from RFAM" "$full" "$thumb"
        ;;
      9:fastq)
        _ncpro_report_section "$rule" "Distinct Reads Length Distribution" "$full" "$thumb"
        ;;
      9:bam)
        # Last section of a BAM report.
        _ncpro_report_section "" "Annotation of Repetitive Regions" "$full" "$thumb"
        break
        ;;
      10:*)
        _ncpro_report_section "$rule" "Abundant Reads Length Distribution" "$full" "$thumb"
        ;;
      11:*)
        _ncpro_report_section "$overview_head" "Reads Annotation Overview" "$full" "$thumb"
        ;;
      12:*)
        if [[ "$RFAM" == "True" ]]; then
          _ncpro_report_section "" "Precursor miRNAs Annotation" "$full" "$thumb"
        fi
        ;;
      13:*)
        if [[ "$RFAM" == "True" ]]; then
          _ncpro_report_section "" "Annotation of ncRNAs from RFAM" "$full" "$thumb"
        fi
        ;;
      14:*)
        if [[ "$RFAM" == "True" ]]; then
          _ncpro_report_section "" "Annotation of Repetitive Regions" "$full" "$thumb"
        fi
        ;;
    esac

    position=$(( position + 1 ))
  done < "$img_list"

  rm -- "$img_list"
}
# Launch ncPRO analysis
# TODO: loop over "-s html_builder" / "-s pdf_builder"

ncpro_seq_bin=/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-seq
pdf_result="${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf"

# COMMAND_LINE is a space-separated option string; split it into words
# without exposing it to pathname expansion.
read -r -a step_options <<< "$COMMAND_LINE"

# Report builder steps added to the pipeline for each report kind.
case "$REPORT" in
  all) report_builders=(-s html_builder -s pdf_builder) ;;
  pdf) report_builders=(-s pdf_builder) ;;
  html) report_builders=(-s html_builder) ;;
  *) report_builders=() ;; # unknown report kind: the pipeline is not run
esac

if (( ${#report_builders[@]} > 0 )); then
  # The pipeline's standard output is appended to the debug trace.
  "$ncpro_seq_bin" "${step_options[@]}" "${report_builders[@]}" >> "$DEBUG"

  # "all" and "html" rebuild the HTML report from the pipeline's report.html.
  if [[ "$REPORT" != "pdf" ]]; then
    createHtmlReport
  fi

  # "all" and "pdf" export the PDF report.
  if [[ "$REPORT" != "html" ]]; then
    cp -- "$pdf_result" "$PDF_REPORT"
  fi
fi

# Galaxy output handling

cp -- "${OUTPUT_PATH}/pipeline.log" "$LOG_FILE"

# After an alignment, export one BAM per requested destination. The file
# copied is bowtie_results/<sample name>_<genome>.bam, where the first "_" of
# the sample name is replaced by "."; destinations and sample names are
# matched by position.
if [[ "$ALIGNMENT" == "True" ]]; then
  for bam_index in "${!bamArray[@]}"; do
    cp -- \
      "${OUTPUT_PATH}/bowtie_results/${nameArray[bam_index]/_/.}_${GENOME}.bam" \
      "${bamArray[bam_index]}"
  done
fi