comparison ncPRO-QC.sh @ 2:1027a330d606 draft default tip

Uploaded
author jbrayet
date Thu, 29 Oct 2015 10:25:06 -0400
parents
children
comparison
equal deleted inserted replaced
1:0c34e0bef7d3 2:1027a330d606
#!/bin/bash

# Galaxy wrapper around the ncPRO-seq pipeline.
# The lines marked 'ME' further down were added locally.
# Assumption: a user cannot start from a fastq file that is already grouped!

#######################################
# Parse the wrapper's command-line options into global variables.
# Globals written:
#   INPUT, SAMPLENAME, BAM_OUT - comma-terminated lists, one entry appended
#                                per occurrence of -i / -s / -o
#   PROJECTNAME (-n), GENOME (-g), RMSK (-m), RFAM (-f), REPORT (-r),
#   HTML_REPORT (-h), PDF_REPORT (-p), LOG_FILE (-l), INPUT_TYPE (-t),
#   ALIGNMENT (-a)
# Arguments: the script's own arguments ("$@")
# Returns:   0; unknown options are reported by getopts and otherwise ignored
#######################################
parse_args() {
  local optionName
  # Local OPTIND so the function always starts scanning at the first argument.
  local OPTIND=1 OPTARG

  # The repeatable options are built with '+=': start from empty strings so a
  # same-named variable inherited from the environment cannot leak into them.
  INPUT=""
  SAMPLENAME=""
  BAM_OUT=""

  while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:" optionName; do
    case "$optionName" in
      i) INPUT+="$OPTARG," ;;
      s) SAMPLENAME+="$OPTARG," ;;
      n) PROJECTNAME="$OPTARG" ;;
      g) GENOME="$OPTARG" ;;
      m) RMSK="$OPTARG" ;;
      o) BAM_OUT+="$OPTARG," ;;
      f) RFAM="$OPTARG" ;;
      r) REPORT="$OPTARG" ;;
      h) HTML_REPORT="$OPTARG" ;;
      p) PDF_REPORT="$OPTARG" ;;
      l) LOG_FILE="$OPTARG" ;;
      t) INPUT_TYPE="$OPTARG" ;;
      a) ALIGNMENT="$OPTARG" ;;
      # getopts has already printed a diagnostic; keep going as before.
      *) ;;
    esac
  done
}

parse_args "$@"
25
# Split the comma-terminated option lists into arrays, one element per sample.
# 'read -a' with IFS=',' splits on commas only, so a path containing spaces or
# glob characters stays a single, literal element.
IFS=',' read -r -a sampleArray <<< "$INPUT"
IFS=',' read -r -a nameArray <<< "$SAMPLENAME"
IFS=',' read -r -a bamArray <<< "$BAM_OUT"

# The working directory is derived from one of the Galaxy output datasets:
# the log file when only a PDF is requested, the HTML report otherwise.
if [[ $REPORT == "pdf" ]]; then
  output_anchor=$LOG_FILE
else
  output_anchor=$HTML_REPORT
fi

# Without an anchor the path below would collapse to "/_files" and the rest of
# the script would create and delete files there.
if [[ -z $output_anchor ]]; then
  echo "ncPRO-QC: cannot derive the working directory (missing -l or -h option)" >&2
  exit 1
fi

OUTPUT_PATH_DIR=$(dirname -- "$output_anchor")
OUTPUT_PATH_NAME=$(basename -- "$output_anchor" .dat)

OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"

#ME
mkdir -p -- "$OUTPUT_PATH" || exit 1

# NOTE(review): world-writable on purpose? Kept as in the original.
chmod -R 777 -- "$OUTPUT_PATH"

#VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3`
#VERSION=`echo $VERSION | cut -d"_" -f2`

#DEBUG_MODE
# When "on", tool output and the parsed options are written to a debug file in
# the working directory; otherwise they are discarded.
DEBUG_MODE="on"
DEBUG="/dev/null"

if [[ $DEBUG_MODE == "on" ]]; then
  DEBUG="$OUTPUT_PATH/ncPRO-QC.debug"
fi

#Deploy ncPRO directories structure
/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"

# Record every parsed option, one per line.
printf '%s\n' \
  "$INPUT" "$SAMPLENAME" "$PROJECTNAME" "$GENOME" "$RMSK" "$BAM_OUT" "$RFAM" \
  "$REPORT" "$HTML_REPORT" "$PDF_REPORT" "$LOG_FILE" "$INPUT_TYPE" "$ALIGNMENT" \
  >> "$DEBUG"

# Record the arrays in full ("$array" alone expands to the first element only).
printf '%s\n' "${sampleArray[*]}" "${nameArray[*]}" "${bamArray[*]}" >> "$DEBUG"

#Go to working directory
# Abort if the directory is unreachable: the removals below are relative paths
# and must never run anywhere else.
cd -- "$OUTPUT_PATH" || exit 1

# Replace the deployed 'annotation' entry by a link to the shared annotation
# directory and drop the deployed 'manuals' entry.
rm -f -- annotation

ln -s /bioinfo/local/curie/ngs-data-analysis/annotation .

rm -f -- manuals
92
#Create symbolic link to input
# NEW: for BAM files, check whether the reads in input are grouped or not and
# change the command line accordingly.

#######################################
# Link every input dataset into ${OUTPUT_PATH}/rawdata under its sample name.
# Globals read:    INPUT_TYPE ("fastq" or "bam"; anything else is a no-op),
#                  OUTPUT_PATH, sampleArray (input paths),
#                  nameArray (sample names, same order as sampleArray)
# Globals written: RG - for BAM input, 1 when every read name of the BAM
#                  matches <digits>_<digits>, 0 otherwise.
# NOTE(review): RG is overwritten for each BAM, so with several inputs only the
# last one decides. Kept as in the original; confirm what mixed inputs should do.
#######################################
link_raw_inputs() {
  local idx src

  case "$INPUT_TYPE" in
    fastq | bam) ;;
    *) return 0 ;;
  esac

  for idx in "${!sampleArray[@]}"; do
    src=${sampleArray[idx]}
    # The link's extension is the input type itself (.fastq or .bam).
    ln -s -- "$src" "${OUTPUT_PATH}/rawdata/${nameArray[idx]}.${INPUT_TYPE}"

    if [[ $INPUT_TYPE == "bam" ]]; then
      #check if grouped
      # awk stops at the first read name that is not <digits>_<digits>.
      RG=$(samtools view "$src" | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}')
    fi
  done
}

link_raw_inputs
117
#Edit config-ncrna.txt
CONFIG_FILE=config-ncrna.txt

# Escape a value for use as the replacement text of a sed 's/…/…/' command:
# backslash, '/' and '&' would otherwise be interpreted by sed.
_sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

#######################################
# Adapt the deployed ncPRO configuration file to this run.
# Globals read: CONFIG_FILE, GENOME, PROJECTNAME
# Effects: in CONFIG_FILE, replaces the template genomes (mm9, hg19) by GENOME,
#          forces the N_CPU line to "N_CPU = 6" and replaces the template
#          project name (test_Curie) by PROJECTNAME.
#######################################
edit_ncpro_config() {
  local genome_repl project_repl

  if [[ ! -f $CONFIG_FILE ]]; then
    echo "ncPRO-QC: $CONFIG_FILE not found, configuration left untouched" >&2
    return 0
  fi

  genome_repl=$(_sed_escape_replacement "$GENOME")
  project_repl=$(_sed_escape_replacement "$PROJECTNAME")

  sed -i "s/mm9/${genome_repl}/g" "$CONFIG_FILE"
  sed -i "s/hg19/${genome_repl}/g" "$CONFIG_FILE"

  #****** Make sure this value matches universe.ini files
  sed -i "/N_CPU/c\N_CPU = 6" "$CONFIG_FILE"

  sed -i "s/test_Curie/${project_repl}/g" "$CONFIG_FILE"
}

#Build command line

#######################################
# Assemble the ncPRO-seq step list for this run.
# Globals read:    CONFIG_FILE, INPUT_TYPE, ALIGNMENT, RFAM, RG,
#                  sampleArray, nameArray, OUTPUT_PATH
# Globals written: COMMAND_LINE (left untouched for an unknown INPUT_TYPE)
#   fastq: processRead, plus the mapping steps when ALIGNMENT is "True"
#          (and the RFAM/RMSK overviews when RFAM is also "True")
#   bam:   processBam is included only when RG is 0 (reads not grouped);
#          the RFAM/RMSK overviews are added when RFAM is "True"
#######################################
build_command_line() {
  local idx
  local rfam_steps="-s overviewRfam -s overviewRmsk"

  case "$INPUT_TYPE" in
    fastq)
      if [[ $ALIGNMENT == "True" ]]; then
        COMMAND_LINE="-c $CONFIG_FILE -s processRead -s mapGenome -s mapGenomeStat -s mapAnnOverview"
        if [[ $RFAM == "True" ]]; then
          COMMAND_LINE="$COMMAND_LINE $rfam_steps"
        fi
      else
        COMMAND_LINE="-c $CONFIG_FILE -s processRead"
      fi
      ;;

    bam)
      #### NEW if BAM already grouped, omit [ -s processBam ] + put input.bam in /bowtie_results
      if [[ $RG = 0 ]]; then #if bam file is NOT grouped
        COMMAND_LINE="-c $CONFIG_FILE -s processBam -s mapGenomeStat -s mapAnnOverview"
      else
        # NOTE(review): the comment above announces bowtie_results, but the
        # links are created in rawdata, as the input-linking step already does.
        # Kept as in the original; confirm the intended target.
        for idx in "${!sampleArray[@]}"; do
          ln -s -- "${sampleArray[idx]}" "${OUTPUT_PATH}/rawdata/${nameArray[idx]}.bam"
        done
        COMMAND_LINE="-c $CONFIG_FILE -s mapGenomeStat -s mapAnnOverview"
      fi

      if [[ $RFAM == "True" ]]; then
        COMMAND_LINE="$COMMAND_LINE $rfam_steps"
      fi
      ;;
  esac
}

edit_ncpro_config
build_command_line
179
180 #************************* new
181
##### Function to create HTML report in Galaxy ######

# Append one titled, clickable thumbnail section to the Galaxy HTML report.
#   $1 - markup written before the title (may be empty)
#   $2 - section title
#   $3 - thumbnail path (img src)
#   $4 - full-size picture path (link target)
# Globals read: HTML_REPORT (appended to)
_append_image_section() {
  printf '%s<p align=center><b>%s</b></p><p align=center><a href=%s><img src=%s></a></p>\n' \
    "$1" "$2" "$4" "$3" >> "$HTML_REPORT"
}

#######################################
# Rebuild the ncPRO HTML report as a single flat page for Galaxy.
# The image paths are extracted from ${OUTPUT_PATH}/report.html in document
# order; the position of an image in that list decides which section title it
# gets. Image 1 is used as the logo; image 2 is skipped.
# Globals read:    OUTPUT_PATH, HTML_REPORT, INPUT_TYPE, RFAM, ALIGNMENT
# Globals written: NC_LOGO (first image path of the ncPRO report)
# Outputs: writes HTML_REPORT; ${OUTPUT_PATH}/img_list.txt is a scratch file
#          removed before returning.
#######################################
createHtmlReport() {
  local img_list="${OUTPUT_PATH}/img_list.txt"
  local thumb_dir='html/thumb'
  local rule='<hr width=500>'
  local overview_head='<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b>'
  local mapping_head='<hr size=20><p align=center style=font-size:25px;><b><u>DATA MAPPING</u></b></p>'
  local line ahref
  local i=1

  # galaxy part :

  #Reformat html output
  # One tag per line, then keep the src of every <img> (either quote style).
  tr '>' '\n' < "${OUTPUT_PATH}/report.html" \
    | sed -ne "s@.*<img src='\([^']*\)'.*@\1@p" -e 's@.*<img src="\([^"]*\)".*@\1@p' \
    > "$img_list"

  NC_LOGO=$(head -1 "$img_list")

  #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"`

  # Page header; this is the only write that truncates the report.
  case "$INPUT_TYPE" in
    fastq)
      printf '%s\n' "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>QUALITY CONTROL</u></b></p>" > "$HTML_REPORT"
      ;;
    bam)
      printf '%s\n' "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>DATA MAPPING</u></b></p>" > "$HTML_REPORT"
      ;;
  esac

  while read -r line; do
    # Empty lines neither produce output nor advance the image counter.
    [[ -n $line ]] || continue

    # The full-size picture sits under pic/ where the thumbnail is under html/thumb.
    ahref=${line/"$thumb_dir"/pic}

    case "${INPUT_TYPE}:${i}" in
      fastq:3)
        # Images 3 and 4 share one table row; the row is closed by image 4.
        printf '%s\n' "<p align=center><b>Base Composition Information</b></p><table align=center><tr><td align=center><a href=$ahref><img src=$line></a></td>" >> "$HTML_REPORT"
        ;;
      fastq:4)
        # Close the link, the cell, the row and the table in nesting order.
        printf '%s\n' "<td align=center><a href=$ahref><img src=$line></a></td></tr></table>" >> "$HTML_REPORT"
        ;;
      bam:3)
        _append_image_section "" "Mapping proportions" "$line" "$ahref"
        ;;
      bam:4 | fastq:5 | fastq:9)
        _append_image_section "$rule" "Distinct Reads Length Distribution" "$line" "$ahref"
        ;;
      bam:5 | fastq:7)
        _append_image_section "$rule" "Abundant Reads Length Distribution" "$line" "$ahref"
        ;;
      fastq:6)
        _append_image_section "$rule" "Quality Score" "$line" "$ahref"
        ;;
      bam:6)
        _append_image_section "$overview_head" "Reads Annotation Overview" "$line" "$ahref"
        ;;
      bam:7)
        # Without the RFAM overviews the BAM report ends here.
        [[ $RFAM == "True" ]] || break
        _append_image_section "" "Precursor miRNAs Annotation" "$line" "$ahref"
        ;;
      fastq:8)
        # Without alignment the fastq report ends after the quality-control part.
        [[ $ALIGNMENT == "True" ]] || break
        _append_image_section "$mapping_head" "Mapping proportions" "$line" "$ahref"
        ;;
      bam:8)
        _append_image_section "" "Annotation of ncRNAs from RFAM" "$line" "$ahref"
        ;;
      bam:9)
        _append_image_section "" "Annotation of Repetitive Regions" "$line" "$ahref"
        # Last section of a BAM report.
        break
        ;;
      # From image 10 on the sections no longer depend on the input type.
      *:10)
        _append_image_section "$rule" "Abundant Reads Length Distribution" "$line" "$ahref"
        ;;
      *:11)
        _append_image_section "$overview_head" "Reads Annotation Overview" "$line" "$ahref"
        ;;
      *:12)
        if [[ $RFAM == "True" ]]; then
          _append_image_section "" "Precursor miRNAs Annotation" "$line" "$ahref"
        fi
        ;;
      *:13)
        if [[ $RFAM == "True" ]]; then
          _append_image_section "" "Annotation of ncRNAs from RFAM" "$line" "$ahref"
        fi
        ;;
      *:14)
        if [[ $RFAM == "True" ]]; then
          _append_image_section "" "Annotation of Repetitive Regions" "$line" "$ahref"
        fi
        ;;
    esac

    i=$(( i + 1 ))
  done < "$img_list"

  rm -- "$img_list"
}
375
376
#Launch ncPRO analysis

# The three report modes run the same pipeline; only the report-builder steps
# appended to the command line differ (HTML, PDF or both).
NCPRO_SEQ=/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-seq

case "$REPORT" in
  all)  builder_steps="-s html_builder -s pdf_builder" ;;
  pdf)  builder_steps="-s pdf_builder" ;;
  html) builder_steps="-s html_builder" ;;
  # Any other value: nothing is launched, as before.
  *)    builder_steps="" ;;
esac

if [[ -n $builder_steps ]]; then
  # COMMAND_LINE and builder_steps each hold several words and must be split.
  # shellcheck disable=SC2086
  "$NCPRO_SEQ" $COMMAND_LINE $builder_steps >> "$DEBUG"

  if [[ $REPORT == "all" || $REPORT == "html" ]]; then
    createHtmlReport
  fi

  if [[ $REPORT == "all" || $REPORT == "pdf" ]]; then
    cp -- "${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf" "$PDF_REPORT"
  fi
fi

#Galaxy output handling

cp -- "${OUTPUT_PATH}/pipeline.log" "$LOG_FILE"

# When the reads were aligned by the pipeline, hand each resulting BAM back to
# its Galaxy output dataset. The file is looked up under the sample name with
# its first '_' turned into '.', followed by "_<genome>.bam".
if [[ $ALIGNMENT == "True" ]]; then
  for idx in "${!bamArray[@]}"; do
    cp -- "${OUTPUT_PATH}/bowtie_results/${nameArray[idx]/_/.}_${GENOME}.bam" "${bamArray[idx]}"
  done
fi