Mercurial > repos > jbrayet > ncpro_seq
comparison ncPRO-QC.sh @ 2:1027a330d606 draft default tip
Uploaded
author | jbrayet |
---|---|
date | Thu, 29 Oct 2015 10:25:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:0c34e0bef7d3 | 2:1027a330d606 |
---|---|
#!/bin/bash

# Galaxy wrapper around the ncPRO-seq pipeline (quality control / mapping report).
# The lines tagged "ME" further down were added locally.
# Assumption: a user cannot start from a FASTQ file whose reads are already grouped.

# Start from empty accumulators: the repeatable options below use "+=", so a
# variable of the same name inherited from the environment would otherwise be
# prepended to the list.
INPUT=""
SAMPLENAME=""
BAM_OUT=""

# Command-line options (every option takes an argument):
#   -i  input file; repeatable, collected into a comma-terminated list
#   -s  sample name; repeatable, same order as -i
#   -n  project name written into the ncPRO configuration file
#   -g  genome identifier written into the ncPRO configuration file
#   -m  RMSK setting (only recorded in the debug log by this script)
#   -o  destination for a resulting BAM file; repeatable, same order as -i
#   -f  "True" to add the Rfam / RepeatMasker overview steps
#   -r  report kind: "pdf", "html" or "all"
#   -h  path of the HTML report output
#   -p  path of the PDF report output
#   -l  path of the log output
#   -t  input type: "fastq" or "bam"
#   -a  "True" when the reads have to be aligned
while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:" optionName; do
  case "$optionName" in
    i) INPUT+="$OPTARG," ;;
    s) SAMPLENAME+="$OPTARG," ;;
    n) PROJECTNAME="$OPTARG" ;;
    g) GENOME="$OPTARG" ;;
    m) RMSK="$OPTARG" ;;
    o) BAM_OUT+="$OPTARG," ;;
    f) RFAM="$OPTARG" ;;
    r) REPORT="$OPTARG" ;;
    h) HTML_REPORT="$OPTARG" ;;
    p) PDF_REPORT="$OPTARG" ;;
    l) LOG_FILE="$OPTARG" ;;
    t) INPUT_TYPE="$OPTARG" ;;
    a) ALIGNMENT="$OPTARG" ;;
    # getopts has already printed a diagnostic for an unknown option or a
    # missing argument; stop instead of continuing with incomplete settings.
    *) exit 2 ;;
  esac
done

# Split the comma-separated option lists into arrays.
# Splitting with `read -a` on "," only means that paths or names containing
# spaces or glob characters stay intact (the former unquoted expansion split on
# whitespace and expanded wildcards). An empty field between two commas is
# kept as an empty element, which keeps the three arrays index-aligned.
IFS=, read -r -a sampleArray <<< "$INPUT"
IFS=, read -r -a nameArray <<< "$SAMPLENAME"
IFS=, read -r -a bamArray <<< "$BAM_OUT"

# The working directory sits next to a Galaxy output dataset: the log file
# when only a PDF is requested, the HTML report otherwise.
if [[ $REPORT == "pdf" ]]; then
  report_anchor=$LOG_FILE
else
  report_anchor=$HTML_REPORT
fi

OUTPUT_PATH_DIR=$(dirname -- "$report_anchor")
OUTPUT_PATH_NAME=$(basename -- "$report_anchor" .dat)

# e.g. /path/dataset_42.dat -> /path/dataset_42_files
OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"

#ME
# Nothing below can work without the working directory, so stop if it cannot
# be created.
mkdir -p -- "$OUTPUT_PATH" || exit 1

# NOTE(review): world-writable permissions are kept from the original script;
# confirm whether the Galaxy setup really needs 777 here.
chmod -R 777 -- "$OUTPUT_PATH"

#VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3`
#VERSION=`echo $VERSION | cut -d"_" -f2`

# Debug log: when DEBUG_MODE is "on", tool output and the received parameters
# are written to a file inside the working directory; otherwise discarded.
DEBUG_MODE="on"
DEBUG="/dev/null"

if [[ $DEBUG_MODE == "on" ]]; then
  DEBUG="$OUTPUT_PATH/ncPRO-QC.debug"
fi

# Deploy the ncPRO directory structure into the working directory.
# This call truncates the debug file; everything after it appends.
/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"

# Record every received parameter, one per line, in option order.
{
  printf '%s\n' \
    "$INPUT" \
    "$SAMPLENAME" \
    "$PROJECTNAME" \
    "$GENOME" \
    "$RMSK" \
    "$BAM_OUT" \
    "$REPORT_PLACEHOLDER_UNUSED${RFAM}" \
    "$REPORT" \
    "$HTML_REPORT" \
    "$PDF_REPORT" \
    "$LOG_FILE" \
    "$INPUT_TYPE" \
    "$ALIGNMENT"

  # "${array[*]}" logs all elements; a bare "$array" only expands to the
  # first one, which hid every sample after the first in the log.
  printf '%s\n' \
    "${sampleArray[*]}" \
    "${nameArray[*]}" \
    "${bamArray[*]}"
} >> "$DEBUG"

# Go to the working directory. The relative `rm` calls below must never run
# anywhere else, so abort when the directory cannot be entered.
cd "$OUTPUT_PATH" || exit 1

# Replace the deployed "annotation" entry with a link to the shared annotation
# directory, and drop the deployed "manuals" entry.
rm annotation

ln -s /bioinfo/local/curie/ngs-data-analysis/annotation .

rm manuals

# Create symbolic links to the inputs in rawdata/, named <sample name>.<type>.
# NEW: for BAM files, also check whether the reads are grouped; the pipeline
# steps are chosen accordingly further down.
if [[ $INPUT_TYPE == "fastq" || $INPUT_TYPE == "bam" ]]; then

  count=0
  for sample_path in "${sampleArray[@]}"; do
    ln -s "$sample_path" "${OUTPUT_PATH}/rawdata/${nameArray[count]}.${INPUT_TYPE}"
    count=$(( count + 1 ))

    if [[ $INPUT_TYPE == "bam" ]]; then
      # RG=1 when every read name looks like "<digits>_<digits>" (grouped
      # reads), RG=0 as soon as one read name does not.
      # NOTE(review): RG is overwritten for each sample, so only the last BAM
      # file decides which steps are run - confirm this is intended when
      # several BAM files are submitted.
      RG=$(samtools view "$sample_path" | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}')
    fi
  done
fi

# Edit config-ncrna.txt (relative to the working directory).
CONFIG_FILE=config-ncrna.txt

# Escape the characters that are special in a sed replacement ("/", "&" and
# "\") so that any genome or project name is inserted literally.
genome_repl=$(printf '%s\n' "$GENOME" | sed 's|[/&\\]|\\&|g')
project_repl=$(printf '%s\n' "$PROJECTNAME" | sed 's|[/&\\]|\\&|g')

# Both template genomes are replaced in a single pass, so a genome name that
# itself contains "hg19" is not substituted a second time.
sed -i -E "s/mm9|hg19/${genome_repl}/g" "$CONFIG_FILE"

sed -i "/N_CPU/c\N_CPU = 6" "$CONFIG_FILE" # Make sure this value matches universe.ini files

sed -i "s/test_Curie/${project_repl}/g" "$CONFIG_FILE"


# Build the ncPRO-seq command line. COMMAND_LINE is a plain space-separated
# string; it is left unset for any other input type.
case "$INPUT_TYPE" in

  fastq)
    if [[ $ALIGNMENT == "True" ]]; then
      COMMAND_LINE="-c $CONFIG_FILE -s processRead -s mapGenome -s mapGenomeStat -s mapAnnOverview"

      # The Rfam / RepeatMasker overviews are only added when aligning.
      if [[ $RFAM == "True" ]]; then
        COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
      fi
    else
      COMMAND_LINE="-c $CONFIG_FILE -s processRead"
    fi
    ;;

  bam)
    # NEW: if the BAM file is already grouped, omit "-s processBam".
    if [[ $RG = 0 ]]; then
      # BAM file is NOT grouped.
      COMMAND_LINE="-c $CONFIG_FILE -s processBam -s mapGenomeStat -s mapAnnOverview"
    else
      # NOTE(review): the original comment says the input BAM should be put in
      # bowtie_results, but the links are created in rawdata, where the same
      # links were already made earlier. Confirm the intended target.
      count=0
      for sample_path in "${sampleArray[@]}"; do
        ln -s "$sample_path" "${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam"
        count=$(( count + 1 ))
      done
      COMMAND_LINE="-c $CONFIG_FILE -s mapGenomeStat -s mapAnnOverview"
    fi

    if [[ $RFAM == "True" ]]; then
      COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
    fi
    ;;

esac

#************************* new

##### Function to create HTML report in Galaxy ######

#######################################
# Append one figure to the Galaxy report: a thumbnail linking to the
# full-size picture, preceded by the given heading markup.
# Globals:   HTML_REPORT (appended to)
# Arguments: $1 - HTML emitted before the figure (may be empty)
#            $2 - link target (full-size picture)
#            $3 - thumbnail path
#######################################
_ncpro_report_figure() {
  printf '%s<p align=center><a href=%s><img src=%s></a></p>\n' \
    "$1" "$2" "$3" >> "$HTML_REPORT"
}

#######################################
# Build the Galaxy HTML report from ${OUTPUT_PATH}/report.html.
#
# The image sources of report.html are extracted in document order into a
# temporary list. Image 1 is used as the logo, image 2 is skipped, and the
# following images are laid out under fixed headings that depend on their
# position and on the input type. Each thumbnail links to the same path with
# the first "html/thumb" replaced by "pic".
#
# Globals:   OUTPUT_PATH (read)  - working directory holding report.html
#            HTML_REPORT (read)  - report file; rewritten for fastq/bam
#            INPUT_TYPE  (read)  - "fastq" or "bam"
#            RFAM        (read)  - "True" enables the annotation figures
#            ALIGNMENT   (read)  - "True" enables the fastq mapping figures
# Arguments: none
# Outputs:   writes HTML_REPORT; removes ${OUTPUT_PATH}/img_list.txt
#######################################
createHtmlReport() {
  local img_list="${OUTPUT_PATH}/img_list.txt"
  local logo thumb full
  local idx=1

  # Heading markup shared by several positions.
  local t_mapping='<p align=center><b>Mapping proportions</b></p>'
  local t_distinct='<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p>'
  local t_abundant='<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p>'
  local t_quality='<hr width=500><p align=center><b>Quality Score</b></p>'
  local t_overview='<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b><p align=center><b>Reads Annotation Overview</b></p>'
  local t_mirna='<p align=center><b>Precursor miRNAs Annotation</b></p>'
  local t_rfam='<p align=center><b>Annotation of ncRNAs from RFAM</b></p>'
  local t_rmsk='<p align=center><b>Annotation of Repetitive Regions</b></p>'
  local t_map_section='<hr size=20><p align=center style=font-size:25px;><b><u>DATA MAPPING</u></b></p><p align=center><b>Mapping proportions</b></p>'

  # One tag per line, then keep the src value of every <img> tag
  # (single- or double-quoted).
  tr '>' '\n' < "${OUTPUT_PATH}/report.html" \
    | sed -ne "s@.*<img src='\([^']*\)'.*@\1@p" -e 's@.*<img src="\([^"]*\)".*@\1@p' \
    > "$img_list"

  logo=$(head -n 1 "$img_list")

  #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"`

  # Report header; for any other input type the report file is not rewritten.
  case "$INPUT_TYPE" in
    fastq)
      printf '<p align=center><img src=%s></p><p align=center><b><u>QUALITY CONTROL</u></b></p>\n' \
        "$logo" > "$HTML_REPORT"
      ;;
    bam)
      printf '<p align=center><img src=%s></p><p align=center><b><u>DATA MAPPING</u></b></p>\n' \
        "$logo" > "$HTML_REPORT"
      ;;
  esac

  # -r keeps backslashes in paths literal; leading/trailing blanks are still
  # trimmed by read. Empty lines are skipped and do not count as a position.
  while read -r thumb; do
    [[ -n $thumb ]] || continue

    full=${thumb/html\/thumb/pic}

    case "$idx" in
      3)
        case "$INPUT_TYPE" in
          fastq)
            # First cell of a two-cell table (closed at position 4).
            printf '<p align=center><b>Base Composition Information</b></p><table align=center><tr><td align=center><a href=%s><img src=%s></a></td>\n' \
              "$full" "$thumb" >> "$HTML_REPORT"
            ;;
          bam) _ncpro_report_figure "$t_mapping" "$full" "$thumb" ;;
        esac
        ;;
      4)
        case "$INPUT_TYPE" in
          fastq)
            # Second cell; closes the link, cell, row and table opened at
            # position 3 (the tags used to be mis-nested and a new <table>
            # was opened instead of closing the current one).
            printf '<td align=center><a href=%s><img src=%s></a></td></tr></table>\n' \
              "$full" "$thumb" >> "$HTML_REPORT"
            ;;
          bam) _ncpro_report_figure "$t_distinct" "$full" "$thumb" ;;
        esac
        ;;
      5)
        case "$INPUT_TYPE" in
          fastq) _ncpro_report_figure "$t_distinct" "$full" "$thumb" ;;
          bam) _ncpro_report_figure "$t_abundant" "$full" "$thumb" ;;
        esac
        ;;
      6)
        case "$INPUT_TYPE" in
          fastq) _ncpro_report_figure "$t_quality" "$full" "$thumb" ;;
          bam) _ncpro_report_figure "$t_overview" "$full" "$thumb" ;;
        esac
        ;;
      7)
        case "$INPUT_TYPE" in
          fastq) _ncpro_report_figure "$t_abundant" "$full" "$thumb" ;;
          bam)
            # Without the Rfam steps the BAM report ends here.
            if [[ $RFAM == "True" ]]; then
              _ncpro_report_figure "$t_mirna" "$full" "$thumb"
            else
              break
            fi
            ;;
        esac
        ;;
      8)
        case "$INPUT_TYPE" in
          fastq)
            # Without alignment the FASTQ report ends here.
            if [[ $ALIGNMENT == "True" ]]; then
              _ncpro_report_figure "$t_map_section" "$full" "$thumb"
            else
              break
            fi
            ;;
          bam) _ncpro_report_figure "$t_rfam" "$full" "$thumb" ;;
        esac
        ;;
      9)
        case "$INPUT_TYPE" in
          fastq) _ncpro_report_figure "$t_distinct" "$full" "$thumb" ;;
          bam)
            # Last figure of a BAM report.
            _ncpro_report_figure "$t_rmsk" "$full" "$thumb"
            break
            ;;
        esac
        ;;
      # Positions 10 and up are not conditioned on the input type.
      10) _ncpro_report_figure "$t_abundant" "$full" "$thumb" ;;
      11) _ncpro_report_figure "$t_overview" "$full" "$thumb" ;;
      12)
        if [[ $RFAM == "True" ]]; then
          _ncpro_report_figure "$t_mirna" "$full" "$thumb"
        fi
        ;;
      13)
        if [[ $RFAM == "True" ]]; then
          _ncpro_report_figure "$t_rfam" "$full" "$thumb"
        fi
        ;;
      14)
        if [[ $RFAM == "True" ]]; then
          _ncpro_report_figure "$t_rmsk" "$full" "$thumb"
        fi
        ;;
    esac

    idx=$(( idx + 1 ))
  done < "$img_list"

  rm "$img_list"
}

# Launch the ncPRO analysis.
# TODO: loop over "-s html_builder" / "-s pdf_builder" instead of repeating
# the call for each report kind.
#
# $COMMAND_LINE is deliberately left unquoted below: it is a space-separated
# option string that has to be split into separate arguments.
# Pipeline output is appended to the debug log.
case "$REPORT" in

  all)
    /bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-seq $COMMAND_LINE -s html_builder -s pdf_builder >> "$DEBUG"
    createHtmlReport
    cp "${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf" "$PDF_REPORT"
    ;;

  pdf)
    /bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-seq $COMMAND_LINE -s pdf_builder >> "$DEBUG"
    cp "${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf" "$PDF_REPORT"
    ;;

  html)
    /bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-seq $COMMAND_LINE -s html_builder >> "$DEBUG"
    createHtmlReport
    ;;

esac

# Galaxy output handling: the pipeline log is always returned.
cp "${OUTPUT_PATH}/pipeline.log" "$LOG_FILE"

# When reads were aligned, return one BAM file per requested output. The file
# name in bowtie_results is the sample name with its first "_" replaced by
# ".", followed by "_<genome>.bam".
if [[ $ALIGNMENT == "True" ]]; then

  count=0
  for bam_target in "${bamArray[@]}"; do
    cp "${OUTPUT_PATH}/bowtie_results/${nameArray[count]/_/.}_${GENOME}.bam" "$bam_target"
    count=$(( count + 1 ))
  done
fi