Mercurial > repos > jbrayet > ncproseq_1_6_5_docker
changeset 2:f8c76a03d758 draft
Uploaded
author | jbrayet |
---|---|
date | Thu, 28 Jan 2016 07:43:32 -0500 |
parents | b21296a98eb2 |
children | 8f3347849abf |
files | ncPRO-QC.sh |
diffstat | 1 files changed, 469 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ncPRO-QC.sh Thu Jan 28 07:43:32 2016 -0500 @@ -0,0 +1,469 @@ +#!/bin/bash + +# Copyleft ↄ⃝ 2012 Institut Curie +# Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant(Institut Curie) 2012 - 2015 +# Contact: bioinfo.ncproseq@curie.fr +# This software is distributed without any guarantee under the terms of the GNU General +# Public License, either Version 2, June 1991 or Version 3, June 2007. + +while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:q:d:" optionName; do +case "$optionName" in + +i) INPUT+="$OPTARG,";; +s) SAMPLENAME+="$OPTARG,";; +n) PROJECTNAME="$OPTARG";; +g) GENOME="$OPTARG";; +m) RMSK="$OPTARG";; +o) BAM_OUT+="$OPTARG,";; +f) RFAM="$OPTARG";; +r) REPORT="$OPTARG";; +h) HTML_REPORT="$OPTARG";; +p) PDF_REPORT="$OPTARG";; +l) LOG_FILE="$OPTARG";; +t) INPUT_TYPE="$OPTARG";; +a) ALIGNMENT="$OPTARG";; +q) FASTQ_FORMAT+="$OPTARG,";; +d) ROOT_DIR="$OPTARG";; + +esac +done + +##### ncPRO-seq annotation - Galaxy ##### + +GENOME_2=`echo $GENOME | cut -d"_" -f2` + +databasePath=$ROOT_DIR/database/files + +mkdir -p $databasePath/ncproseqAnnotation +mkdir -p $databasePath/ncproseqAnnotation/annotation +annotationPath=$databasePath/ncproseqAnnotation/annotation +[ ! -d $annotationPath/$GENOME_2 ] && wget http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz -P $annotationPath -q && cd $annotationPath && tar -zxf $GENOME.tar.gz && rm -rf $GENOME.tar.gz + +######### + + +sampleArray=(${INPUT//,/ }) +nameArray=(${SAMPLENAME//,/ }) +bamArray=(${BAM_OUT//,/ }) + +if [[ $REPORT == "pdf" ]];then + OUTPUT_PATH_DIR=`dirname $LOG_FILE` + OUTPUT_PATH_NAME=`basename $LOG_FILE .dat` +else + OUTPUT_PATH_DIR=`dirname $HTML_REPORT` + OUTPUT_PATH_NAME=`basename $HTML_REPORT .dat` +fi + +OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files" + + + +#ME +mkdir -p $OUTPUT_PATH + +chmod 777 -R $OUTPUT_PATH + +#VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3` +#VERSION=`echo $VERSION | cut -d"_" -f2` + +#DEBUG_MODE + +DEBUG_MODE="on" +DEBUG="/dev/null" + +if [[ $DEBUG_MODE == "on" ]];then + + DEBUG="$OUTPUT_PATH/ncPRO-QC.debug" + +fi + +#Deploy ncPRO directories structure + +/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o $OUTPUT_PATH > $DEBUG + +echo "$INPUT" >> $DEBUG +echo "$SAMPLENAME" >> $DEBUG +echo "$PROJECTNAME" >> $DEBUG +echo "$GENOME_2" >> $DEBUG +echo "$RMSK" >> $DEBUG +echo "$BAM_OUT" >> $DEBUG +echo "$RFAM" >> $DEBUG +echo "$REPORT" >> $DEBUG +echo "$HTML_REPORT" >> $DEBUG +echo "$PDF_REPORT" >> $DEBUG +echo "$LOG_FILE" >> $DEBUG +echo "$INPUT_TYPE" >> $DEBUG +echo "$ALIGNMENT" >> $DEBUG + +echo "$sampleArray" >> $DEBUG +echo "$nameArray" >> $DEBUG +echo "$bamArray" >> $DEBUG + +#Go to working directory + +cp -rf /usr/curie_ngs/ncproseq_v1.6.5/annotation/*.item $annotationPath +cp -rf /usr/curie_ngs/ncproseq_v1.6.5/annotation/*_items.txt $annotationPath + +if [[ ! -L "$databasePath/ncproseqBowtieIndexes" && ! -d "$databasePath/ncproseqBowtieIndexes" ]] +then + mkdir -p $databasePath/ncproseqBowtieIndexes +fi + +cd /usr/curie_ngs +rm -rf bowtie_indexes +ln -s $databasePath/ncproseqBowtieIndexes bowtie_indexes + +cd $OUTPUT_PATH + +rm annotation + +ln -s $annotationPath annotation + +rm manuals + +#Create symbolic link to input +#********************************************************************************* NEW: for BAM files, check if reads in input are grouped or not and change cmd accordignly *********** +if [[ $INPUT_TYPE == "fastq" ]];then + + count=0 + for i in ${sampleArray[*]} + do + ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.fastq + count=$(( $count + 1 )) + done +fi + +if [[ $INPUT_TYPE == "bam" ]];then + + count=0 + for i in ${sampleArray[*]} + do + ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam + count=$(( $count + 1 )) + + #check if grouped + RG=`samtools view $i | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'` + done +fi + +#Edit config-ncrna.txt ##### A REFAIRE #### +CONFIG_FILE=config-ncrna.txt + +sed -i "s:^BOWTIE_GENOME_REFERENCE =.*$:BOWTIE_GENOME_REFERENCE = $GENOME_2:g" $CONFIG_FILE +sed -i "s:^ORGANISM.*$:ORGANISM = $GENOME_2:g" $CONFIG_FILE + +sed -i "/N_CPU/c\N_CPU = 6" $CONFIG_FILE #****** Make sure this value matches universe.ini files +sed -i "s/test_Curie/$PROJECTNAME/g" $CONFIG_FILE +sed -i "s:^FASTQ_FORMAT =.*$:FASTQ_FORMAT = $FASTQ_FORMAT:g" $CONFIG_FILE + + +if [[ -f "$annotationPath/$GENOME_2/cluster_pirna.gff" ]] +then + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/cluster_pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" +else + if [[ -f "$annotationPath/$GENOME_2/pirna.gff" ]] + then + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" + else + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" + fi +fi + +sed -i "s:^ANNO_CATALOG.*$:ANNO_CATALOG = $ANNO_CATALOG:g" $CONFIG_FILE + +#Build command line + +if [[ $INPUT_TYPE == "fastq" ]];then + + if [[ $ALIGNMENT == "True" ]]; then + + + COMMAND_LINE="-c $CONFIG_FILE -s processRead -s mapGenome -s mapGenomeStat -s mapAnnOverview" + + if [[ $RFAM == "True" ]];then + + COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk" + + fi + + else + + COMMAND_LINE="-c $CONFIG_FILE -s processRead" + + fi + +fi + + + +#### NEW if BAM already grouped, omit [ -s processBam ] + put input.bam in /bowtie_results +if [[ $INPUT_TYPE == "bam" ]];then + + if [[ $RG = 0 ]]; then #if bam file is NOT grouped + + COMMAND_LINE="-c $CONFIG_FILE -s processBam -s mapGenomeStat -s mapAnnOverview" + + else + + count=0 + for i in ${sampleArray[*]} + do + ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam + count=$(( $count + 1 )) + done + COMMAND_LINE="-c $CONFIG_FILE -s mapGenomeStat -s mapAnnOverview" + fi + + + if [[ $RFAM == "True" ]];then + + COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk" + + fi + +fi + +#************************* new + +##### Function to create HTML report in Galaxy ###### + +function createHtmlReport +{ + + # galaxy part : + + #Reformat html output + + tr '>' '\n' < ${OUTPUT_PATH}/report.html | sed -ne "s@.*<img src='\([^']*\)'.*@\1@p" -e 's@.*<img src="\([^"]*\)".*@\1@p' > ${OUTPUT_PATH}/img_list.txt + + NC_LOGO=`head -1 ${OUTPUT_PATH}/img_list.txt` + + #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"` + + i=1 + + if [[ $INPUT_TYPE == "fastq" ]];then + + echo "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>QUALITY CONTROL</u></b></p>" > ${HTML_REPORT} + fi + + if [[ $INPUT_TYPE == "bam" ]];then + + echo "<p align=center><img src=$NC_LOGO></p><p align=center><b><u>DATA MAPPING</u></b></p>" > $HTML_REPORT + fi + + while read line + do + + if [[ $line != "" ]];then + + if [[ $i == "3" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<p align=center><b>Base Composition Information</b></p><table align=center><tr><td align=center><a href=$ahref><img src=$line></a></td>" >> $HTML_REPORT + fi + + if [[ $INPUT_TYPE == "bam" ]];then + echo "<p align=center><b>Mapping proportions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + fi + + if [[ $i == "4" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<td align=center><a href=$ahref><img src=$line></td></a></td><table>" >> $HTML_REPORT + fi + + if [[ $INPUT_TYPE == "bam" ]];then + echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + fi + + if [[ $i == "5" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + if [[ $INPUT_TYPE == "bam" ]];then + echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + fi + + if [[ $i == "6" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<hr width=500><p align=center><b>Quality Score</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + if [[ $INPUT_TYPE == "bam" ]];then + echo "<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b><p align=center><b>Reads Annotation Overview</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + fi + + if [[ $i == "7" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + if [[ $INPUT_TYPE == "bam" ]];then + + if [[ $RFAM == "True" ]];then + echo "<p align=center><b>Precursor miRNAs Annotation</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + else + break + fi + fi + fi + + if [[ $i == "8" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + if [[ $INPUT_TYPE == "fastq" ]];then + + if [[ $ALIGNMENT == "True" ]]; then + + echo "<hr size=20><p align=center style=font-size:25px;><b><u>DATA MAPPING</u></b></p><p align=center><b>Mapping proportions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + else + break + fi + fi + if [[ $INPUT_TYPE == "bam" ]];then + + echo "<p align=center><b>Annotation of ncRNAs from RFAM</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + fi + + if [[ $i == "9" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + if [[ $INPUT_TYPE == "fastq" ]];then + echo "<hr width=500><p align=center><b>Distinct Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + fi + + if [[ $INPUT_TYPE == "bam" ]];then + + echo "<p align=center><b>Annotation of Repetitive Regions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + break + fi + + fi + + if [[ $i == "10" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + echo "<hr width=500><p align=center><b>Abundant Reads Length Distribution</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + + fi + + if [[ $i == "11" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + echo "<hr size=20><p align=center style=font-size:25px;><b><u>ncRNAs OVERVIEW</u></b><p align=center><b>Reads Annotation Overview</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + + fi + + if [[ $RFAM == "True" ]];then + + if [[ $i == "12" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + echo "<p align=center><b>Precursor miRNAs Annotation</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + + fi + + if [[ $i == "13" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + echo "<p align=center><b>Annotation of ncRNAs from RFAM</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + + fi + + if [[ $i == "14" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + echo "<p align=center><b>Annotation of Repetitive Regions</b></p><p align=center><a href=$ahref><img src=$line></a></p>" >> $HTML_REPORT + + fi + + fi + + i=$(( $i + 1 )) + + fi + + + done < ${OUTPUT_PATH}/img_list.txt + + rm ${OUTPUT_PATH}/img_list.txt + + + +} + + +#Launch ncPRO analysis + +#FAIRE une boucle pour -s hrml_builder ou -s pdf_builder + +if [[ $REPORT == "all" ]];then + + + /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s html_builder -s pdf_builder>> $DEBUG + + createHtmlReport + + cp ${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf $PDF_REPORT + +fi + +if [[ $REPORT == "pdf" ]];then + + + /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s pdf_builder>> $DEBUG + + cp ${OUTPUT_PATH}/Analysis_report_ncPRO-seq.pdf $PDF_REPORT + + +fi + +if [[ $REPORT == "html" ]];then + + + /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE -s html_builder>> $DEBUG + + createHtmlReport + +fi + +#Galaxy output handling + +cp ${OUTPUT_PATH}/pipeline.log $LOG_FILE + +if [[ $ALIGNMENT == "True" ]];then + + count=0 + for i in ${bamArray[*]} + do + cp ${OUTPUT_PATH}/bowtie_results/${nameArray[count]/_/.}_$GENOME_2.bam $i + count=$(( $count + 1 )) + done +fi