# HG changeset patch
# User jbrayet
# Date 1446128706 14400
# Node ID 1027a330d606abcf5db1f1bba78d37754f37e687
# Parent 0c34e0bef7d33e8ec94c2e6bd380c3f61c117ca3
Uploaded
diff -r 0c34e0bef7d3 -r 1027a330d606 ncPRO-QC.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ncPRO-QC.sh Thu Oct 29 10:25:06 2015 -0400
@@ -0,0 +1,423 @@
+#!/bin/bash
+
+# Lines marked "#ME" were added by me.
+# Assumption: a user never starts from a FASTQ file whose reads are already grouped.
+
+while getopts "i:s:n:g:m:o:f:r:h:p:l:t:a:" optionName; do  # every option takes an argument
+case "$optionName" in
+
+i) INPUT+="$OPTARG,";;  # input file; repeatable, collected as a comma-terminated list
+s) SAMPLENAME+="$OPTARG,";;  # sample name; repeatable, used as the link name for the matching input
+n) PROJECTNAME="$OPTARG";;  # replaces "test_Curie" in the ncPRO config
+g) GENOME="$OPTARG";;  # replaces "mm9"/"hg19" in the ncPRO config
+m) RMSK="$OPTARG";;  # only logged in the part of the script visible here
+o) BAM_OUT+="$OPTARG,";;  # repeatable; presumably BAM output paths - confirm against the caller
+f) RFAM="$OPTARG";;  # "True" adds the overviewRfam/overviewRmsk steps
+r) REPORT="$OPTARG";;  # "pdf" bases the working directory on LOG_FILE, anything else on HTML_REPORT
+h) HTML_REPORT="$OPTARG";;  # HTML report path (an option argument, not a help flag)
+p) PDF_REPORT="$OPTARG";;  # presumably the PDF report path - confirm against the caller
+l) LOG_FILE="$OPTARG";;  # log file path
+t) INPUT_TYPE="$OPTARG";;  # "fastq" or "bam"
+a) ALIGNMENT="$OPTARG";;  # "True" adds the mapping steps for FASTQ input
+
+esac
+done
+
+IFS=',' read -r -a sampleArray <<< "$INPUT"      # split on commas only: no whitespace splitting or globbing of paths
+IFS=',' read -r -a nameArray <<< "$SAMPLENAME"   # sample names, indexed in parallel with sampleArray
+IFS=',' read -r -a bamArray <<< "$BAM_OUT"       # values collected from the -o option
+
+# Derive the working directory "<dir>/<name>_files" from the log file (PDF report) or the HTML report path.
+if [[ $REPORT == "pdf" ]]; then
+  REPORT_ANCHOR=$LOG_FILE
+else
+  REPORT_ANCHOR=$HTML_REPORT
+fi
+OUTPUT_PATH_DIR=$(dirname -- "$REPORT_ANCHOR")
+OUTPUT_PATH_NAME=$(basename -- "$REPORT_ANCHOR" .dat)
+
+OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"
+
+#ME
+# Create the working directory and stop if that fails, because every later step writes into it.
+mkdir -p -- "$OUTPUT_PATH" || { echo "Cannot create $OUTPUT_PATH" >&2; exit 1; }
+# NOTE(review): mode 777 is kept from the original; confirm world-writable access is required.
+chmod -R 777 -- "$OUTPUT_PATH"
+
+#VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3`
+#VERSION=`echo $VERSION | cut -d"_" -f2`
+
+# Debug log: diagnostics go to $DEBUG, which stays /dev/null unless DEBUG_MODE is "on".
+
+DEBUG_MODE="on"
+DEBUG="/dev/null"
+
+if [[ $DEBUG_MODE == "on" ]]; then
+
+  DEBUG="$OUTPUT_PATH/ncPRO-QC.debug"
+
+fi
+
+# Deploy the ncPRO directory structure; this redirection truncates the debug log.
+
+/bioinfo/local/curie/ngs-data-analysis/ncPRO-seq/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"
+# Append every option value to the debug log, one per line.
+printf '%s\n' "$INPUT" >> "$DEBUG"
+printf '%s\n' "$SAMPLENAME" >> "$DEBUG"
+printf '%s\n' "$PROJECTNAME" >> "$DEBUG"
+printf '%s\n' "$GENOME" >> "$DEBUG"
+printf '%s\n' "$RMSK" >> "$DEBUG"
+printf '%s\n' "$BAM_OUT" >> "$DEBUG"
+printf '%s\n' "$RFAM" >> "$DEBUG"
+printf '%s\n' "$REPORT" >> "$DEBUG"
+printf '%s\n' "$HTML_REPORT" >> "$DEBUG"
+printf '%s\n' "$PDF_REPORT" >> "$DEBUG"
+printf '%s\n' "$LOG_FILE" >> "$DEBUG"
+printf '%s\n' "$INPUT_TYPE" >> "$DEBUG"
+printf '%s\n' "$ALIGNMENT" >> "$DEBUG"
+# "${array[*]}" logs every element; a bare "$array" would expand to element 0 only.
+printf '%s\n' "${sampleArray[*]}" >> "$DEBUG"
+printf '%s\n' "${nameArray[*]}" >> "$DEBUG"
+printf '%s\n' "${bamArray[*]}" >> "$DEBUG"
+
+# Go to the working directory. Abort if that fails: the rm/ln calls below use relative paths and
+# would otherwise act on whatever directory the script was started from.
+cd "$OUTPUT_PATH" || { echo "Cannot cd to $OUTPUT_PATH" >&2; exit 1; }
+
+# Swap the local "annotation" entry (presumably created by ncPRO-deploy) for a link to the shared one.
+rm annotation
+ln -s /bioinfo/local/curie/ngs-data-analysis/annotation .
+
+rm manuals
+
+# Link each input file into rawdata/ under its sample name (<name>.fastq or <name>.bam).
+# For BAM input, also check whether the reads are already grouped and record the result in RG.
+if [[ $INPUT_TYPE == "fastq" ]]; then
+  count=0
+  for i in "${sampleArray[@]}"
+  do
+    ln -s -- "$i" "${OUTPUT_PATH}/rawdata/${nameArray[count]}.fastq"
+    count=$(( count + 1 ))
+  done
+fi
+
+if [[ $INPUT_TYPE == "bam" ]]; then
+
+  count=0
+  for i in "${sampleArray[@]}"
+  do
+    ln -s -- "$i" "${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam"
+    count=$(( count + 1 ))
+
+    # RG=1 when every read name looks like <digits>_<digits>, else 0.
+    # NOTE(review): with several BAM inputs only the last file's result is kept - confirm this is intended.
+    RG=$(samtools view "$i" | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}')
+  done
+fi
+
+# Tailor the deployed config: genome build, CPU count and project name.
+CONFIG_FILE=config-ncrna.txt
+for placeholder_genome in mm9 hg19; do
+  sed -i "s/${placeholder_genome}/$GENOME/g" "$CONFIG_FILE"
+done
+# Keep N_CPU in sync with the value declared in the universe.ini files.
+sed -i "/N_CPU/c\N_CPU = 6" "$CONFIG_FILE"
+sed -i "s/test_Curie/$PROJECTNAME/g" "$CONFIG_FILE"
+
+
+# Build the ncPRO step list for FASTQ input.
+#   - processRead always runs;
+#   - mapGenome, mapGenomeStat and mapAnnOverview are added only when ALIGNMENT is "True";
+#   - overviewRfam and overviewRmsk are added only when both ALIGNMENT and RFAM are "True".
+
+if [[ $INPUT_TYPE == "fastq" ]]; then
+
+  COMMAND_LINE="-c $CONFIG_FILE -s processRead"
+
+  if [[ $ALIGNMENT == "True" ]]; then
+
+    COMMAND_LINE="$COMMAND_LINE -s mapGenome -s mapGenomeStat -s mapAnnOverview"
+
+    if [[ $RFAM == "True" ]]; then
+
+      COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
+
+    fi
+
+  fi
+
+fi
+
+
+
+# Build the ncPRO step list for BAM input.
+# RG is 0 when the reads are not grouped yet, in which case processBam has to run first.
+if [[ $INPUT_TYPE == "bam" ]]; then
+
+  case "$RG" in
+    0)
+      COMMAND_LINE="-c $CONFIG_FILE -s processBam -s mapGenomeStat -s mapAnnOverview"
+      ;;
+    *)
+      # Already grouped: skip processBam.
+      # NOTE(review): the original comment says the BAM should go to bowtie_results, but these links
+      # target rawdata/, the same names the earlier BAM loop already created - confirm the directory.
+      count=0
+      for i in ${sampleArray[*]}; do
+        ln -s $i ${OUTPUT_PATH}/rawdata/${nameArray[count]}.bam
+        count=$(( $count + 1 ))
+      done
+      COMMAND_LINE="-c $CONFIG_FILE -s mapGenomeStat -s mapAnnOverview"
+      ;;
+  esac
+
+  if [[ $RFAM == "True" ]]; then
+    COMMAND_LINE="$COMMAND_LINE -s overviewRfam -s overviewRmsk"
+  fi
+
+fi
+
+#************************* new
+
+##### Function to create HTML report in Galaxy ######
+
+function createHtmlReport
+{
+
+ # galaxy part :
+
+ #Reformat html output
+
+ tr '>' '\n' < ${OUTPUT_PATH}/report.html | sed -ne "s@.* ${OUTPUT_PATH}/img_list.txt
+
+ NC_LOGO=`head -1 ${OUTPUT_PATH}/img_list.txt`
+
+ #HTTP_PATH=`echo $OUTPUT_PATH | sed "s/\/data\/kdi_${VERSION}/http:\/\/data-kdi-${VERSION}.curie.fr\/file/g"`
+
+ i=1
+
+ if [[ $INPUT_TYPE == "fastq" ]];then
+
+ echo "
QUALITY CONTROL
" > ${HTML_REPORT} + fi + + if [[ $INPUT_TYPE == "bam" ]];then + + echo "DATA MAPPING
" > $HTML_REPORT + fi + + while read line + do + + if [[ $line != "" ]];then + + if [[ $i == "3" ]];then + + ahref=`sed "s/html\/thumb/pic/" <<< $line` + + if [[ $INPUT_TYPE == "fastq" ]];then + echo "Base Composition Information