Mercurial > repos > jbrayet > ncproseq_1_6_5_docker
changeset 0:50c2566d6b4c draft
Uploaded
author | jbrayet |
---|---|
date | Thu, 28 Jan 2016 07:43:11 -0500 |
parents | |
children | b21296a98eb2 |
files | ncPRO-ANNOTATION.sh |
diffstat | 1 files changed, 259 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ncPRO-ANNOTATION.sh Thu Jan 28 07:43:11 2016 -0500 @@ -0,0 +1,259 @@ +#!/bin/bash + +# Copyleft ↄ⃝ 2012 Institut Curie +# Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant(Institut Curie) 2012 - 2015 +# Contact: bioinfo.ncproseq@curie.fr +# This software is distributed without any guarantee under the terms of the GNU General +# Public License, either Version 2, June 1991 or Version 3, June 2007. + +while getopts "i:g:t:e:l:u:v:o:a::n:r:p:" optionName; do +case "$optionName" in + +i) INPUT="$OPTARG";; +g) GENOME="$OPTARG";; +t) DATATYPE="$OPTARG";; +e) EXT="$OPTARG";; +l) LOG_FILE="$OPTARG";; +u) UCSC="$OPTARG";; +v) UCSC_TRACK="$OPTARG";; +o) OUT="$OPTARG";; +a) OUT_ALL="$OPTARG";; +n) NORM="$OPTARG";; +r) ROOT_DIR="$OPTARG";; +p) PROJECTNAME="$OPTARG";; + + +esac +done + +##### ncPRO-seq annotation - Galaxy ##### + +GENOME_2=`echo $GENOME | cut -d"_" -f2` + +databasePath=$ROOT_DIR/database/files + +mkdir -p $databasePath/ncproseqAnnotation +mkdir -p $databasePath/ncproseqAnnotation/annotation +annotationPath=$databasePath/ncproseqAnnotation/annotation +echo $annotationPath +[ ! -d $annotationPath/$GENOME_2 ] && wget http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz -P $annotationPath && cd $annotationPath && tar -zxf $GENOME.tar.gz && rm -rf $GENOME.tar.gz + +######### + +OUTPUT_PATH_DIR=`dirname $LOG_FILE` +OUTPUT_PATH_NAME=`basename $LOG_FILE .dat` + +OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files" + +# this was missing +mkdir -p $OUTPUT_PATH + +VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3` +VERSION=`echo $VERSION | cut -d"_" -f2` + +#DEBUG_MODE + +DEBUG_MODE="on" +DEBUG="/dev/null" + +if [[ $DEBUG_MODE == "on" ]];then + + DEBUG="$OUTPUT_PATH/ncPRO-ANNOTATION.debug" + +fi + +#Deploy ncPRO directories structure + +/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o $OUTPUT_PATH > $DEBUG +# READ_GROUP = 1 ! (always) + + +chmod 777 -R $OUTPUT_PATH +#Go to working directory + +cd $OUTPUT_PATH + +rm annotation + +echo "ln -s $annotationPath annotation" + +ln -s $annotationPath annotation + +#Create symbolic link to input + +ln -s $INPUT ${OUTPUT_PATH}/rawdata/input.bam + +#Edit config-ncrna.txt + +CONFIG_FILE=config-ncrna.txt + +sed -i "s:^BOWTIE_GENOME_REFERENCE =.*$:BOWTIE_GENOME_REFERENCE = $GENOME_2:g" $CONFIG_FILE +sed -i "s:^ORGANISM.*$:ORGANISM = $GENOME_2:g" $CONFIG_FILE + +sed -i "s:^N_CPU.*$:N_CPU = 4:g" $CONFIG_FILE #****** Make sure this value matches universe.ini files +sed -i "s:^PROJECT_NAME =.*$:PROJECT_NAME = $PROJECTNAME:g" $CONFIG_FILE + + +#sed -i "s/LOGFILE = pipeline.log/LOGFILE = $LOG_FILE/g" $CONFIG_FILE + +if [[ -f "$annotationPath/$GENOME_2/cluster_pirna.gff" ]] +then + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/cluster_pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" +else + if [[ -f "$annotationPath/$GENOME_2/pirna.gff" ]] + then + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" + else + ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" + fi +fi + +sed -i "s:^ANNO_CATALOG.*$:ANNO_CATALOG = $ANNO_CATALOG:g" $CONFIG_FILE + +####### Remove information in config-ncrna.txt file ############### + +sed -i "s:^MATURE_MIRNA =.*$:MATURE_MIRNA =:g" $CONFIG_FILE +sed -i "s:^PRECURSOR_MIRNA =.*$:PRECURSOR_MIRNA =:g" $CONFIG_FILE +sed -i "s:^TRNA_UCSC =.*$:TRNA_UCSC =:g" $CONFIG_FILE +sed -i "s:^NCRNA_RFAM =.*$:NCRNA_RFAM =:g" $CONFIG_FILE +sed -i "s:^NCRNA_RFAM_EX =.*$:NCRNA_RFAM_EX =:g" $CONFIG_FILE +sed -i "s:^NCRNA_RMSK =.*$:NCRNA_RMSK =:g" $CONFIG_FILE +sed -i "s:^NCRNA_RMSK_EX =.*$:NCRNA_RMSK_EX =:g" $CONFIG_FILE +sed -i "s:^OTHER_NCRNA_GFF =.*$:OTHER_NCRNA_GFF =:g" $CONFIG_FILE + +####################################### + +if [[ $DATATYPE == "matmir" ]];then + + sed -i "s:^MATURE_MIRNA =.*$:MATURE_MIRNA = $EXT:g" $CONFIG_FILE + +elif [[ $DATATYPE == "premir" ]];then + + sed -i "s:^PRECURSOR_MIRNA =.*$:PRECURSOR_MIRNA = $EXT:g" $CONFIG_FILE + +elif [[ $DATATYPE == "trna" ]];then + + sed -i "s:^TRNA_UCSC =.*$:TRNA_UCSC = $EXT:g" $CONFIG_FILE + +elif [[ $DATATYPE == "rfam" ]];then + + sed -i "s:^NCRNA_RFAM_EX =.*$:NCRNA_RFAM_EX = $EXT:g" $CONFIG_FILE + +elif [[ $DATATYPE == "rmsk" ]];then + + sed -i "s:^NCRNA_RMSK_EX =.*$:NCRNA_RMSK_EX = $EXT:g" $CONFIG_FILE + + +elif [[ $DATATYPE == "other" ]];then + + # get the gff3 file + IFS=',' read -ra gff <<< "$EXT" + echo "${gff[0]}" | sed 's/\//\\\//g' > gff + gff_file=$(head -n 1 gff) + + sed -i "s:^OTHER_NCRNA_GFF =.*$:OTHER_NCRNA_GFF = $gff_file:g" $CONFIG_FILE +fi + + +#Build command line + +## ********************************** NEW for BAM files: check if reads are grouped (or not) + change command line accordingly***************************### + +#check if file is already grouped (grouped => RG = 1; not grouped => 0) +RG=`samtools view $INPUT | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'` + +if [[ $RG == 0 ]];then # if not grouped + # add -s processBam to do the grouping + echo "Grouping reads..." >> $DEBUG + COMMAND_LINE="-c $CONFIG_FILE -s processBam -s generateNcgff -s ncrnaProcess" + +else + + # omit [-s processBam] because reads are already grouped + move ready-to-use input.bam to /bowtie_results + echo "Reads already grouped..." >> $DEBUG + ln -s $INPUT ${OUTPUT_PATH}/bowtie_results/input.bam + COMMAND_LINE="-c $CONFIG_FILE -s generateNcgff -s ncrnaProcess" + +fi + +#finally, add track option if demanded +if [[ $UCSC == "True" ]];then + COMMAND_LINE="$COMMAND_LINE -s ncrnaTracks" +fi + +# **************** END NEW ************************************************************************************************************************************************ + +#Launch ncPRO analysis +echo $COMMAND_LINE >> $DEBUG + +/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE >> $DEBUG + +##***TEST + +RG=`samtools view ${OUTPUT_PATH}/bowtie_results/input.bam | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'` +echo " RG after pre-processing = $RG" >> $DEBUG +#**** TEST + + +#Galaxy output handling + +mv ${OUTPUT_PATH}/pipeline.log $LOG_FILE + +if [[ $NORM == "True" ]];then + if [[ $DATATYPE == "matmir" ]];then + if [[ ! -z "$OUT_ALL" ]];then + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM_all_miRNA.data $OUT_ALL + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT + else + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT + fi + elif [[ $DATATYPE == "premir" ]];then + if [[ ! -z "$OUT_ALL" ]];then + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM_all_miRNA.data $OUT_ALL + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT + else + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT + fi + elif [[ $DATATYPE == "trna" ]];then + mv $OUTPUT_PATH/doc/tRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT + elif [[ $DATATYPE == "rfam" ]];then + mv $OUTPUT_PATH/doc/rfam_${EXT}_all_samples_subfamcov_RPM.data $OUT + elif [[ $DATATYPE == "rmsk" ]];then + mv $OUTPUT_PATH/doc/rmsk_${EXT}_all_samples_subfamcov_RPM.data $OUT + fi +else + if [[ $DATATYPE == "matmir" ]];then + if [[ ! -z "$OUT_ALL" ]];then + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_all_miRNA.data $OUT_ALL + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov.data $OUT + else + mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov.data $OUT + fi + elif [[ $DATATYPE == "premir" ]];then + if [[ ! -z "$OUT_ALL" ]];then + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_all_miRNA.data $OUT_ALL + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov.data $OUT + else + mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov.data $OUT + fi + elif [[ $DATATYPE == "trna" ]];then + mv $OUTPUT_PATH/doc/tRNA_${EXT}_all_samples_subfamcov.data $OUT + elif [[ $DATATYPE == "rfam" ]];then + mv $OUTPUT_PATH/doc/rfam_${EXT}_all_samples_subfamcov.data $OUT + elif [[ $DATATYPE == "rmsk" ]];then + mv $OUTPUT_PATH/doc/rmsk_${EXT}_all_samples_subfamcov.data $OUT + fi +fi + +if [[ $UCSC == "True" ]];then + +#**** FOR NEBULA ONLY ****** + +gunzip $OUTPUT_PATH/ucsc/input_*_sens.bedGraph.gz +mv $OUTPUT_PATH/ucsc/input_*_sens.bedGraph $UCSC_TRACK + +fi +# ***** END FOR NEBULA ONLY ***** + +rm -rf $OUTPUT_PATH +