#!/bin/bash

# Copyleft ↄ⃝ 2012 Institut Curie
# Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant (Institut Curie) 2012 - 2015
# Contact: bioinfo.ncproseq@curie.fr
# This software is distributed without any guarantee under the terms of the GNU General
# Public License, either Version 2, June 1991 or Version 3, June 2007.

# Parse the command-line options into the global variables used by the rest
# of the script. Every option takes one argument:
#   -i INPUT      -g GENOME     -t DATATYPE   -e EXT        -l LOG_FILE
#   -o OUT_AB     -p OUT_DIS    -r ROOT_DIR   -n PROJECTNAME
# Unknown options are reported by getopts itself and otherwise ignored.
parse_options() {
  # OPTIND is local so that every call starts parsing at the first argument.
  local OPTIND=1 OPTARG optionName
  while getopts "i:g:t:e:l:o:p:r:n:" optionName; do
    case "$optionName" in
      i) INPUT="$OPTARG" ;;
      g) GENOME="$OPTARG" ;;
      t) DATATYPE="$OPTARG" ;;
      e) EXT="$OPTARG" ;;
      l) LOG_FILE="$OPTARG" ;;
      o) OUT_AB="$OPTARG" ;;
      p) OUT_DIS="$OPTARG" ;;
      r) ROOT_DIR="$OPTARG" ;;
      n) PROJECTNAME="$OPTARG" ;;
    esac
  done
}

parse_options "$@"

##### ncPRO-seq annotation - Galaxy #####

# GENOME_2 is the second "_"-separated field of GENOME (cut prints the whole
# value when GENOME contains no "_"). It names the annotation sub-directory.
GENOME_2=$(printf '%s\n' "$GENOME" | cut -d"_" -f2)

databasePath="$ROOT_DIR/database/files"
annotationPath="$databasePath/ncproseqAnnotation/annotation"

mkdir -p "$annotationPath"

# Download and unpack the annotation archive only when the genome directory
# is not there yet. The archive is extracted with "tar -C" so the script's
# working directory is left untouched.
if [[ ! -d "$annotationPath/$GENOME_2" ]]; then
  annotationArchive="$annotationPath/$GENOME.tar.gz"
  if ! wget "http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz" -P "$annotationPath" ||
    ! tar -zxf "$annotationArchive" -C "$annotationPath"; then
    echo "ERROR: could not download or unpack the $GENOME annotation into $annotationPath" >&2
    exit 1
  fi
  rm -f -- "$annotationArchive"
fi

#########

# Working directory of this run: "<directory of OUT_AB>/<OUT_AB without .dat>_files".
OUTPUT_PATH_DIR=$(dirname -- "$OUT_AB")
OUTPUT_PATH_NAME=$(basename -- "$OUT_AB" .dat)

OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"

mkdir -p "$OUTPUT_PATH"

# VERSION is the third "/"-separated field of OUTPUT_PATH, reduced to its
# second "_"-separated field.
# NOTE(review): VERSION is not read anywhere in the visible script - confirm
# whether it is still needed.
VERSION=$(printf '%s\n' "$OUTPUT_PATH" | cut -d"/" -f3 | cut -d"_" -f2)

#DEBUG_MODE
# With DEBUG_MODE "on" the tool output is collected in a debug file inside
# the working directory; otherwise it goes to /dev/null.
DEBUG_MODE="on"
DEBUG="/dev/null"

if [[ "$DEBUG_MODE" == "on" ]]; then
  DEBUG="$OUTPUT_PATH/ncPRO-ANNOTATION.debug"
fi

#Deploy ncPRO directories structure
# The redirection truncates $DEBUG, so this is the first entry of the debug log.
/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"

# NOTE(review): this makes the whole working directory world-writable -
# confirm that such wide permissions are really required.
chmod -R 777 "$OUTPUT_PATH"

#Go to working directory
# The relative paths used afterwards (annotation link, config file) rely on
# this, so stop if the directory cannot be entered.
cd "$OUTPUT_PATH" || {
  echo "ERROR: cannot enter working directory $OUTPUT_PATH" >&2
  exit 1
}

#Create symbolic link to input and annotations
ln -s "$INPUT" "${OUTPUT_PATH}/rawdata/input.bam"

# Replace the "annotation" entry of the working directory by a link to the
# shared annotation directory.
rm -f annotation
ln -s "$annotationPath" annotation

#Edit config-ncrna.txt

CONFIG_FILE=config-ncrna.txt

# Escape the characters that are special on the replacement side of the
# ":"-delimited sed substitutions below: backslash, "&" and the delimiter.
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&:]/\\&/g'
}

genomeReplacement=$(escape_sed_replacement "$GENOME_2")
projectReplacement=$(escape_sed_replacement "$PROJECTNAME")

# Genome, organism, CPU count and project name are rewritten in one pass.
# N_CPU: make sure this value matches universe.ini files.
sed -i \
  -e "s:^BOWTIE_GENOME_REFERENCE =.*$:BOWTIE_GENOME_REFERENCE = ${genomeReplacement}:g" \
  -e "s:^ORGANISM.*$:ORGANISM = ${genomeReplacement}:g" \
  -e "s:^N_CPU.*$:N_CPU = 4:g" \
  -e "s:^PROJECT_NAME =.*$:PROJECT_NAME = ${projectReplacement}:g" \
  "$CONFIG_FILE"

# Disabled: the LOGFILE entry of the config file is left at its default.
#sed -i "s/LOGFILE = pipeline.log/LOGFILE = $LOG_FILE/g" $CONFIG_FILE

# Print the space-separated list of annotation files found under directory $1:
# precursor_miRNA.gff and rfam.gff, then cluster_pirna.gff if it exists
# (otherwise pirna.gff if that exists, otherwise no piRNA file), then
# rmsk.gff and coding_gene.gff.
build_anno_catalog() {
  local dir=$1 pirna
  local IFS=' ' # "${catalog[*]}" joins on the first character of IFS
  local -a catalog=("$dir/precursor_miRNA.gff" "$dir/rfam.gff")
  for pirna in cluster_pirna.gff pirna.gff; do
    if [[ -f "$dir/$pirna" ]]; then
      catalog+=("$dir/$pirna")
      break
    fi
  done
  catalog+=("$dir/rmsk.gff" "$dir/coding_gene.gff")
  printf '%s\n' "${catalog[*]}"
}

ANNO_CATALOG=$(build_anno_catalog "$annotationPath/$GENOME_2")

# Escape backslash, "&" and the ":" delimiter before using the list as a sed
# replacement.
annoReplacement=$(printf '%s' "$ANNO_CATALOG" | sed -e 's/[\\&:]/\\&/g')
sed -i "s:^ANNO_CATALOG.*$:ANNO_CATALOG = ${annoReplacement}:g" "$CONFIG_FILE"

####### Remove information in config-ncrna.txt file ###############

# Blank the value of every annotation key in config file $1: each line that
# starts with "<KEY> =" is rewritten to exactly "<KEY> =". All keys are
# handled by a single sed pass.
reset_annotation_keys() {
  local config=$1 key
  local -a script=()
  for key in MATURE_MIRNA PRECURSOR_MIRNA TRNA_UCSC NCRNA_RFAM NCRNA_RFAM_EX \
    NCRNA_RMSK NCRNA_RMSK_EX OTHER_NCRNA_GFF; do
    script+=(-e "s:^${key} =.*$:${key} =:g")
  done
  sed -i "${script[@]}" "$config"
}

reset_annotation_keys "$CONFIG_FILE"

#######################################

# Write the selected annotation into config file $3.
#   $1  data type: matmir, premir, trna, rfam, rmsk or other
#   $2  value to store; for "other" only the part before the first "," is
#       used (the gff3 file)
# Every occurrence of "<KEY> =" becomes "<KEY> = <value>", where KEY depends
# on the data type. Any other data type leaves the file untouched.
apply_datatype_selection() {
  local datatype=$1 ext=$2 config=$3
  local key value escaped
  case "$datatype" in
    matmir) key=MATURE_MIRNA ;;
    premir) key=PRECURSOR_MIRNA ;;
    trna) key=TRNA_UCSC ;;
    rfam) key=NCRNA_RFAM_EX ;;
    rmsk) key=NCRNA_RMSK_EX ;;
    other) key=OTHER_NCRNA_GFF ;;
    *) return 0 ;;
  esac

  value=$ext
  if [[ "$datatype" == "other" ]]; then
    # get the gff3 file
    value=${ext%%,*}
  fi

  # Escape backslash, "&" and "/" so the value is taken literally by the
  # "/"-delimited substitution below.
  escaped=$(printf '%s' "$value" | sed -e 's:[\\&/]:\\&:g')
  sed -i "s/${key} =/${key} = ${escaped}/g" "$config"
}

apply_datatype_selection "$DATATYPE" "$EXT" "$CONFIG_FILE"

echo "building the command line" >> "$DEBUG"

#Build command line
## NEW : check if reads are grouped and change command line accordingly

# Read alignment records on stdin and print 1 when the first field of every
# line matches ^[0-9]+_[0-9]+$, else 0. Reading stops at the first line that
# does not match. Empty input prints 1.
reads_are_grouped() {
  awk 'BEGIN { grouped = 1 }
       $1 !~ /^[0-9]+_[0-9]+$/ { grouped = 0; exit }
       END { print grouped }'
}

RG=$(samtools view "$INPUT" | reads_are_grouped)

if [[ "$RG" == 0 ]]; then # if not grouped
  # add -s processBam to do the grouping
  COMMAND_LINE="-c $CONFIG_FILE -s processBam -s generateNcgff -s ncrnaProcess"
else
  # eliminate [-s processBam] because reads are already grouped + link
  # input.bam into bowtie_results
  ln -s "$INPUT" "${OUTPUT_PATH}/bowtie_results/input.bam"
  COMMAND_LINE="-c $CONFIG_FILE -s generateNcgff -s ncrnaProcess"
fi

echo "cmd : $COMMAND_LINE" >> "$DEBUG"
# END NEW

#Launch ncPRO analysis
echo "$COMMAND_LINE" >> "$DEBUG"

# COMMAND_LINE is a whitespace-separated option string; split it into words
# explicitly instead of relying on an unquoted expansion.
read -ra ncproArgs <<< "$COMMAND_LINE"
/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq "${ncproArgs[@]}" >> "$DEBUG" 2>&1

#Galaxy output handling
# Move pipeline.log from the working directory to the requested log file.
mv "${OUTPUT_PATH}/pipeline.log" "$LOG_FILE"

# PROFILE

# Write one profile output to file $2.
#   $1  profile kind used in the file names: "abundant" or "distinct"
#   $2  destination file
# When a picture pic/input_*_<EXT>_<kind>.png exists in the working directory
# it is resized to 60% with convert and written to the destination. Otherwise
# the destination receives an explanatory message followed by the matching
# coverage table from doc/.
# NOTE(review): if several pictures match, the first one is used - confirm
# that at most one is expected.
publish_profile() {
  local kind=$1 dest=$2
  local -a pictures=("${OUTPUT_PATH}"/pic/input_*_"${EXT}"_"${kind}".png)
  # Without a match the glob stays literal and the -f test fails.
  if [[ -f "${pictures[0]}" ]]; then
    convert -resize 60% "${pictures[0]}" "$dest"
  else
    printf '%s\n\n' "Distribution of positional read coverage and the read length distribution are unavailable in this annotation family. Check the coverage profile table :" > "$dest"
    cat "${OUTPUT_PATH}/doc/${DATATYPE}_${EXT}_all_samples_scaled_basecov_${kind}_all_RPM.data" >> "$dest"
  fi
}

publish_profile abundant "$OUT_AB"
publish_profile distinct "$OUT_DIS"

# Remove the working directory; never run rm -rf with an empty path.
if [[ -n "$OUTPUT_PATH" ]]; then
  rm -rf -- "$OUTPUT_PATH"
fi