#!/bin/bash

# Copyleft ↄ⃝ 2012 Institut Curie
# Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant (Institut Curie) 2012 - 2015
# Contact: bioinfo.ncproseq@curie.fr
# This software is distributed without any guarantee under the terms of the GNU General
# Public License, either Version 2, June 1991 or Version 3, June 2007.

#######################################
# Parse the script's command-line options into global variables.
# Globals (written):
#   INPUT (-i), GENOME (-g), DATATYPE (-t), EXT (-e), LOG_FILE (-l),
#   UCSC (-u), UCSC_TRACK (-v), OUT (-o), OUT_ALL (-a), NORM (-n),
#   ROOT_DIR (-r), PROJECTNAME (-p)
# Arguments:
#   The option list to parse (normally the script's own "$@").
#######################################
parse_options() {
  # Keep the getopts state local so the function can be called repeatedly.
  local optionName OPTARG OPTIND=1

  # NOTE(review): the doubled colon after "a" looks like GNU getopt's
  # optional-argument syntax; the bash getopts builtin documents no such
  # feature, so -a is presumably treated as taking a required argument.
  # The option string is left untouched - confirm against the caller.
  while getopts "i:g:t:e:l:u:v:o:a::n:r:p:" optionName; do
    case "$optionName" in
      i) INPUT="$OPTARG" ;;
      g) GENOME="$OPTARG" ;;
      t) DATATYPE="$OPTARG" ;;
      e) EXT="$OPTARG" ;;
      l) LOG_FILE="$OPTARG" ;;
      u) UCSC="$OPTARG" ;;
      v) UCSC_TRACK="$OPTARG" ;;
      o) OUT="$OPTARG" ;;
      a) OUT_ALL="$OPTARG" ;;
      n) NORM="$OPTARG" ;;
      r) ROOT_DIR="$OPTARG" ;;
      p) PROJECTNAME="$OPTARG" ;;
      # Unknown options / missing arguments: getopts has already printed a
      # diagnostic; parsing simply continues, as it always did.
      *) ;;
    esac
  done
}

parse_options "$@"

##### ncPRO-seq annotation - Galaxy #####

# Second "_"-separated field of the -g value. It names the annotation
# sub-directory checked below and is written into the ncPRO config file
# as the genome/organism name.
GENOME_2=$(echo "$GENOME" | cut -d"_" -f2)

databasePath="$ROOT_DIR/database/files"
annotationPath="$databasePath/ncproseqAnnotation/annotation"

# -p creates the intermediate ncproseqAnnotation directory as well.
mkdir -p "$annotationPath"
echo "$annotationPath"

# Fetch and unpack the annotation archive only when the genome directory is
# not there yet. The archive is extracted with "tar -C" instead of a "cd", so
# the script's working directory no longer depends on whether a download
# happened; each step still runs only if the previous one succeeded.
if [[ ! -d "$annotationPath/$GENOME_2" ]]; then
  annotationArchive="$annotationPath/$GENOME.tar.gz"
  wget -P "$annotationPath" "http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz" \
    && tar -zxf "$annotationArchive" -C "$annotationPath" \
    && rm -rf -- "$annotationArchive"
fi

#########

#######################################
# Derive the per-run working directory from the log file path and create it.
# For LOG_FILE=<dir>/<name>.dat the working directory is <dir>/<name>_files.
# Globals:
#   LOG_FILE (read)
#   OUTPUT_PATH_DIR, OUTPUT_PATH_NAME, OUTPUT_PATH, VERSION (written)
#######################################
prepare_output_dir() {
  OUTPUT_PATH_DIR=$(dirname -- "$LOG_FILE")
  OUTPUT_PATH_NAME=$(basename -- "$LOG_FILE" .dat)

  OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files"

  # this was missing
  mkdir -p -- "$OUTPUT_PATH"

  # Third "/"-separated field of the working directory path, then the second
  # "_"-separated field of that. Not referenced again in the visible script.
  VERSION=$(echo "$OUTPUT_PATH" | cut -d"/" -f3)
  VERSION=$(echo "$VERSION" | cut -d"_" -f2)
}

prepare_output_dir

#DEBUG_MODE
# DEBUG is the file that the later "> $DEBUG" / ">> $DEBUG" redirections write
# to. With DEBUG_MODE set to "on" it is a file inside the working directory;
# with any other value it stays /dev/null.

DEBUG_MODE="on"
DEBUG="/dev/null"

if [[ $DEBUG_MODE == "on" ]]; then
  DEBUG="$OUTPUT_PATH/ncPRO-ANNOTATION.debug"
fi

#Deploy ncPRO directories structure

/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o "$OUTPUT_PATH" > "$DEBUG"
# READ_GROUP = 1 ! (always)

# Opens the whole working directory to every user (mode 777, recursive).
chmod -R 777 "$OUTPUT_PATH"

#Go to working directory
# Stop here if the directory cannot be entered: the rm, ln and sed -i calls
# that follow use relative paths and would otherwise act on the wrong place.
cd "$OUTPUT_PATH" || { echo "cannot cd to $OUTPUT_PATH" >&2; exit 1; }

# Swap the "annotation" entry of the working directory (presumably created by
# ncPRO-deploy - confirm) for a link to the shared annotation directory.
rm annotation

echo "ln -s $annotationPath annotation"

ln -s "$annotationPath" annotation

#Create symbolic link to input

ln -s "$INPUT" "${OUTPUT_PATH}/rawdata/input.bam"

#Edit config-ncrna.txt

CONFIG_FILE=config-ncrna.txt

#######################################
# Replace, in $CONFIG_FILE, every line starting with PREFIX by NEW_LINE.
# NEW_LINE is inserted literally: backslash, "&" and the ":" used as sed
# delimiter are escaped first, so values containing them no longer break
# or alter the substitution.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - PREFIX, used as-is at the start of the sed pattern
#            $2 - NEW_LINE, the full replacement line
#######################################
replace_config_line() {
  local prefix=$1 new_line
  new_line=$(printf '%s' "$2" \
    | sed -e 's/\\/\\\\/g' -e 's/&/\\&/g' -e 's/:/\\:/g')
  sed -i "s:^${prefix}.*\$:${new_line}:g" "$CONFIG_FILE"
}

#######################################
# Fill in config-ncrna.txt (in the current directory) for this run.
# Globals:
#   GENOME_2, PROJECTNAME, annotationPath, DATATYPE, EXT, CONFIG_FILE (read)
#   ANNO_CATALOG (written)
#######################################
edit_ncpro_config() {
  local anno_dir="$annotationPath/$GENOME_2"
  local pirna_gff="" key
  local -a gff
  local gff_file

  replace_config_line "BOWTIE_GENOME_REFERENCE =" "BOWTIE_GENOME_REFERENCE = $GENOME_2"
  replace_config_line "ORGANISM" "ORGANISM = $GENOME_2"

  #****** Make sure this value matches universe.ini files
  replace_config_line "N_CPU" "N_CPU = 4"
  replace_config_line "PROJECT_NAME =" "PROJECT_NAME = $PROJECTNAME"

  #sed -i "s/LOGFILE = pipeline.log/LOGFILE = $LOG_FILE/g" $CONFIG_FILE

  # The catalog always lists precursor_miRNA, rfam, rmsk and coding_gene.
  # A piRNA file is inserted after rfam when one exists, cluster_pirna.gff
  # taking precedence over pirna.gff.
  if [[ -f "$anno_dir/cluster_pirna.gff" ]]; then
    pirna_gff="$anno_dir/cluster_pirna.gff"
  elif [[ -f "$anno_dir/pirna.gff" ]]; then
    pirna_gff="$anno_dir/pirna.gff"
  fi
  ANNO_CATALOG="$anno_dir/precursor_miRNA.gff $anno_dir/rfam.gff${pirna_gff:+ $pirna_gff} $anno_dir/rmsk.gff $anno_dir/coding_gene.gff"

  replace_config_line "ANNO_CATALOG" "ANNO_CATALOG = $ANNO_CATALOG"

  ####### Remove information in config-ncrna.txt file ###############

  for key in MATURE_MIRNA PRECURSOR_MIRNA TRNA_UCSC NCRNA_RFAM NCRNA_RFAM_EX \
    NCRNA_RMSK NCRNA_RMSK_EX OTHER_NCRNA_GFF; do
    replace_config_line "$key =" "$key ="
  done

  #######################################

  # Set only the entry that belongs to the requested data type.
  case "$DATATYPE" in
    matmir) replace_config_line "MATURE_MIRNA =" "MATURE_MIRNA = $EXT" ;;
    premir) replace_config_line "PRECURSOR_MIRNA =" "PRECURSOR_MIRNA = $EXT" ;;
    trna) replace_config_line "TRNA_UCSC =" "TRNA_UCSC = $EXT" ;;
    rfam) replace_config_line "NCRNA_RFAM_EX =" "NCRNA_RFAM_EX = $EXT" ;;
    rmsk) replace_config_line "NCRNA_RMSK_EX =" "NCRNA_RMSK_EX = $EXT" ;;
    other)
      # get the gff3 file: first comma-separated element of EXT. No scratch
      # file or manual slash-escaping is needed; replace_config_line escapes
      # the value itself.
      IFS=',' read -r -a gff <<< "$EXT"
      gff_file=${gff[0]}
      replace_config_line "OTHER_NCRNA_GFF =" "OTHER_NCRNA_GFF = $gff_file"
      ;;
  esac
}

edit_ncpro_config

#Build command line

## ***** NEW for BAM files: check if reads are grouped (or not) + change command line accordingly *****

#######################################
# Read alignment records on stdin and print 1 when the first field of every
# record has the form <digits>_<digits>; print 0 as soon as one record does
# not. Empty input prints 1.
# Uses "+" instead of the "{1,}" interval so no gawk-only --posix switch is
# needed.
#######################################
reads_grouped_flag() {
  awk 'BEGIN { grouped = 1 }
       $1 !~ /^[0-9]+_[0-9]+$/ { grouped = 0; exit }
       END { print grouped }'
}

#check if file is already grouped (grouped => RG = 1; not grouped => 0)
RG=$(samtools view "$INPUT" | reads_grouped_flag)

if [[ $RG == 0 ]]; then # if not grouped
  # add -s processBam to do the grouping
  echo "Grouping reads..." >> "$DEBUG"
  COMMAND_LINE="-c $CONFIG_FILE -s processBam -s generateNcgff -s ncrnaProcess"
else
  # omit [-s processBam] because reads are already grouped + link the
  # ready-to-use input.bam into /bowtie_results
  echo "Reads already grouped..." >> "$DEBUG"
  ln -s "$INPUT" "${OUTPUT_PATH}/bowtie_results/input.bam"
  COMMAND_LINE="-c $CONFIG_FILE -s generateNcgff -s ncrnaProcess"
fi

#finally, add track option if demanded
if [[ $UCSC == "True" ]]; then
  COMMAND_LINE="$COMMAND_LINE -s ncrnaTracks"
fi

# ***** END NEW *****

#Launch ncPRO analysis
echo "$COMMAND_LINE" >> "$DEBUG"

# COMMAND_LINE is left unquoted on purpose: it holds several options in one
# string and must be split into separate arguments.
# shellcheck disable=SC2086
/usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE >> "$DEBUG"

##***TEST
# Re-check the read names of bowtie_results/input.bam after the run and log
# the result: 1 when the first field of every record is <digits>_<digits>,
# 0 otherwise. "+" replaces the "{1,}" interval so no gawk-only --posix
# switch is needed.
RG=$(samtools view "${OUTPUT_PATH}/bowtie_results/input.bam" \
  | awk 'BEGIN { grouped = 1 }
         $1 !~ /^[0-9]+_[0-9]+$/ { grouped = 0; exit }
         END { print grouped }')
echo " RG after pre-processing = $RG" >> "$DEBUG"
#**** TEST

#Galaxy output handling

#######################################
# Move the pipeline log and the result table(s) out of the working directory.
# The table name is
#   <prefix>_<EXT>_all_samples_subfamcov[_RPM][_all_miRNA].data
# where <prefix> depends on DATATYPE, "_RPM" is present when NORM is "True",
# and the "_all_miRNA" variant is moved to OUT_ALL only for matmir/premir
# and only when OUT_ALL is non-empty. Any other DATATYPE moves no table.
# Globals (read): OUTPUT_PATH, LOG_FILE, NORM, DATATYPE, EXT, OUT, OUT_ALL
#######################################
collect_galaxy_outputs() {
  local table_prefix table_stem
  local rpm_suffix=""
  local has_all_table=0

  mv -- "${OUTPUT_PATH}/pipeline.log" "$LOG_FILE"

  if [[ $NORM == "True" ]]; then
    rpm_suffix="_RPM"
  fi

  case "$DATATYPE" in
    matmir)
      table_prefix="mature_miRNA"
      has_all_table=1
      ;;
    premir)
      table_prefix="precursor_miRNA"
      has_all_table=1
      ;;
    trna) table_prefix="tRNA" ;;
    rfam) table_prefix="rfam" ;;
    rmsk) table_prefix="rmsk" ;;
    *) return 0 ;;
  esac

  table_stem="$OUTPUT_PATH/doc/${table_prefix}_${EXT}_all_samples_subfamcov${rpm_suffix}"

  if ((has_all_table)) && [[ -n "$OUT_ALL" ]]; then
    mv -- "${table_stem}_all_miRNA.data" "$OUT_ALL"
  fi
  mv -- "${table_stem}.data" "$OUT"
}

collect_galaxy_outputs

if [[ $UCSC == "True" ]]; then

  #**** FOR NEBULA ONLY ******

  # The glob stays outside the quotes so it still expands; only the directory
  # part is quoted to survive spaces in the path.
  gunzip "$OUTPUT_PATH"/ucsc/input_*_sens.bedGraph.gz
  mv -- "$OUTPUT_PATH"/ucsc/input_*_sens.bedGraph "$UCSC_TRACK"

fi
# ***** END FOR NEBULA ONLY *****

# Remove the working directory. ":?" aborts instead of running rm -rf with an
# empty path should OUTPUT_PATH ever be unset.
rm -rf -- "${OUTPUT_PATH:?}"