Mercurial > repos > jbrayet > ncproseq_1_6_5_docker
comparison ncPRO-ANNOTATION.sh @ 0:50c2566d6b4c draft
Uploaded
author | jbrayet |
---|---|
date | Thu, 28 Jan 2016 07:43:11 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:50c2566d6b4c |
---|---|
1 #!/bin/bash | |
2 | |
3 # Copyleft ↄ⃝ 2012 Institut Curie | |
4 # Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant(Institut Curie) 2012 - 2015 | |
5 # Contact: bioinfo.ncproseq@curie.fr | |
6 # This software is distributed without any guarantee under the terms of the GNU General | |
7 # Public License, either Version 2, June 1991 or Version 3, June 2007. | |
8 | |
9 while getopts "i:g:t:e:l:u:v:o:a::n:r:p:" optionName; do | |
10 case "$optionName" in | |
11 | |
12 i) INPUT="$OPTARG";; | |
13 g) GENOME="$OPTARG";; | |
14 t) DATATYPE="$OPTARG";; | |
15 e) EXT="$OPTARG";; | |
16 l) LOG_FILE="$OPTARG";; | |
17 u) UCSC="$OPTARG";; | |
18 v) UCSC_TRACK="$OPTARG";; | |
19 o) OUT="$OPTARG";; | |
20 a) OUT_ALL="$OPTARG";; | |
21 n) NORM="$OPTARG";; | |
22 r) ROOT_DIR="$OPTARG";; | |
23 p) PROJECTNAME="$OPTARG";; | |
24 | |
25 | |
26 esac | |
27 done | |
28 | |
29 ##### ncPRO-seq annotation - Galaxy ##### | |
30 | |
31 GENOME_2=`echo $GENOME | cut -d"_" -f2` | |
32 | |
33 databasePath=$ROOT_DIR/database/files | |
34 | |
35 mkdir -p $databasePath/ncproseqAnnotation | |
36 mkdir -p $databasePath/ncproseqAnnotation/annotation | |
37 annotationPath=$databasePath/ncproseqAnnotation/annotation | |
38 echo $annotationPath | |
39 [ ! -d $annotationPath/$GENOME_2 ] && wget http://ncpro.curie.fr/ncproseq/install_dir/annotation/$GENOME.tar.gz -P $annotationPath && cd $annotationPath && tar -zxf $GENOME.tar.gz && rm -rf $GENOME.tar.gz | |
40 | |
41 ######### | |
42 | |
43 OUTPUT_PATH_DIR=`dirname $LOG_FILE` | |
44 OUTPUT_PATH_NAME=`basename $LOG_FILE .dat` | |
45 | |
46 OUTPUT_PATH="${OUTPUT_PATH_DIR}/${OUTPUT_PATH_NAME}_files" | |
47 | |
48 # this was missing | |
49 mkdir -p $OUTPUT_PATH | |
50 | |
51 VERSION=`echo $OUTPUT_PATH | cut -d"/" -f3` | |
52 VERSION=`echo $VERSION | cut -d"_" -f2` | |
53 | |
54 #DEBUG_MODE | |
55 | |
56 DEBUG_MODE="on" | |
57 DEBUG="/dev/null" | |
58 | |
59 if [[ $DEBUG_MODE == "on" ]];then | |
60 | |
61 DEBUG="$OUTPUT_PATH/ncPRO-ANNOTATION.debug" | |
62 | |
63 fi | |
64 | |
65 #Deploy ncPRO directories structure | |
66 | |
67 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-deploy -o $OUTPUT_PATH > $DEBUG | |
68 # READ_GROUP = 1 ! (always) | |
69 | |
70 | |
71 chmod 777 -R $OUTPUT_PATH | |
72 #Go to working directory | |
73 | |
74 cd $OUTPUT_PATH | |
75 | |
76 rm annotation | |
77 | |
78 echo "ln -s $annotationPath annotation" | |
79 | |
80 ln -s $annotationPath annotation | |
81 | |
82 #Create symbolic link to input | |
83 | |
84 ln -s $INPUT ${OUTPUT_PATH}/rawdata/input.bam | |
85 | |
86 #Edit config-ncrna.txt | |
87 | |
88 CONFIG_FILE=config-ncrna.txt | |
89 | |
90 sed -i "s:^BOWTIE_GENOME_REFERENCE =.*$:BOWTIE_GENOME_REFERENCE = $GENOME_2:g" $CONFIG_FILE | |
91 sed -i "s:^ORGANISM.*$:ORGANISM = $GENOME_2:g" $CONFIG_FILE | |
92 | |
93 sed -i "s:^N_CPU.*$:N_CPU = 4:g" $CONFIG_FILE #****** Make sure this value matches universe.ini files | |
94 sed -i "s:^PROJECT_NAME =.*$:PROJECT_NAME = $PROJECTNAME:g" $CONFIG_FILE | |
95 | |
96 | |
97 #sed -i "s/LOGFILE = pipeline.log/LOGFILE = $LOG_FILE/g" $CONFIG_FILE | |
98 | |
99 if [[ -f "$annotationPath/$GENOME_2/cluster_pirna.gff" ]] | |
100 then | |
101 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/cluster_pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" | |
102 else | |
103 if [[ -f "$annotationPath/$GENOME_2/pirna.gff" ]] | |
104 then | |
105 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/pirna.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" | |
106 else | |
107 ANNO_CATALOG="$annotationPath/$GENOME_2/precursor_miRNA.gff $annotationPath/$GENOME_2/rfam.gff $annotationPath/$GENOME_2/rmsk.gff $annotationPath/$GENOME_2/coding_gene.gff" | |
108 fi | |
109 fi | |
110 | |
111 sed -i "s:^ANNO_CATALOG.*$:ANNO_CATALOG = $ANNO_CATALOG:g" $CONFIG_FILE | |
112 | |
113 ####### Remove information in config-ncrna.txt file ############### | |
114 | |
115 sed -i "s:^MATURE_MIRNA =.*$:MATURE_MIRNA =:g" $CONFIG_FILE | |
116 sed -i "s:^PRECURSOR_MIRNA =.*$:PRECURSOR_MIRNA =:g" $CONFIG_FILE | |
117 sed -i "s:^TRNA_UCSC =.*$:TRNA_UCSC =:g" $CONFIG_FILE | |
118 sed -i "s:^NCRNA_RFAM =.*$:NCRNA_RFAM =:g" $CONFIG_FILE | |
119 sed -i "s:^NCRNA_RFAM_EX =.*$:NCRNA_RFAM_EX =:g" $CONFIG_FILE | |
120 sed -i "s:^NCRNA_RMSK =.*$:NCRNA_RMSK =:g" $CONFIG_FILE | |
121 sed -i "s:^NCRNA_RMSK_EX =.*$:NCRNA_RMSK_EX =:g" $CONFIG_FILE | |
122 sed -i "s:^OTHER_NCRNA_GFF =.*$:OTHER_NCRNA_GFF =:g" $CONFIG_FILE | |
123 | |
124 ####################################### | |
125 | |
126 if [[ $DATATYPE == "matmir" ]];then | |
127 | |
128 sed -i "s:^MATURE_MIRNA =.*$:MATURE_MIRNA = $EXT:g" $CONFIG_FILE | |
129 | |
130 elif [[ $DATATYPE == "premir" ]];then | |
131 | |
132 sed -i "s:^PRECURSOR_MIRNA =.*$:PRECURSOR_MIRNA = $EXT:g" $CONFIG_FILE | |
133 | |
134 elif [[ $DATATYPE == "trna" ]];then | |
135 | |
136 sed -i "s:^TRNA_UCSC =.*$:TRNA_UCSC = $EXT:g" $CONFIG_FILE | |
137 | |
138 elif [[ $DATATYPE == "rfam" ]];then | |
139 | |
140 sed -i "s:^NCRNA_RFAM_EX =.*$:NCRNA_RFAM_EX = $EXT:g" $CONFIG_FILE | |
141 | |
142 elif [[ $DATATYPE == "rmsk" ]];then | |
143 | |
144 sed -i "s:^NCRNA_RMSK_EX =.*$:NCRNA_RMSK_EX = $EXT:g" $CONFIG_FILE | |
145 | |
146 | |
147 elif [[ $DATATYPE == "other" ]];then | |
148 | |
149 # get the gff3 file | |
150 IFS=',' read -ra gff <<< "$EXT" | |
151 echo "${gff[0]}" | sed 's/\//\\\//g' > gff | |
152 gff_file=$(head -n 1 gff) | |
153 | |
154 sed -i "s:^OTHER_NCRNA_GFF =.*$:OTHER_NCRNA_GFF = $gff_file:g" $CONFIG_FILE | |
155 fi | |
156 | |
157 | |
158 #Build command line | |
159 | |
160 ## ********************************** NEW for BAM files: check if reads are grouped (or not) + change command line accordingly***************************### | |
161 | |
162 #check if file is already grouped (grouped => RG = 1; not grouped => 0) | |
163 RG=`samtools view $INPUT | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'` | |
164 | |
165 if [[ $RG == 0 ]];then # if not grouped | |
166 # add -s processBam to do the grouping | |
167 echo "Grouping reads..." >> $DEBUG | |
168 COMMAND_LINE="-c $CONFIG_FILE -s processBam -s generateNcgff -s ncrnaProcess" | |
169 | |
170 else | |
171 | |
172 # omit [-s processBam] because reads are already grouped + move ready-to-use input.bam to /bowtie_results | |
173 echo "Reads already grouped..." >> $DEBUG | |
174 ln -s $INPUT ${OUTPUT_PATH}/bowtie_results/input.bam | |
175 COMMAND_LINE="-c $CONFIG_FILE -s generateNcgff -s ncrnaProcess" | |
176 | |
177 fi | |
178 | |
179 #finally, add track option if demanded | |
180 if [[ $UCSC == "True" ]];then | |
181 COMMAND_LINE="$COMMAND_LINE -s ncrnaTracks" | |
182 fi | |
183 | |
184 # **************** END NEW ************************************************************************************************************************************************ | |
185 | |
186 #Launch ncPRO analysis | |
187 echo $COMMAND_LINE >> $DEBUG | |
188 | |
189 /usr/curie_ngs/ncproseq_v1.6.5/bin/ncPRO-seq $COMMAND_LINE >> $DEBUG | |
190 | |
191 ##***TEST | |
192 | |
193 RG=`samtools view ${OUTPUT_PATH}/bowtie_results/input.bam | awk --posix 'BEGIN {RG=1} { if ($1 !~ /^[0-9]{1,}_[0-9]{1,}$/) {RG=0 ; exit} } END { print RG}'` | |
194 echo " RG after pre-processing = $RG" >> $DEBUG | |
195 #**** TEST | |
196 | |
197 | |
198 #Galaxy output handling | |
199 | |
200 mv ${OUTPUT_PATH}/pipeline.log $LOG_FILE | |
201 | |
202 if [[ $NORM == "True" ]];then | |
203 if [[ $DATATYPE == "matmir" ]];then | |
204 if [[ ! -z "$OUT_ALL" ]];then | |
205 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM_all_miRNA.data $OUT_ALL | |
206 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
207 else | |
208 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
209 fi | |
210 elif [[ $DATATYPE == "premir" ]];then | |
211 if [[ ! -z "$OUT_ALL" ]];then | |
212 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM_all_miRNA.data $OUT_ALL | |
213 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
214 else | |
215 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
216 fi | |
217 elif [[ $DATATYPE == "trna" ]];then | |
218 mv $OUTPUT_PATH/doc/tRNA_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
219 elif [[ $DATATYPE == "rfam" ]];then | |
220 mv $OUTPUT_PATH/doc/rfam_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
221 elif [[ $DATATYPE == "rmsk" ]];then | |
222 mv $OUTPUT_PATH/doc/rmsk_${EXT}_all_samples_subfamcov_RPM.data $OUT | |
223 fi | |
224 else | |
225 if [[ $DATATYPE == "matmir" ]];then | |
226 if [[ ! -z "$OUT_ALL" ]];then | |
227 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov_all_miRNA.data $OUT_ALL | |
228 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov.data $OUT | |
229 else | |
230 mv $OUTPUT_PATH/doc/mature_miRNA_${EXT}_all_samples_subfamcov.data $OUT | |
231 fi | |
232 elif [[ $DATATYPE == "premir" ]];then | |
233 if [[ ! -z "$OUT_ALL" ]];then | |
234 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov_all_miRNA.data $OUT_ALL | |
235 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov.data $OUT | |
236 else | |
237 mv $OUTPUT_PATH/doc/precursor_miRNA_${EXT}_all_samples_subfamcov.data $OUT | |
238 fi | |
239 elif [[ $DATATYPE == "trna" ]];then | |
240 mv $OUTPUT_PATH/doc/tRNA_${EXT}_all_samples_subfamcov.data $OUT | |
241 elif [[ $DATATYPE == "rfam" ]];then | |
242 mv $OUTPUT_PATH/doc/rfam_${EXT}_all_samples_subfamcov.data $OUT | |
243 elif [[ $DATATYPE == "rmsk" ]];then | |
244 mv $OUTPUT_PATH/doc/rmsk_${EXT}_all_samples_subfamcov.data $OUT | |
245 fi | |
246 fi | |
247 | |
248 if [[ $UCSC == "True" ]];then | |
249 | |
250 #**** FOR NEBULA ONLY ****** | |
251 | |
252 gunzip $OUTPUT_PATH/ucsc/input_*_sens.bedGraph.gz | |
253 mv $OUTPUT_PATH/ucsc/input_*_sens.bedGraph $UCSC_TRACK | |
254 | |
255 fi | |
256 # ***** END FOR NEBULA ONLY ***** | |
257 | |
258 rm -rf $OUTPUT_PATH | |
259 |