41
|
1 #!/bin/sh -e
|
|
2 #
|
|
3 # Prototype script to setup a conda environment with the
|
|
4 # dependencies needed for the Amplicon_analysis_pipeline
|
|
5 # script
|
|
6 #
|
|
# Handle command line
#
# Print a short usage message on stdout.
# Arguments: none
# Outputs:   five lines of help text naming this script
usage()
{
    # "$0" is quoted so that a script path containing whitespace is
    # passed to basename as a single operand
    echo "Usage: $(basename "$0") [DIR]"
    echo ""
    echo "Installs the Amplicon_analysis_pipeline package plus"
    echo "dependencies in directory DIR (or current directory "
    echo "if DIR not supplied)"
}
|
|
# Optional first argument: either a help flag or the directory to
# install into (the script changes into it before computing paths)
if [ -n "$1" ] ; then
    # Check if help was requested
    case "$1" in
        --help|-h)
            usage
            exit 0
            ;;
    esac
    # Assume it's the installation directory.
    # The argument is quoted so directories containing whitespace work,
    # and failure is handled explicitly because the '-e' on the shebang
    # line is not in effect when the script is run as 'sh script.sh'.
    cd -- "$1" || {
        echo "$(basename "$0"): cannot change to directory '$1'" >&2
        exit 1
    }
fi
|
|
# Versions of the pipeline and of the RDP classifier to install
PIPELINE_VERSION="1.2.3"
RDP_CLASSIFIER_VERSION="2.2"
# Directory layout: everything lives under a versioned top-level
# directory created in the current working directory
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
# Conda installation and the named environment inside it
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
|
|
#
# Functions
#
# Report failure and terminate script
#
# Arguments: $@ - words making up the error message
# Outputs:   "ERROR <message>" on stderr; the surrounding blank lines
#            and the "installation failed" trailer on stdout
# Exits:     always, with status 1
fail()
{
    echo ""
    # "$*" keeps the message verbatim: an unquoted $@ would collapse
    # runs of whitespace and expand glob characters such as '*'
    echo "ERROR $*" >&2
    echo ""
    echo "$(basename "$0"): installation failed"
    exit 1
}
|
|
#
# Rewrite the shebangs in the installed conda scripts
# to remove the full path to conda 'bin' directory
#
# Globals:   CONDA_BIN (read)
# Effects:   edits, in place, every regular file under CONDA_BIN,
#            turning a leading "#!<CONDA_BIN>/" into "#!/usr/bin/env "
rewrite_conda_shebangs()
{
    # Kept local so the sed expression does not leak into the
    # caller's namespace
    local pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
    # CONDA_BIN is quoted so an install path containing whitespace
    # is handed to find as a single starting point
    find "${CONDA_BIN}" -type f -exec sed -i "$pattern" {} \;
}
|
|
#
# Install conda
#
# Downloads the Miniconda2 installer into a scratch directory, runs it
# in batch mode with CONDA_DIR as the prefix, rewrites the shebangs of
# the installed scripts and prepends CONDA_BIN to PATH.
# Returns without doing anything (other than a warning on stderr) when
# CONDA_DIR already exists.
#
# Globals:   CONDA_DIR, CONDA_BIN (read); PATH (modified)
install_conda()
{
    echo "++++++++++++++++"
    echo "Installing conda"
    echo "++++++++++++++++"
    if [ -e "${CONDA_DIR}" ] ; then
        echo "*** $CONDA_DIR already exists ***" >&2
        return
    fi
    # Declaration and assignment are separate so that a mktemp failure
    # is not masked by the exit status of 'local'; an empty $wd would
    # otherwise turn the cleanup below into a delete of '/*'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh \
        || fail "unable to download Miniconda installer"
    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}" \
        || fail "Miniconda installer failed"
    echo "Installed conda in ${CONDA_DIR}"
    # Update the installation files
    # This is to avoid problems when the length of the installation
    # directory path exceeds the limit for the shebang statement
    # in the conda files
    echo ""
    echo -n "Rewriting conda shebangs..."
    rewrite_conda_shebangs
    echo "ok"
    echo -n "Adding conda bin to PATH..."
    PATH=${CONDA_BIN}:$PATH
    echo "ok"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# Create conda environment
#
# Writes an environment.yml describing the pipeline dependencies into
# a scratch directory and asks conda to create the environment
# ENV_NAME from it.
#
# Globals:   CONDA, ENV_NAME, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
install_conda_packages()
{
    echo "+++++++++++++++++++++++++"
    echo "Installing conda packages"
    echo "+++++++++++++++++++++++++"
    # Assign separately from 'local' so a mktemp failure is detected
    # instead of leaving $wd empty for the cleanup step
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    # Unquoted delimiter: ENV_NAME and RDP_CLASSIFIER_VERSION are
    # expanded into the generated file
    cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.11
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.5.0
  - fastqc=0.11.3
  - qiime=1.8.0
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.4
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=1.1.3
  # Need to explicitly specify libgfortran
  # version (otherwise get version incompatible
  # with numpy=1.7.1)
  - libgfortran=1.0
  # Compilers needed to build R
  - gcc_linux-64
  - gxx_linux-64
  - gfortran_linux-64
EOF
    "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml \
        || fail "unable to create conda environment ${ENV_NAME}"
    echo "Created conda environment in ${ENV_DIR}"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package)
#
# Each package is skipped when its entry point already exists under
# BIN_DIR.
#
# Globals:   BIN_DIR (read)
install_non_conda_packages()
{
    echo "+++++++++++++++++++++++++++++"
    echo "Installing non-conda packages"
    echo "+++++++++++++++++++++++++++++"
    # Temporary working directory: created once (a second mktemp call
    # would orphan the first directory) and removed again on the way
    # out, after returning to the caller's directory
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    # Amplicon analysis pipeline
    echo -n "Installing Amplicon_analysis_pipeline..."
    if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
        echo "already installed"
    else
        install_amplicon_analysis_pipeline
        echo "ok"
    fi
    # ChimeraSlayer
    echo -n "Installing ChimeraSlayer..."
    if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
        echo "already installed"
    else
        install_chimeraslayer
        echo "ok"
    fi
    # Uclust
    echo -n "Installing uclust for QIIME/pyNAST..."
    if [ -e "${BIN_DIR}/uclust" ] ; then
        echo "already installed"
    else
        install_uclust
        echo "ok"
    fi
    # R 3.2.1
    echo -n "Checking for R 3.2.1..."
    if [ -e "${BIN_DIR}/R" ] ; then
        echo "R already installed"
    else
        echo "not found"
        install_R_3_2_1
    fi
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# Amplicon analysis pipeline
#
# Downloads the release tarball for PIPELINE_VERSION, copies its *.sh
# scripts and the uc2otutab directory into a versioned directory under
# TOP_DIR/share, and generates two wrapper scripts in BIN_DIR:
#   Amplicon_analysis_pipeline.sh - sets up the environment and runs
#                                   the real driver script
#   install_reference_data.sh     - runs References.sh in a directory
#                                   chosen by the user
#
# Globals:   TOP_DIR, BIN_DIR, CONDA_BIN, ENV_NAME, PIPELINE_VERSION,
#            RDP_CLASSIFIER_VERSION (read); INSTALL_DIR (written)
install_amplicon_analysis_pipeline()
{
    # One scratch directory only; assignment is separate from 'local'
    # so that a mktemp failure is not masked
    local cwd wd f
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    wget -q "https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz" \
        || fail "unable to download Amplicon_analysis v${PIPELINE_VERSION}"
    tar zxf "v${PIPELINE_VERSION}.tar.gz"
    cd "Amplicon_analysis-${PIPELINE_VERSION}"
    # INSTALL_DIR is deliberately left global, as in the other
    # install_* functions
    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/amplicon_analysis_pipeline"
    for f in *.sh ; do
        /bin/cp "$f" "$INSTALL_DIR"
    done
    /bin/cp -r uc2otutab "$INSTALL_DIR"
    mkdir -p "${BIN_DIR}"
    # Wrapper for the pipeline driver. Unescaped variables are expanded
    # now (install time); those written as \$ are left for run time.
    # "\$@" is quoted so arguments containing whitespace reach the
    # driver script intact.
    cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${INSTALL_DIR}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
"${INSTALL_DIR}/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
    chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
    # Reference data installer. The shebang carries no options: on
    # Linux everything after the interpreter is passed to env as ONE
    # argument, so "env bash -e" looks for a program called "bash -e".
    # 'set -e' inside the script gives the intended behaviour.
    cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
#
set -e
function usage() {
    echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
    usage
    exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
    usage
    echo ""
    echo "Install reference data into DIR"
    exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
    echo "Making directory \$1"
    mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"${INSTALL_DIR}/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
    chmod 0755 "${BIN_DIR}/install_reference_data.sh"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# ChimeraSlayer
#
# Downloads the microbiomeutil 2010-04-29 tarball, copies its
# ChimeraSlayer directory into a versioned directory under
# TOP_DIR/share and generates a wrapper BIN_DIR/ChimeraSlayer.pl that
# runs the real script.
#
# Globals:   TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_chimeraslayer()
{
    # Assign separately from 'local' so a mktemp failure is detected
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz \
        || fail "unable to download microbiomeutil"
    tar zxf microbiomeutil_2010-04-29.tar.gz
    cd microbiomeutil_2010-04-29
    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/microbiome_chimeraslayer"
    /bin/cp -r ChimeraSlayer "$INSTALL_DIR"
    # The wrapper is written into BIN_DIR, so make sure it exists even
    # if this function runs before anything else has created it
    mkdir -p "${BIN_DIR}"
    # "\$@" must be escaped: an unescaped \$@ is expanded while the
    # wrapper is being written (to this function's own, empty, argument
    # list), leaving a wrapper that discards its command line
    cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="$INSTALL_DIR:\$PATH"
"$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
    chmod 0755 "${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl"
    chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
#
# Downloads the uclust 1.2.22 binary into a versioned directory under
# TOP_DIR/share and links it into BIN_DIR.
#
# Globals:   TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_uclust()
{
    # One scratch directory only (a second mktemp call would orphan
    # the first); assignment is separate from 'local' so that a mktemp
    # failure is not masked
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64 \
        || fail "unable to download uclust"
    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/uclust"
    /bin/mv uclustq1.2.22_i86linux64 "${INSTALL_DIR}/uclust"
    chmod 0755 "${INSTALL_DIR}/uclust"
    # BIN_DIR must be an existing directory for the link to be created
    # inside it rather than being named after it
    mkdir -p "${BIN_DIR}"
    ln -s "${INSTALL_DIR}/uclust" "${BIN_DIR}"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
}
|
|
#
# R 3.2.1
# Can't use version from conda due to dependency conflicts
#
# Builds R 3.2.1 from source inside the activated conda environment
# (which provides the compilers) and installs it with TOP_DIR as the
# prefix. Output of the build steps goes to a single log file in the
# scratch directory; its location is reported if a step fails.
#
# Globals:   TOP_DIR, CONDA_BIN, ENV_NAME (read); INSTALL_DIR (written)
install_R_3_2_1()
{
    # NOTE(review): passing arguments to '.' is not specified by POSIX;
    # confirm the /bin/sh on target systems forwards them
    . "${CONDA_BIN}/activate" "${ENV_NAME}"
    # Assign separately from 'local' so a mktemp failure is detected
    local cwd wd log
    cwd=$(pwd)
    wd=$(mktemp -d) || fail "unable to create temporary directory"
    # Absolute log path: relative redirections would land in a
    # different file once we cd into the source tree
    log="${wd}/INSTALL.log"
    cd "$wd" || fail "unable to enter temporary directory $wd"
    echo -n "Fetching R 3.2.1 source code..."
    wget -q http://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz \
        || fail "unable to download R 3.2.1 source code"
    echo "ok"
    INSTALL_DIR=${TOP_DIR}
    mkdir -p "$INSTALL_DIR"
    echo -n "Unpacking source code..."
    tar xzf R-3.2.1.tar.gz >"$log" 2>&1 \
        || fail "unpacking R failed (see $log)"
    echo "ok"
    cd R-3.2.1
    echo -n "Running configure..."
    ./configure --prefix="$INSTALL_DIR" --with-x=no --with-readline=no >>"$log" 2>&1 \
        || fail "configure failed (see $log)"
    echo "ok"
    echo -n "Running make..."
    make >>"$log" 2>&1 || fail "make failed (see $log)"
    echo "ok"
    echo -n "Running make install..."
    make install >>"$log" 2>&1 || fail "make install failed (see $log)"
    echo "ok"
    cd "$cwd"
    # ${wd:?} aborts rather than expanding to nothing if wd is empty
    rm -rf -- "${wd:?}"
    . "${CONDA_BIN}/deactivate"
}
|
|
# Link tools from the conda environment into the locations the
# pipeline expects and write the QIIME configuration file.
#
# Creates (unless already present):
#   BIN_DIR/vsearch113          -> ENV_DIR/bin/vsearch
#   BIN_DIR/fasta-splitter.pl   -> ENV_DIR/share/fasta-splitter/...
#   TOP_DIR/share/rdp_classifier/rdp_classifier-<version>.jar
#   TOP_DIR/qiime/qiime_config
# Calls fail() when a required file is missing from the environment.
#
# Globals:   TOP_DIR, BIN_DIR, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
#
# All path expansions are quoted so that an installation directory
# containing whitespace is handled correctly.
setup_pipeline_environment()
{
    echo "+++++++++++++++++++++++++++++++"
    echo "Setting up pipeline environment"
    echo "+++++++++++++++++++++++++++++++"
    # vsearch113
    echo -n "Setting up vsearch113..."
    if [ -e "${BIN_DIR}/vsearch113" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/bin/vsearch" ] ; then
        echo "failed"
        fail "vsearch not found"
    else
        ln -s "${ENV_DIR}/bin/vsearch" "${BIN_DIR}/vsearch113"
        echo "ok"
    fi
    # fasta_splitter.pl
    echo -n "Setting up fasta_splitter.pl..."
    if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
        echo "failed"
        fail "fasta-splitter.pl not found"
    else
        ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
        echo "ok"
    fi
    # rdp_classifier.jar
    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
    echo -n "Setting up rdp_classifier.jar..."
    if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
        echo "failed"
        fail "rdp_classifier.jar not found"
    else
        mkdir -p "${TOP_DIR}/share/rdp_classifier"
        ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
        echo "ok"
    fi
    # qiime_config
    echo -n "Setting up qiime_config..."
    if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
        echo "already exists"
    else
        mkdir -p "${TOP_DIR}/qiime"
        cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir ${ENV_DIR}/bin
EOF-qiime-config
        echo "ok"
    fi
}
|
|
#
# Remove the compilers from the conda environment
# Not sure if this step is necessary
#
# Globals:   CONDA, ENV_NAME (read)
remove_conda_compilers()
{
    echo "+++++++++++++++++++++++++++++++++++++++++"
    echo "Removing compilers from conda environment"
    echo "+++++++++++++++++++++++++++++++++++++++++"
    # CONDA and ENV_NAME are quoted so an installation path containing
    # whitespace is still run as a single command word
    "${CONDA}" remove -y -n "${ENV_NAME}" gcc_linux-64 gxx_linux-64 gfortran_linux-64
}
|
|
#
# Top level script does the installation
#
# Refuses to run when the target directory already exists, then runs
# the installation stages in order and prints instructions for using
# the installed wrappers.
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
# TOP_DIR is quoted so that a working directory containing whitespace
# does not break the test or the mkdir
if [ -e "${TOP_DIR}" ] ; then
    fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
remove_conda_compilers
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename "$0"): finished"
|
|
424 ##
|
|
425 #
|