#!/bin/sh
#
# Prototype script to set up a conda environment with the
# dependencies needed for the Amplicon_analysis_pipeline
# script
#
# Abort on the first unhandled command failure. This is enabled with
# 'set' rather than on the shebang line so that it is also in effect
# when the script is started as "sh <script>", which ignores any
# options given on the shebang line.
set -e
# Handle command line
#
# Print the usage message for this script.
# Outputs: five lines on stdout; the first one names the script
#          (basename of $0).
usage()
{
    # $0 is quoted so a script path containing spaces is handled
    echo "Usage: $(basename "$0") [DIR]"
    echo ""
    echo "Installs the Amplicon_analysis_pipeline package plus"
    echo "dependencies in directory DIR (or current directory "
    echo "if DIR not supplied)"
}
# If a first argument was supplied it is either a request for help or
# the directory to install into.
if [ -n "$1" ] ; then
    # Check if help was requested
    case "$1" in
        --help|-h)
            usage
            exit 0
            ;;
    esac
    # Assume it's the installation directory. The argument is quoted so
    # that paths containing spaces work, and a failed 'cd' is handled
    # explicitly so the installation can never continue in the wrong
    # directory.
    cd -- "$1" || {
        echo "$(basename "$0"): cannot change to directory '$1'" >&2
        exit 1
    }
fi
# Versions
PIPELINE_VERSION=1.3.5
CONDA_REQUIRED_VERSION=4.6.14
RDP_CLASSIFIER_VERSION=2.2
# Directories
# Everything is installed beneath TOP_DIR, which is created inside the
# directory the script is in at this point.
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
# Name and location of the conda environment that holds the dependencies
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
#
# Functions
#
# Report failure and terminate script
#
# Arguments: the words of the error message
# Outputs:   "ERROR <message>" on stderr; two blank lines and the
#            "<script>: installation failed" line on stdout
# Exits:     always, with status 1
fail()
{
    echo ""
    # "$@" is quoted so the message is printed verbatim: without the
    # quotes a '*' in the message would be expanded to file names and
    # runs of spaces would be collapsed
    echo ERROR "$@" >&2
    echo ""
    echo "$(basename "$0"): installation failed"
    exit 1
}
#
# Rewrite the shebangs in the installed conda scripts
# to remove the full path to conda 'bin' directory
#
# Globals: CONDA_BIN (read)
# Every regular file under CONDA_BIN is edited in place: a line that
# starts with "#!<CONDA_BIN>/" has that prefix replaced by
# "#!/usr/bin/env ".
rewrite_conda_shebangs()
{
    local escaped_bin pattern
    # Escape the characters that are special in a sed regular
    # expression, plus the ',' used as the delimiter, so that the
    # directory name is matched literally
    escaped_bin=$(printf '%s\n' "${CONDA_BIN}" | sed 's/[][\.*^$,]/\\&/g')
    pattern="s,^#!${escaped_bin}/,#!/usr/bin/env ,"
    find "${CONDA_BIN}" -type f -exec sed -i "$pattern" {} \;
}
#
# Reset conda version if required
#
# Globals: CONDA_BIN, CONDA_REQUIRED_VERSION (read);
#          CONDA_VERSION (written)
# Compares the version reported by the installed conda with
# CONDA_REQUIRED_VERSION and, when they differ, asks conda to install
# the required version (after allowing downgrades).
reset_conda_version()
{
    # The version is taken as the second space-separated field of the
    # first line that "conda -V" prints on stdout or stderr
    CONDA_VERSION="$("${CONDA_BIN}/conda" -V 2>&1 | head -n 1 | cut -d' ' -f2)"
    echo "conda version: ${CONDA_VERSION}"
    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
        "${CONDA_BIN}/conda" config --set allow_conda_downgrades true
        "${CONDA_BIN}/conda" install -y "conda=${CONDA_REQUIRED_VERSION}"
    else
        echo "conda version ok"
    fi
}
#
# Install conda
#
# Globals: CONDA_DIR, CONDA_BIN (read); PATH (CONDA_BIN is prepended)
# Downloads the Miniconda2 installer into a temporary directory and
# runs it in batch mode with CONDA_DIR as the prefix, then resets the
# conda version and rewrites the shebangs of the installed scripts.
# Does nothing except print a message on stderr if CONDA_DIR already
# exists.
install_conda()
{
    echo "++++++++++++++++"
    echo "Installing conda"
    echo "++++++++++++++++"
    # Quoted: with an unquoted path containing spaces the test itself
    # errors and the existing installation would be overwritten
    if [ -e "${CONDA_DIR}" ] ; then
        echo "*** $CONDA_DIR already exists ***" >&2
        return
    fi
    # Declaration is kept separate from the assignments so that a
    # failing 'mktemp' is not masked by the exit status of 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}"
    echo "Installed conda in ${CONDA_DIR}"
    # Reset the conda version to a known working version
    # (to avoid problems observed with e.g. conda 4.7.10)
    echo ""
    reset_conda_version
    # Update the installation files
    # This is to avoid problems when the length of the installation
    # directory path exceeds the limit for the shebang statement
    # in the conda files
    echo ""
    printf '%s' "Rewriting conda shebangs..."
    rewrite_conda_shebangs
    echo "ok"
    printf '%s' "Adding conda bin to PATH..."
    PATH="${CONDA_BIN}:$PATH"
    echo "ok"
    cd "$cwd"
    # ${wd:?} aborts instead of expanding to nothing, so this can never
    # turn into a removal relative to '/'
    rm -rf -- "${wd:?}"
}
#
# Create conda environment
#
# Globals: CONDA, CONDA_DIR, ENV_NAME, ENV_DIR,
#          RDP_CLASSIFIER_VERSION (read)
# Writes an environment.yml in a temporary directory, creates the conda
# environment from it, and then patches three QIIME scripts found under
# CONDA_DIR to replace matplotlib names that were deprecated in
# matplotlib 2.0.
install_conda_packages()
{
    echo "+++++++++++++++++++++++++"
    echo "Installing conda packages"
    echo "+++++++++++++++++++++++++"
    # Declaration is kept separate from the assignments so that a
    # failing 'mktemp' is not masked by the exit status of 'local'
    local cwd wd exe
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.8
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.10.1
  - fastqc=0.11.3
  - qiime=1.9.1
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.6
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=2.10.4
  - r=3.5.1
  - r-tidyverse=1.2.1
  - bioconductor-dada2=1.8
  - bioconductor-biomformat=1.8.0
EOF
    "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml
    echo "Created conda environment in ${ENV_DIR}"
    cd "$cwd"
    # ${wd:?} aborts instead of expanding to nothing, so this can never
    # turn into a removal relative to '/'
    rm -rf -- "${wd:?}"
    #
    # Patch qiime 1.9.1 tools to switch deprecated 'axisbg'
    # matplotlib property to 'facecolor':
    # https://matplotlib.org/api/prev_api_changes/api_changes_2.0.0.html
    echo ""
    for exe in make_2d_plots.py plot_taxa_summary.py ; do
        printf '%s' "Patching ${exe}..."
        find "${CONDA_DIR}" -type f -name "$exe" -exec sed -i 's/axisbg=/facecolor=/g' {} \;
        echo "done"
    done
    #
    # Patch qiime 1.9.1 tools to switch deprecated 'set_axis_bgcolor'
    # method call to 'set_facecolor':
    # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.set_axis_bgcolor.html
    for exe in make_rarefaction_plots.py ; do
        printf '%s' "Patching ${exe}..."
        find "${CONDA_DIR}" -type f -name "$exe" -exec sed -i 's/set_axis_bgcolor/set_facecolor/g' {} \;
        echo "done"
    done
}
#
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package)
#
# Globals: BIN_DIR (read)
# A package is skipped when its launcher already exists in BIN_DIR.
# The working directory is restored and the temporary directory is
# removed before returning.
install_non_conda_packages()
{
    echo "+++++++++++++++++++++++++++++"
    echo "Installing non-conda packages"
    echo "+++++++++++++++++++++++++++++"
    # Temporary working directory (created exactly once, so that no
    # stray directory is left behind)
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    # Amplicon analysis pipeline
    printf '%s' "Installing Amplicon_analysis_pipeline..."
    if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
        echo "already installed"
    else
        install_amplicon_analysis_pipeline
        echo "ok"
    fi
    # ChimeraSlayer
    printf '%s' "Installing ChimeraSlayer..."
    if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
        echo "already installed"
    else
        install_chimeraslayer
        echo "ok"
    fi
    # Uclust
    # This no longer seems to be available for download from
    # drive5.com so don't download
    echo "WARNING uclust not available: skipping installation"
    # Go back to where we started and remove the working directory
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# Amplicon analysis pipeline
#
# Globals: PIPELINE_VERSION, TOP_DIR, BIN_DIR, CONDA_BIN, ENV_NAME,
#          RDP_CLASSIFIER_VERSION (read); INSTALL_DIR (written)
# Downloads and unpacks the pipeline release, copies its *.sh and *.R
# files and the 'uc2otutab' directory into INSTALL_DIR, and generates
# two launcher scripts in BIN_DIR: Amplicon_analysis_pipeline.sh and
# install_reference_data.sh.
install_amplicon_analysis_pipeline()
{
    # Only one temporary directory is created, and the declaration is
    # kept separate from the assignments so that a failing 'mktemp' is
    # not masked by the exit status of 'local'
    local cwd wd f
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    wget -q "https://github.com/MTutino/Amplicon_analysis/archive/${PIPELINE_VERSION}.tar.gz"
    tar zxf "${PIPELINE_VERSION}.tar.gz"
    cd "Amplicon_analysis-${PIPELINE_VERSION}"
    INSTALL_DIR="${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/amplicon_analysis_pipeline"
    for f in *.sh *.R ; do
        /bin/cp "$f" "$INSTALL_DIR"
    done
    /bin/cp -r uc2otutab "$INSTALL_DIR"
    mkdir -p "${BIN_DIR}"
    # Launcher for the pipeline. Unescaped variables are filled in now,
    # at install time; anything written as \$ is left for the launcher
    # to evaluate when it runs. "\$@" is quoted so that arguments
    # containing spaces reach the driver script intact.
    cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Set the Matplotlib backend
export MPLBACKEND="agg"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${INSTALL_DIR}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
"$INSTALL_DIR/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
    chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
    # Launcher for the reference data installer. 'set -e' is used in
    # the generated script rather than "bash -e" on its shebang line,
    # because on Linux everything after the interpreter on a shebang
    # line is passed to 'env' as one argument ("bash -e"), which env
    # cannot find.
    cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
#
set -e
function usage() {
  echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
  usage
  exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
  usage
  echo ""
  echo "Install reference data into DIR"
  exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
    echo "Making directory \$1"
    mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"$INSTALL_DIR/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
    chmod 0755 "${BIN_DIR}/install_reference_data.sh"
    cd "$cwd"
    # ${wd:?} aborts instead of expanding to nothing, so this can never
    # turn into a removal relative to '/'
    rm -rf -- "${wd:?}"
}
#
# ChimeraSlayer
#
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
# Downloads and unpacks the microbiomeutil 2010-04-29 archive, copies
# its ChimeraSlayer directory into INSTALL_DIR and generates a launcher
# script BIN_DIR/ChimeraSlayer.pl.
install_chimeraslayer()
{
    # Declaration is kept separate from the assignments so that a
    # failing 'mktemp' is not masked by the exit status of 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
    tar zxf microbiomeutil_2010-04-29.tar.gz
    cd microbiomeutil_2010-04-29
    INSTALL_DIR="${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/microbiome_chimeraslayer"
    /bin/cp -r ChimeraSlayer "$INSTALL_DIR"
    # Make sure the launcher directory exists when this function is run
    # on its own
    mkdir -p "${BIN_DIR}"
    # "\$@" must be escaped here: written as a bare $@ it is expanded
    # while the launcher is being generated (to this function's own,
    # empty, argument list), so the launcher would never pass its
    # arguments on to ChimeraSlayer.pl
    cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="$INSTALL_DIR:\$PATH"
"$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
    chmod 0755 "${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl"
    chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
    cd "$cwd"
    # ${wd:?} aborts instead of expanding to nothing, so this can never
    # turn into a removal relative to '/'
    rm -rf -- "${wd:?}"
}
#
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
#
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
# Downloads the uclust 1.2.22 binary, installs it as INSTALL_DIR/uclust
# and links it into BIN_DIR.
install_uclust()
{
    # Declaration is kept separate from the assignments so that a
    # failing 'mktemp' is not masked by the exit status of 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d) || return 1
    cd "$wd" || return 1
    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
    INSTALL_DIR="${TOP_DIR}/share/uclust-1.2.22"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/uclust"
    /bin/mv uclustq1.2.22_i86linux64 "${INSTALL_DIR}/uclust"
    chmod 0755 "${INSTALL_DIR}/uclust"
    # Create BIN_DIR first and name the link explicitly; otherwise a
    # missing BIN_DIR would itself be created as the symlink
    mkdir -p "${BIN_DIR}"
    ln -s "${INSTALL_DIR}/uclust" "${BIN_DIR}/uclust"
    cd "$cwd"
    # ${wd:?} aborts instead of expanding to nothing, so this can never
    # turn into a removal relative to '/'
    rm -rf -- "${wd:?}"
}
# Link files from the conda environment into the installation and
# write the QIIME configuration file.
#
# Globals: BIN_DIR, ENV_DIR, TOP_DIR, RDP_CLASSIFIER_VERSION (read)
# Each of the three steps is skipped if its target already exists; the
# two linking steps call fail() if the file to link to is missing from
# the conda environment.
setup_pipeline_environment()
{
    echo "+++++++++++++++++++++++++++++++"
    echo "Setting up pipeline environment"
    echo "+++++++++++++++++++++++++++++++"
    # fasta_splitter.pl
    printf '%s' "Setting up fasta_splitter.pl..."
    if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
        echo "failed"
        fail "fasta-splitter.pl not found"
    else
        mkdir -p "${BIN_DIR}"
        ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
        echo "ok"
    fi
    # rdp_classifier.jar
    # The link is given a version-specific name
    local rdp_classifier_jar="rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
    printf '%s' "Setting up rdp_classifier.jar..."
    if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
        echo "failed"
        fail "rdp_classifier.jar not found"
    else
        mkdir -p "${TOP_DIR}/share/rdp_classifier"
        ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
        echo "ok"
    fi
    # qiime_config
    printf '%s' "Setting up qiime_config..."
    if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
        echo "already exists"
    else
        mkdir -p "${TOP_DIR}/qiime"
        cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir ${ENV_DIR}/bin
EOF-qiime-config
        echo "ok"
    fi
}
#
# Top level script does the installation
#
# Refuses to install over an existing TOP_DIR, then runs the four
# installation stages in order and prints how to use the result.
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
# Quoted so that an installation path containing spaces is tested (and
# created) as a single path
if [ -e "${TOP_DIR}" ] ; then
    fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename "$0"): finished"
##
#