# HG changeset patch
# User pjbriggs
# Date 1575546243 0
# Node ID 7b9786a43a160584d75a2431bb01c325e2eabb73
# Parent 5ef333d1c303b871baf1fa23c1ab8af4e6d5452e
Uploaded test version 1.3.5.0.
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/.gitignore Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,7 @@
+\#*\#
+.\#*
+*~
+*.pyc
+*.bak
+auto_process_settings_local.py
+settings.ini
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/.shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/.shed.yml Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,16 @@
+---
+categories:
+- Metagenomics
+description: Analyse paired-end 16S rRNA data from Illumina Miseq
+homepage_url: https://github.com/MTutino/Amplicon_analysis
+long_description: |
+ A Galaxy tool wrapper to Mauro Tutino's Amplicon_analysis pipeline
+ at https://github.com/MTutino/Amplicon_analysis
+
+ The pipeline can analyse paired-end 16S rRNA data from Illumina Miseq
+ (Casava >= 1.8) and performs: QC and clean up of input data; removal of
+ singletons and chimeras and building of OTU table and phylogenetic tree;
+ beta and alpha diversity analysis
+name: amplicon_analysis_pipeline
+owner: pjbriggs
+remote_repository_url: https://github.com/pjbriggs/Amplicon_analysis-galaxy
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/README.rst Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,213 @@
+Amplicon_analysis-galaxy
+========================
+
+A Galaxy tool wrapper to Mauro Tutino's ``Amplicon_analysis`` pipeline
+script at https://github.com/MTutino/Amplicon_analysis
+
+The pipeline can analyse paired-end 16S rRNA data from Illumina Miseq
+(Casava >= 1.8) and performs the following operations:
+
+ * QC and clean up of input data
+ * Removal of singletons and chimeras and building of OTU table
+ and phylogenetic tree
+ * Beta and alpha diversity analysis
+
+Usage documentation
+===================
+
+Usage of the tool (including required inputs) is documented within
+the ``help`` section of the tool XML.
+
+Installing the tool in a Galaxy instance
+========================================
+
+The following sections describe how to install the tool files,
+dependencies and reference data, and how to configure the Galaxy
+instance to detect the dependencies and reference data correctly
+at run time.
+
+1. Install the tool from the toolshed
+-------------------------------------
+
+The core tool is hosted on the Galaxy toolshed, so it can be installed
+directly from there (this is the recommended route):
+
+ * https://toolshed.g2.bx.psu.edu/view/pjbriggs/amplicon_analysis_pipeline/
+
+Alternatively it can be installed manually; in this case there are two
+files to install:
+
+ * ``amplicon_analysis_pipeline.xml`` (the Galaxy tool definition)
+ * ``amplicon_analysis_pipeline.py`` (the Python wrapper script)
+
+Put these in a directory that is visible to Galaxy (e.g. a
+``tools/Amplicon_analysis/`` folder), and modify the ``tool_conf.xml``
+file to tell Galaxy to offer the tool by adding the line e.g.::
+
+    <tool file="Amplicon_analysis/amplicon_analysis_pipeline.xml" />
+
+2. Install the reference data
+-----------------------------
+
+The script ``References.sh`` from the pipeline package at
+https://github.com/MTutino/Amplicon_analysis can be run to install
+the reference data, for example::
+
+ cd /path/to/pipeline/data
+ wget https://github.com/MTutino/Amplicon_analysis/raw/master/References.sh
+ /bin/bash ./References.sh
+
+will install the data in ``/path/to/pipeline/data``.
+
+**NB** The final amount of data downloaded and uncompressed will be
+around 9GB.
+
+3. Configure reference data location in Galaxy
+----------------------------------------------
+
+The final step is to make your Galaxy installation aware of the
+location of the reference data, so that the reference data can be
+located when the tool is run.
+
+The tool locates the reference data via an environment variable called
+``AMPLICON_ANALYSIS_REF_DATA_PATH``, which needs to be set to the parent
+directory where the reference data has been installed.
+
+There are various ways to do this, depending on how your Galaxy
+installation is configured:
+
+ * **For local instances:** add a line to set it in the
+ ``config/local_env.sh`` file of your Galaxy installation (you
+ may need to create a new empty file first), e.g.::
+
+ export AMPLICON_ANALYSIS_REF_DATA_PATH=/path/to/pipeline/data
+
+ * **For production instances:** set the value in the ``job_conf.xml``
+ configuration file, e.g.::
+
+       <destination id="amplicon_analysis">
+           <env id="AMPLICON_ANALYSIS_REF_DATA_PATH">/path/to/pipeline/data</env>
+       </destination>
+
+   and then specify that the pipeline tool uses this destination::
+
+       <tool id="amplicon_analysis_pipeline" destination="amplicon_analysis" />
+
+ (For more about job destinations see the Galaxy documentation at
+ https://docs.galaxyproject.org/en/master/admin/jobs.html#job-destinations)
+
+4. Enable rendering of HTML outputs from pipeline
+-------------------------------------------------
+
+To ensure that HTML outputs are displayed correctly in Galaxy
+(for example the Vsearch OTU table heatmaps), Galaxy needs to be
+configured not to sanitize the outputs from the ``Amplicon_analysis``
+tool.
+
+Either:
+
+ * **For local instances:** set ``sanitize_all_html = False`` in
+ ``config/galaxy.ini`` (nb don't do this on production servers or
+ public instances!); or
+
+ * **For production instances:** add the ``Amplicon_analysis`` tool
+ to the display whitelist in the Galaxy instance:
+
+ - Set ``sanitize_whitelist_file = config/whitelist.txt`` in
+ ``config/galaxy.ini`` and restart Galaxy;
+ - Go to ``Admin>Manage Display Whitelist``, check the box for
+ ``Amplicon_analysis`` (hint: use your browser's 'find-in-page'
+ search function to help locate it) and click on
+ ``Submit new whitelist`` to update the settings.
+
+Additional details
+==================
+
+Some other things to be aware of:
+
+ * Note that using the Silva database requires a minimum of 18Gb RAM
+
+Known problems
+==============
+
+ * Only the ``VSEARCH`` pipeline in Mauro's script is currently
+ available via the Galaxy tool; the ``USEARCH`` and ``QIIME``
+ pipelines have yet to be implemented.
+ * The images in the tool help section are not visible if the
+ tool has been installed locally, or if it has been installed in
+ a Galaxy instance which is served from a subdirectory.
+
+ These are both problems with Galaxy and not the tool, see
+ https://github.com/galaxyproject/galaxy/issues/4490 and
+ https://github.com/galaxyproject/galaxy/issues/1676
+
+Appendix: installing the dependencies manually
+==============================================
+
+If the tool is installed from the Galaxy toolshed (recommended) then
+the dependencies should be installed automatically and this step can
+be skipped.
+
+Otherwise the ``install_amplicon_analysis-1.3.5.sh`` script can be used
+to fetch and install the dependencies locally, for example::
+
+    install_amplicon_analysis-1.3.5.sh /path/to/local_tool_dependencies
+
+(This is the same script as is used to install dependencies from the
+toolshed.) This can take some time to complete, and when completed will
+have created a directory called ``Amplicon_analysis-1.3.5`` containing
+the dependencies under the specified top level directory.
+
+**NB** The installed dependencies will occupy around 2.6G of disk
+space.
+
+You will need to make sure that the ``bin`` subdirectory of this
+directory is on Galaxy's ``PATH`` at runtime, for the tool to be able
+to access the dependencies - for example by adding a line to the
+``local_env.sh`` file like::
+
+    export PATH=/path/to/local_tool_dependencies/Amplicon_analysis-1.3.5/bin:$PATH
+
+History
+=======
+
+========== ======================================================================
+Version Changes
+---------- ----------------------------------------------------------------------
+1.3.5.0 Updated to Amplicon_Analysis_Pipeline version 1.3.5.
+1.2.3.0 Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
+ dependencies via tool_dependencies.xml.
+1.2.2.0 Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
+ jackknifed analysis which is not captured by Galaxy tool)
+1.2.1.0 Updated to Amplicon_Analysis_Pipeline version 1.2.1 (adds
+ option to use the Human Oral Microbiome Database v15.1, and
+ updates SILVA database to v123)
+1.1.0 First official version on Galaxy toolshed.
+1.0.6 Expand inline documentation to provide detailed usage guidance.
+1.0.5 Updates including:
+
+ - Capture read counts from quality control as new output dataset
+ - Capture FastQC per-base quality boxplots for each sample as
+ new output dataset
+ - Add support for -l option (sliding window length for trimming)
+ - Default for -L set to "200"
+1.0.4 Various updates:
+
+ - Additional outputs are captured when a "Categories" file is
+ supplied (alpha diversity rarefaction curves and boxplots)
+ - Sample names derived from Fastqs in a collection of pairs
+ are trimmed to SAMPLE_S* (for Illumina-style Fastq filenames)
+ - Input Fastqs can now be of more general ``fastq`` type
+ - Log file outputs are captured in new output dataset
+ - User can specify a "title" for the job which is copied into
+ the dataset names (to distinguish outputs from different runs)
+ - Improved detection and reporting of problems with input
+ Metatable
+1.0.3 Take the sample names from the collection dataset names when
+ using collection as input (this is now the default input mode);
+ collect additional output dataset; disable ``usearch``-based
+ pipelines (i.e. ``UPARSE`` and ``QIIME``).
+1.0.2 Enable support for FASTQs supplied via dataset collections and
+ fix some broken output datasets.
+1.0.1 Initial version
+========== ======================================================================
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/amplicon_analysis_pipeline.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/amplicon_analysis_pipeline.py Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,370 @@
+#!/usr/bin/env python
+#
+# Wrapper script to run Amplicon_analysis_pipeline.sh
+# from Galaxy tool
+
+import sys
+import os
+import argparse
+import subprocess
+import glob
+
+class PipelineCmd(object):
+ def __init__(self,cmd):
+ self.cmd = [str(cmd)]
+ def add_args(self,*args):
+ for arg in args:
+ self.cmd.append(str(arg))
+ def __repr__(self):
+ return ' '.join([str(arg) for arg in self.cmd])
+
+def ahref(target,name=None,type=None):
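+    # Return an HTML anchor element linking to 'target', e.g. (illustrative)
+    # ahref("pipeline.log") -> "<a href='pipeline.log'>pipeline.log</a>"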
+ if name is None:
+ name = os.path.basename(target)
+    ahref = "<a href='%s'" % target
+    if type is not None:
+        ahref += " type='%s'" % type
+    ahref += ">%s</a>" % name
+    return ahref
+
+def check_errors():
+ # Errors in Amplicon_analysis_pipeline.log
+ with open('Amplicon_analysis_pipeline.log','r') as pipeline_log:
+ log = pipeline_log.read()
+ if "Names in the first column of Metatable.txt and in the second column of Final_name.txt do not match" in log:
+ print_error("""*** Sample IDs don't match dataset names ***
+
+The sample IDs (first column of the Metatable file) don't match the
+supplied sample names for the input Fastq pairs.
+""")
+ # Errors in pipeline output
+ with open('pipeline.log','r') as pipeline_log:
+ log = pipeline_log.read()
+ if "Errors and/or warnings detected in mapping file" in log:
+ with open("Metatable_log/Metatable.log","r") as metatable_log:
+ # Echo the Metatable log file to the tool log
+ print_error("""*** Error in Metatable mapping file ***
+
+%s""" % metatable_log.read())
+ elif "No header line was found in mapping file" in log:
+ # Report error to the tool log
+ print_error("""*** No header in Metatable mapping file ***
+
+Check you've specified the correct file as the input Metatable""")
+
+def print_error(message):
+ width = max([len(line) for line in message.split('\n')]) + 4
+ sys.stderr.write("\n%s\n" % ('*'*width))
+ for line in message.split('\n'):
+ sys.stderr.write("* %s%s *\n" % (line,' '*(width-len(line)-4)))
+ sys.stderr.write("%s\n\n" % ('*'*width))
+
+def clean_up_name(sample):
+ # Remove extensions and trailing "_L[0-9]+_001" from
+ # Fastq pair names
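+    # e.g. (illustrative) "PJB1_S1_L001_001.fastq" is reduced to "PJB1_S1"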
+ sample_name = '.'.join(sample.split('.')[:1])
+ split_name = sample_name.split('_')
+ if split_name[-1] == "001":
+ split_name = split_name[:-1]
+ if split_name[-1].startswith('L'):
+ try:
+ int(split_name[-1][1:])
+ split_name = split_name[:-1]
+ except ValueError:
+ pass
+ return '_'.join(split_name)
+
+def list_outputs(filen=None):
+ # List the output directory contents
+ # If filen is specified then will be the filename to
+ # write to, otherwise write to stdout
+ if filen is not None:
+ fp = open(filen,'w')
+ else:
+ fp = sys.stdout
+ results_dir = os.path.abspath("RESULTS")
+ fp.write("Listing contents of output dir %s:\n" % results_dir)
+ ix = 0
+ for d,dirs,files in os.walk(results_dir):
+ ix += 1
+ fp.write("-- %d: %s\n" % (ix,
+ os.path.relpath(d,results_dir)))
+ for f in files:
+ ix += 1
+            fp.write("---- %d: %s\n" %
+                     (ix,os.path.relpath(os.path.join(d,f),results_dir)))
+ # Close output file
+ if filen is not None:
+ fp.close()
+
+if __name__ == "__main__":
+ # Command line
+ print "Amplicon analysis: starting"
+ p = argparse.ArgumentParser()
+ p.add_argument("metatable",
+ metavar="METATABLE_FILE",
+ help="Metatable.txt file")
+ p.add_argument("fastq_pairs",
+ metavar="SAMPLE_NAME FQ_R1 FQ_R2",
+ nargs="+",
+ default=list(),
+ help="Triplets of SAMPLE_NAME followed by "
+ "a R1/R2 FASTQ file pair")
+ p.add_argument("-g",dest="forward_pcr_primer")
+ p.add_argument("-G",dest="reverse_pcr_primer")
+ p.add_argument("-q",dest="trimming_threshold")
+ p.add_argument("-O",dest="minimum_overlap")
+ p.add_argument("-L",dest="minimum_length")
+ p.add_argument("-l",dest="sliding_window_length")
+ p.add_argument("-P",dest="pipeline",
+ choices=["Vsearch","DADA2"],
+ type=str,
+ default="Vsearch")
+ p.add_argument("-S",dest="use_silva",action="store_true")
+ p.add_argument("-H",dest="use_homd",action="store_true")
+ p.add_argument("-r",dest="reference_data_path")
+ p.add_argument("-c",dest="categories_file")
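+    # Example invocation (illustrative paths and sample names):
+    #   amplicon_analysis_pipeline.py -P Vsearch -r /path/to/ref_data \
+    #       Metatable.txt \
+    #       PJB1_S1 PJB1_S1_R1.fastq PJB1_S1_R2.fastq \
+    #       PJB1_S2 PJB1_S2_R1.fastq PJB1_S2_R2.fastq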
+ args = p.parse_args()
+
+ # Build the environment for running the pipeline
+ print "Amplicon analysis: building the environment"
+ metatable_file = os.path.abspath(args.metatable)
+ os.symlink(metatable_file,"Metatable.txt")
+ print "-- made symlink to Metatable.txt"
+
+ # Link to Categories.txt file (if provided)
+ if args.categories_file is not None:
+ categories_file = os.path.abspath(args.categories_file)
+ os.symlink(categories_file,"Categories.txt")
+ print "-- made symlink to Categories.txt"
+
+ # Link to FASTQs and construct Final_name.txt file
+ sample_names = []
+ print "-- making Final_name.txt"
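+    # Final_name.txt maps each renamed Fastq to its sample name as
+    # tab-separated pairs, e.g. (illustrative):
+    #   PJB1_S1_R1_.fastq    PJB1_S1
+    #   PJB1_S1_R2_.fastq    PJB1_S1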
+ with open("Final_name.txt",'w') as final_name:
+ fastqs = iter(args.fastq_pairs)
+ for sample_name,fqr1,fqr2 in zip(fastqs,fastqs,fastqs):
+ sample_name = clean_up_name(sample_name)
+ print " %s" % sample_name
+ r1 = "%s_R1_.fastq" % sample_name
+ r2 = "%s_R2_.fastq" % sample_name
+ os.symlink(fqr1,r1)
+ os.symlink(fqr2,r2)
+ final_name.write("%s\n" % '\t'.join((r1,sample_name)))
+ final_name.write("%s\n" % '\t'.join((r2,sample_name)))
+ sample_names.append(sample_name)
+
+ # Reference database
+ if args.use_silva:
+ ref_database = "silva"
+ elif args.use_homd:
+ ref_database = "homd"
+ else:
+ ref_database = "gg"
+
+ # Construct the pipeline command
+ print "Amplicon analysis: constructing pipeline command"
+ pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh")
+ if args.forward_pcr_primer:
+ pipeline.add_args("-g",args.forward_pcr_primer)
+ if args.reverse_pcr_primer:
+ pipeline.add_args("-G",args.reverse_pcr_primer)
+ if args.trimming_threshold:
+ pipeline.add_args("-q",args.trimming_threshold)
+ if args.minimum_overlap:
+ pipeline.add_args("-O",args.minimum_overlap)
+ if args.minimum_length:
+ pipeline.add_args("-L",args.minimum_length)
+ if args.sliding_window_length:
+ pipeline.add_args("-l",args.sliding_window_length)
+ if args.reference_data_path:
+ pipeline.add_args("-r",args.reference_data_path)
+ pipeline.add_args("-P",args.pipeline)
+ if ref_database == "silva":
+ pipeline.add_args("-S")
+ elif ref_database == "homd":
+ pipeline.add_args("-H")
+
+ # Echo the pipeline command to stdout
+ print "Running %s" % pipeline
+
+ # Run the pipeline
+ with open("pipeline.log","w") as pipeline_out:
+ try:
+ subprocess.check_call(pipeline.cmd,
+ stdout=pipeline_out,
+ stderr=subprocess.STDOUT)
+ exit_code = 0
+ print "Pipeline completed ok"
+ except subprocess.CalledProcessError as ex:
+ # Non-zero exit status
+ sys.stderr.write("Pipeline failed: exit code %s\n" %
+ ex.returncode)
+ exit_code = ex.returncode
+ except Exception as ex:
+ # Some other problem
+ sys.stderr.write("Unexpected error: %s\n" % str(ex))
+ exit_code = 1
+
+ # Write out the list of outputs
+ outputs_file = "Pipeline_outputs.txt"
+ list_outputs(outputs_file)
+
+ # Check for log file
+ log_file = "Amplicon_analysis_pipeline.log"
+ if os.path.exists(log_file):
+ print "Found log file: %s" % log_file
+ if exit_code == 0:
+ # Create an HTML file to link to log files etc
+ # NB the paths to the files should be correct once
+ # copied by Galaxy on job completion
+ with open("pipeline_outputs.html","w") as html_out:
+                html_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: log files</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: log files</h1>
+<ul>
+""")
+                # Link to the pipeline log files and the output listing
+                for log in ("Amplicon_analysis_pipeline.log",
+                            "pipeline.log",
+                            "Pipeline_outputs.txt"):
+                    html_out.write("<li>%s</li>\n" % ahref(log,type="text/plain"))
+                html_out.write("""</ul>
+</body>
+</html>
+""")
+        else:
+            # Report any known error messages from the pipeline logs
+            check_errors()
+
+    # Collate per-base quality boxplots from the FastQC outputs
+    phred_score = args.trimming_threshold if args.trimming_threshold else 20
+    with open("fastqc_quality_boxplots.html","w") as quality_boxplots:
+        quality_boxplots.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: per-base quality boxplots</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: per-base quality boxplots</h1>
+""")
+        # Look for raw and trimmed FastQC output for each sample
+        for sample_name in sample_names:
+            fastqc_dir = os.path.join(sample_name,"FastQC")
+            quality_boxplots.write("<h2>%s</h2>" % sample_name)
+            for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
+                quality_boxplots.write("<h3>%s</h3>" % d)
+                fastqc_html_files = glob.glob(
+                    os.path.join(fastqc_dir,d,"*_fastqc.html"))
+                if not fastqc_html_files:
+                    quality_boxplots.write("<p>No FastQC outputs found</p>")
+                    continue
+                # Pull out the per-base quality boxplots
+                for f in fastqc_html_files:
+                    boxplot = None
+                    with open(f) as fp:
+                        for line in fp.read().split(">"):
+                            try:
+                                line.index("alt=\"Per base quality graph\"")
+                                boxplot = line + ">"
+                                break
+                            except ValueError:
+                                pass
+                    if boxplot is None:
+                        boxplot = "Missing plot"
+                    quality_boxplots.write("<h4>%s</h4><p>%s</p>" %
+                                           (os.path.basename(f),boxplot))
+        quality_boxplots.write("""</body>
+</html>
+""")
+
+    # Alpha diversity boxplots (when a categories file was supplied)
+    if args.categories_file is not None:
+        print "Amplicon analysis: indexing alpha diversity boxplots"
+        boxplots_dir = os.path.abspath(
+            os.path.join("RESULTS",
+                         "%s_%s" % (args.pipeline,ref_database),
+                         "Alpha_diversity",
+                         "Alpha_diversity_boxplot",
+                         "Categories_shannon"))
+        print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir
+        boxplot_pdfs = sorted(os.path.basename(pdf) for pdf in
+                              glob.glob(os.path.join(boxplots_dir,"*.pdf")))
+        with open("alpha_diversity_boxplots.html","w") as boxplots_out:
+            boxplots_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: alpha diversity boxplots (Shannon)</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: alpha diversity boxplots (Shannon)</h1>
+<ul>
+""")
+            for pdf in boxplot_pdfs:
+                boxplots_out.write("<li>%s</li>\n" % ahref(pdf))
+            boxplots_out.write("</ul>\n")
+            boxplots_out.write("""</body>
+</html>
+""")
+ # Finish
+ print "Amplicon analysis: finishing, exit code: %s" % exit_code
+    print "Amplicon analysis: finishing, exit code: %s" % exit_code
+    sys.exit(exit_code)
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/amplicon_analysis_pipeline.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/amplicon_analysis_pipeline.xml Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,502 @@
+<tool id="amplicon_analysis_pipeline" name="Amplicon analysis pipeline" version="1.3.5.0">
+  <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
+  <requirements>
+    <requirement type="package" version="1.3.5">amplicon_analysis_pipeline</requirement>
+  </requirements>
+
+  <!-- command, inputs and outputs definitions; output filter expressions
+       include: pipeline['pipeline_name'] == 'Vsearch',
+       pipeline['pipeline_name'] == 'DADA2' and
+       categories_file_in is not None -->
+
+  <help><![CDATA[
+This pipeline has been designed for the analysis of 16S rRNA data from
+Illumina Miseq (Casava >= 1.8) paired-end reads.
+
+Usage
+-----
+
+1. Preparation of the mapping file and format of unique sample id
+*****************************************************************
+
+Before using the amplicon analysis pipeline it is necessary to follow
+the steps below to avoid analysis failures and to ensure that samples
+are labelled appropriately. Sample names for the labelling are derived
+from the fastq file names generated by the sequencer. The labels will
+include everything between the beginning of the name and the sample
+number (from C11 to S19 in Fig. 1).
+
+.. image:: Pipeline_description_Fig1.png
+ :height: 46
+ :width: 382
+
+**Figure 1**
+
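+For example (an illustrative file name): a Fastq pair called
+``Sample1-RUN1_S1_L001_R1_001.fastq`` and
+``Sample1-RUN1_S1_L001_R2_001.fastq`` would give the sample label
+``Sample1-RUN1_S1``.
+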
+If analysing 16S data from multiple runs:
+
+The samples from different runs may have identical IDs. For example,
+when sequencing the same samples twice, these could by chance be at
+the same position in both runs. This would cause the fastq files
+to have exactly the same IDs (Fig. 2).
+
+.. image:: Pipeline_description_Fig2.png
+ :height: 100
+ :width: 463
+
+**Figure 2**
+
+In case of identical sample IDs the pipeline will fail to run and
+generate an error at the beginning of the analysis.
+
+To avoid having to change the file names, before uploading the files,
+ensure that the sample IDs are not repeated.
+
+2. To upload the file
+*********************
+
+Click on **Get Data/Upload File** from the Galaxy tool panel on the
+left hand side.
+
+From the pop-up window, choose how to upload the file. The
+**Choose local file** option can be used for files up to 4Gb. Fastq files
+from Illumina MiSeq will rarely be bigger than 4Gb and this option is
+recommended.
+
+After choosing the files click **Start** to begin the upload. The window can
+now be closed and the files will be uploaded onto the Galaxy server. You
+will see the progress on the ``HISTORY`` panel on the right
+side of the screen. The colour will change from grey (queuing), to yellow
+(uploading) and finally green (uploaded).
+
+Once all the files are uploaded, click on the operations on multiple
+datasets icon and select the fastq files that need to be analysed.
+Click on the tab **For all selected...** and on the option
+**Build List of Dataset pairs** (Fig. 3).
+
+.. image:: Pipeline_description_Fig3.png
+ :height: 247
+ :width: 586
+
+**Figure 3**
+
+Change the filter parameter ``_1`` and ``_2`` to be ``_R1`` and ``_R2``.
+The fastq files forward R1 and reverse R2 should now appear in the
+corresponding columns.
+
+Select **Autopair**. This creates a collection of paired fastq files for
+the forward and reverse reads for each sample. The name of the pairs will
+be the ones used by the pipeline. You are free to change the names at this
+point as long as they are the same used in the Metatable file
+(see section 3).
+
+Name the collection and click on **create list**. This reduces the time
+required to input the forward and reverse reads for each individual sample.
+
+3. Create the Metatable files
+*****************************
+
+Metatable.txt
+~~~~~~~~~~~~~
+
+Click on the list of pairs you just created to see the name of the single
+pairs. The name of the pairs will be the ones used by the pipeline,
+therefore, these are the names that need to be used in the Metatable file.
+
+The Metatable file has to be in QIIME format. You can find a description
+of it on QIIME website http://qiime.org/documentation/file_formats.html
+
+EXAMPLE::
+
+ #SampleID BarcodeSequence LinkerPrimerSequence Disease Gender Description
+ Mock-RUN1 TAAGGCGAGCGTAAGA PsA Male Control
+ Mock-RUN2 CGTACTAGGCGTAAGA PsA Male Control
+ Mock-RUN3 AGGCAGAAGCGTAAGA PsC Female Control
+
+Briefly: the column ``LinkerPrimerSequence`` is empty but it cannot be
+deleted. The header is very important. ``#SampleID``, ``BarcodeSequence``,
+``LinkerPrimerSequence`` and ``Description`` are mandatory. Between
+``LinkerPrimerSequence`` and ``Description`` you can add as many columns
+as you want. For every column a PCoA plot will be created (see
+**Results** section). You can create this file in Excel and it will have
+to be saved as ``Text(Tab delimited)``.
+
+During the analysis the Metatable.txt will be checked to ensure that the
+file has the correct format. If necessary, this will be modified and will
+be available as Metatable_corrected.txt in the history panel. If you are
+going to use the metatable file for any other statistical analyses,
+remember to use the ``Metatable_mod.txt`` one, otherwise the sample
+names might not match!
+
+Categories.txt (optional)
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This file is required if you want to get box plots for comparison of
+alpha diversity indices (see **Results** section). The file is a list
+(without header and IN ONE COLUMN) of categories present in the
+Metatable.txt file. THE NAMES YOU ARE USING HAVE TO BE THE SAME AS THE
+ONES USED IN THE METATABLE.TXT. You can create this file in Excel and
+it will have to be saved as ``Text(Tab delimited)``.
+
+EXAMPLE::
+
+ Disease
+ Gender
+
+Metatable and categories files can be uploaded using Get Data as done
+with the fastq files.
+
+4. Analysis
+***********
+
+Under **Amplicon_Analysis_Pipeline**
+
+ * **Title** Name to distinguish between the runs. It will be shown at
+ the beginning of each output file name.
+
+ * **Input Metatable.txt file** Select the Metatable.txt file related to
+ this analysis
+
+ * **Input Categories.txt file (Optional)** Select the Categories.txt file
+ related to this analysis
+
+ * **Input FASTQ type** select *Dataset pairs in a collection* and, then,
+ the collection of pairs you created earlier.
+
+ * **Forward/Reverse PCR primer sequence** if the PCR primer sequences
+ have not been removed from the MiSeq during the fastq creation, they
+ have to be removed before the analysis. Insert the PCR primer sequence
+ in the corresponding field. DO NOT include any barcode or adapter
+ sequence. If the PCR primers have been already trimmed by the MiSeq,
+ and you include the sequence in this field, this would lead to an error.
+ Only include the sequences if still present in the fastq files.
+
+ * **Threshold quality below which reads will be trimmed** Choose the
+ Phred score used by Sickle to trim the reads at the 3’ end.
+
+ * **Minimum length to retain a read after trimming** If the read length
+ after trimming is shorter than a user defined length, the read, along
+ with the corresponding read pair, will be discarded.
+
+ * **Minimum overlap in bp between forward and reverse reads** Choose the
+ minimum basepair overlap used by Pandaseq to assemble the reads.
+ Default is 10.
+
+ * **Minimum length in bp to keep a sequence after overlapping** Choose the
+ minimum sequence length used by Pandaseq to keep a sequence after the
+ overlapping. This depends on the expected amplicon length. Default is
+ 380 (used for V3-V4 16S sequencing; expected length ~440bp)
+
+ * **Pipeline to use for analysis** Choose the pipeline to use for OTU
+ clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
+ and ``DADA2`` pipelines.
+
+ * **Reference database** Choose between ``GreenGenes``, ``Silva`` or
+ ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
+
+Click on **Execute** to start the analysis.
+
+5. Results
+**********
+
+Results are entirely generated using QIIME scripts. The results will
+appear in the History panel when the analysis is completed.
+
+The following outputs are captured:
+
+ * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
+ The OTU table in BIOM format (http://biom-format.org/)
+
+ * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
+ (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
+
+ * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
+ Phylum, Genus and Species level
+ (http://qiime.org/scripts/summarize_taxa.html and
+ http://qiime.org/scripts/plot_taxa_summary.html)
+
+ * **OTUs_count_file** Summary of OTU counts per sample
+ (http://biom-format.org/documentation/summarizing_biom_tables.html)
+
+ * **Table_summary_file** Summary of sequences counts per sample
+ (http://biom-format.org/documentation/summarizing_biom_tables.html)
+
+ * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
+ Fasta file with OTU sequences (Vsearch|DADA2)
+
+ * **Heatmap_PDF** OTU heatmap in PDF format
+ (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
+
+ * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
+ format using weighted Unifrac distance measure. Samples are grouped
+ by the column names present in the Metatable file. The samples are
+ firstly rarefied to the minimum sequencing depth
+ (http://qiime.org/scripts/beta_diversity_through_plots.html )
+
+ * **Vsearch_beta_diversity_unweighted_2D_plots_HTML** PCoA plots in HTML
+ format using Unweighted Unifrac distance measure. Samples are grouped
+ by the column names present in the Metatable file. The samples are
+ firstly rarefied to the minimum sequencing depth
+ (http://qiime.org/scripts/beta_diversity_through_plots.html )
+
+Code availability
+-----------------
+
+**Code is available at** https://github.com/MTutino/Amplicon_analysis
+
+Credits
+-------
+
+Pipeline author: Mauro Tutino
+
+Galaxy tool: Peter Briggs
+
+  ]]></help>
+  <citations>
+    <citation type="bibtex">
+ @misc{githubAmplicon_analysis,
+ author = {Tutino, Mauro},
+ year = {2017},
+ title = {Amplicon Analysis Pipeline},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ url = {https://github.com/MTutino/Amplicon_analysis},
+}
+    </citation>
+  </citations>
+</tool>
diff -r 5ef333d1c303 -r 7b9786a43a16 Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/install_amplicon_analysis-1.3.5.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Amplicon_analysis-galaxy-update-to-Amplicon_analysis_pipeline-1.3/install_amplicon_analysis-1.3.5.sh Thu Dec 05 11:44:03 2019 +0000
@@ -0,0 +1,394 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+ echo "Usage: $(basename $0) [DIR]"
+ echo ""
+ echo "Installs the Amplicon_analysis_pipeline package plus"
+ echo "dependencies in directory DIR (or current directory "
+ echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+ # Check if help was requested
+ case "$1" in
+ --help|-h)
+ usage
+ exit 0
+ ;;
+ esac
+ # Assume it's the installation directory
+ cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.5
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+ echo ""
+ echo ERROR $@ >&2
+ echo ""
+ echo "$(basename $0): installation failed"
+ exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
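+# (e.g. "#!/path/to/conda/bin/python" becomes "#!/usr/bin/env python")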
+rewrite_conda_shebangs()
+{
+ pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+ find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+ CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+ echo conda version: ${CONDA_VERSION}
+ if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+ echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+ ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+ ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+ else
+ echo "conda version ok"
+ fi
+}
+#
+# Install conda
+install_conda()
+{
+ echo "++++++++++++++++"
+ echo "Installing conda"
+ echo "++++++++++++++++"
+ if [ -e ${CONDA_DIR} ] ; then
+ echo "*** $CONDA_DIR already exists ***" >&2
+ return
+ fi
+ local cwd=$(pwd)
+ local wd=$(mktemp -d)
+ cd $wd
+ wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+ bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+ echo Installed conda in ${CONDA_DIR}
+ # Reset the conda version to a known working version
+ # (to avoid problems observed with e.g. conda 4.7.10)
+ echo ""
+ reset_conda_version
+ # Update the installation files
+ # This is to avoid problems when the length the installation
+ # directory path exceeds the limit for the shebang statement
+ # in the conda files
+ echo ""
+ echo -n "Rewriting conda shebangs..."
+ rewrite_conda_shebangs
+ echo "ok"
+ echo -n "Adding conda bin to PATH..."
+ PATH=${CONDA_BIN}:$PATH
+ echo "ok"
+ cd $cwd
+ rm -rf $wd/*
+ rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+ echo "+++++++++++++++++++++++++"
+ echo "Installing conda packages"
+ echo "+++++++++++++++++++++++++"
+ local cwd=$(pwd)
+ local wd=$(mktemp -d)
+ cd $wd
+ cat >environment.yml <${BIN_DIR}/Amplicon_analysis_pipeline.sh <${BIN_DIR}/install_reference_data.sh <${BIN_DIR}/ChimeraSlayer.pl <${TOP_DIR}/qiime/qiime_config <&2
+ echo ""
+ echo "$(basename $0): installation failed"
+ exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+ pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+ find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Install conda
+install_conda()
+{
+ echo "++++++++++++++++"
+ echo "Installing conda"
+ echo "++++++++++++++++"
+ if [ -e ${CONDA_DIR} ] ; then
+ echo "*** $CONDA_DIR already exists ***" >&2
+ return
+ fi
+ local cwd=$(pwd)
+ local wd=$(mktemp -d)
+ cd $wd
+ wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+ bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+ echo Installed conda in ${CONDA_DIR}
+ # Update the installation files
+ # This is to avoid problems when the length the installation
+ # directory path exceeds the limit for the shebang statement
+ # in the conda files
+ echo ""
+ echo -n "Rewriting conda shebangs..."
+ rewrite_conda_shebangs
+ echo "ok"
+ echo -n "Adding conda bin to PATH..."
+ PATH=${CONDA_BIN}:$PATH
+ echo "ok"
+ cd $cwd
+ rm -rf $wd/*
+ rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+ echo "+++++++++++++++++++++++++"
+ echo "Installing conda packages"
+ echo "+++++++++++++++++++++++++"
+ local cwd=$(pwd)
+ local wd=$(mktemp -d)
+ cd $wd
+ cat >environment.yml <${BIN_DIR}/Amplicon_analysis_pipeline.sh <${BIN_DIR}/install_reference_data.sh <${BIN_DIR}/ChimeraSlayer.pl <INSTALL.log 2>&1
+ echo "ok"
+ cd R-3.2.1
+ echo -n "Running configure..."
+ ./configure --prefix=$INSTALL_DIR --with-x=no --with-readline=no >>INSTALL.log 2>&1
+ echo "ok"
+ echo -n "Running make..."
+ make >>INSTALL.log 2>&1
+ echo "ok"
+ echo -n "Running make install..."
+ make install >>INSTALL.log 2>&1
+ echo "ok"
+ cd $cwd
+ rm -rf $wd/*
+ rmdir $wd
+ . ${CONDA_BIN}/deactivate
+}
+setup_pipeline_environment()
+{
+ echo "+++++++++++++++++++++++++++++++"
+ echo "Setting up pipeline environment"
+ echo "+++++++++++++++++++++++++++++++"
+ # vsearch113
+ echo -n "Setting up vsearch113..."
+ if [ -e ${BIN_DIR}/vsearch113 ] ; then
+ echo "already exists"
+ elif [ ! -e ${ENV_DIR}/bin/vsearch ] ; then
+ echo "failed"
+ fail "vsearch not found"
+ else
+ ln -s ${ENV_DIR}/bin/vsearch ${BIN_DIR}/vsearch113
+ echo "ok"
+ fi
+ # fasta_splitter.pl
+ echo -n "Setting up fasta_splitter.pl..."
+ if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+ echo "already exists"
+ elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+ echo "failed"
+ fail "fasta-splitter.pl not found"
+ else
+ ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+ echo "ok"
+ fi
+ # rdp_classifier.jar
+ local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+ echo -n "Setting up rdp_classifier.jar..."
+ if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+ echo "already exists"
+ elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+ echo "failed"
+ fail "rdp_classifier.jar not found"
+ else
+ mkdir -p ${TOP_DIR}/share/rdp_classifier
+ ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+ echo "ok"
+ fi
+ # qiime_config
+ echo -n "Setting up qiime_config..."
+ if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+ echo "already exists"
+ else
+ mkdir -p ${TOP_DIR}/qiime
+ cat >${TOP_DIR}/qiime/qiime_config <
+
+
+
+
+ https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/update-to-Amplicon_analysis_pipeline-1.3/install_amplicon_analysis-1.3.5.sh
+
+ sh ./install_amplicon_analysis.sh $INSTALL_DIR
+
+
+ $INSTALL_DIR/Amplicon_analysis-1.3.5/bin
+
+
+
+
+
diff -r 5ef333d1c303 -r 7b9786a43a16 README.rst
--- a/README.rst Wed Oct 17 08:15:19 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-Amplicon_analysis-galaxy
-========================
-
-A Galaxy tool wrapper to Mauro Tutino's ``Amplicon_analysis`` pipeline
-script at https://github.com/MTutino/Amplicon_analysis
-
-The pipeline can analyse paired-end 16S rRNA data from Illumina Miseq
-(Casava >= 1.8) and performs the following operations:
-
- * QC and clean up of input data
- * Removal of singletons and chimeras and building of OTU table
- and phylogenetic tree
- * Beta and alpha diversity of analysis
-
-Usage documentation
-===================
-
-Usage of the tool (including required inputs) is documented within
-the ``help`` section of the tool XML.
-
-Installing the tool in a Galaxy instance
-========================================
-
-The following sections describe how to install the tool files,
-dependencies and reference data, and how to configure the Galaxy
-instance to detect the dependencies and reference data correctly
-at run time.
-
-1. Install the tool from the toolshed
--------------------------------------
-
-The core tool is hosted on the Galaxy toolshed, so it can be installed
-directly from there (this is the recommended route):
-
- * https://toolshed.g2.bx.psu.edu/view/pjbriggs/amplicon_analysis_pipeline/
-
-Alternatively it can be installed manually; in this case there are two
-files to install:
-
- * ``amplicon_analysis_pipeline.xml`` (the Galaxy tool definition)
- * ``amplicon_analysis_pipeline.py`` (the Python wrapper script)
-
-Put these in a directory that is visible to Galaxy (e.g. a
-``tools/Amplicon_analysis/`` folder), and modify the ``tools_conf.xml``
-file to tell Galaxy to offer the tool by adding the line e.g.::
-
-
-
-2. Install the reference data
------------------------------
-
-The script ``References.sh`` from the pipeline package at
-https://github.com/MTutino/Amplicon_analysis can be run to install
-the reference data, for example::
-
- cd /path/to/pipeline/data
- wget https://github.com/MTutino/Amplicon_analysis/raw/master/References.sh
- /bin/bash ./References.sh
-
-will install the data in ``/path/to/pipeline/data``.
-
-**NB** The final amount of data downloaded and uncompressed will be
-around 9GB.
-
-3. Configure reference data location in Galaxy
-----------------------------------------------
-
-The final step is to make your Galaxy installation aware of the
-location of the reference data, so it can locate them both when the
-tool is run.
-
-The tool locates the reference data via an environment variable called
-``AMPLICON_ANALYSIS_REF_DATA_PATH``, which needs to set to the parent
-directory where the reference data has been installed.
-
-There are various ways to do this, depending on how your Galaxy
-installation is configured:
-
- * **For local instances:** add a line to set it in the
- ``config/local_env.sh`` file of your Galaxy installation (you
- may need to create a new empty file first), e.g.::
-
- export AMPLICON_ANALYSIS_REF_DATA_PATH=/path/to/pipeline/data
-
- * **For production instances:** set the value in the ``job_conf.xml``
- configuration file, e.g.::
-
-
- /path/to/pipeline/data
-
-
- and then specify that the pipeline tool uses this destination::
-
-
-
- (For more about job destinations see the Galaxy documentation at
- https://docs.galaxyproject.org/en/master/admin/jobs.html#job-destinations)
-
-4. Enable rendering of HTML outputs from pipeline
--------------------------------------------------
-
-To ensure that HTML outputs are displayed correctly in Galaxy
-(for example the Vsearch OTU table heatmaps), Galaxy needs to be
-configured not to sanitize the outputs from the ``Amplicon_analysis``
-tool.
-
-Either:
-
- * **For local instances:** set ``sanitize_all_html = False`` in
- ``config/galaxy.ini`` (nb don't do this on production servers or
- public instances!); or
-
- * **For production instances:** add the ``Amplicon_analysis`` tool
- to the display whitelist in the Galaxy instance:
-
- - Set ``sanitize_whitelist_file = config/whitelist.txt`` in
- ``config/galaxy.ini`` and restart Galaxy;
- - Go to ``Admin>Manage Display Whitelist``, check the box for
- ``Amplicon_analysis`` (hint: use your browser's 'find-in-page'
- search function to help locate it) and click on
- ``Submit new whitelist`` to update the settings.
-
-Additional details
-==================
-
-Some other things to be aware of:
-
- * Note that using the Silva database requires a minimum of 18Gb RAM
-
-Known problems
-==============
-
- * Only the ``VSEARCH`` pipeline in Mauro's script is currently
- available via the Galaxy tool; the ``USEARCH`` and ``QIIME``
- pipelines have yet to be implemented.
- * The images in the tool help section are not visible if the
- tool has been installed locally, or if it has been installed in
- a Galaxy instance which is served from a subdirectory.
-
- These are both problems with Galaxy and not the tool, see
- https://github.com/galaxyproject/galaxy/issues/4490 and
- https://github.com/galaxyproject/galaxy/issues/1676
-
-Appendix: installing the dependencies manually
-==============================================
-
-If the tool is installed from the Galaxy toolshed (recommended) then
-the dependencies should be installed automatically and this step can
-be skipped.
-
-Otherwise the ``install_amplicon_analysis_deps.sh`` script can be used
-to fetch and install the dependencies locally, for example::
-
- install_amplicon_analysis.sh /path/to/local_tool_dependencies
-
-(This is the same script as is used to install dependencies from the
-toolshed.) This can take some time to complete, and when completed will
-have created a directory called ``Amplicon_analysis-1.2.3`` containing
-the dependencies under the specified top level directory.
-
-**NB** The installed dependencies will occupy around 2.6G of disk
-space.
-
-You will need to make sure that the ``bin`` subdirectory of this
-directory is on Galaxy's ``PATH`` at runtime, for the tool to be able
-to access the dependencies - for example by adding a line to the
-``local_env.sh`` file like::
-
- export PATH=/path/to/local_tool_dependencies/Amplicon_analysis-1.2.3/bin:$PATH
-
-History
-=======
-
-========== ======================================================================
-Version Changes
----------- ----------------------------------------------------------------------
-1.2.3.0 Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
- dependencies via tool_dependencies.xml.
-1.2.2.0 Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
- jackknifed analysis which is not captured by Galaxy tool)
-1.2.1.0 Updated to Amplicon_Analysis_Pipeline version 1.2.1 (adds
- option to use the Human Oral Microbiome Database v15.1, and
- updates SILVA database to v123)
-1.1.0 First official version on Galaxy toolshed.
-1.0.6 Expand inline documentation to provide detailed usage guidance.
-1.0.5 Updates including:
-
- - Capture read counts from quality control as new output dataset
- - Capture FastQC per-base quality boxplots for each sample as
- new output dataset
- - Add support for -l option (sliding window length for trimming)
- - Default for -L set to "200"
-1.0.4 Various updates:
-
- - Additional outputs are captured when a "Categories" file is
- supplied (alpha diversity rarefaction curves and boxplots)
- - Sample names derived from Fastqs in a collection of pairs
- are trimmed to SAMPLE_S* (for Illumina-style Fastq filenames)
- - Input Fastqs can now be of more general ``fastq`` type
- - Log file outputs are captured in new output dataset
- - User can specify a "title" for the job which is copied into
- the dataset names (to distinguish outputs from different runs)
- - Improved detection and reporting of problems with input
- Metatable
-1.0.3 Take the sample names from the collection dataset names when
- using collection as input (this is now the default input mode);
- collect additional output dataset; disable ``usearch``-based
- pipelines (i.e. ``UPARSE`` and ``QIIME``).
-1.0.2 Enable support for FASTQs supplied via dataset collections and
- fix some broken output datasets.
-1.0.1 Initial version
-========== ======================================================================
diff -r 5ef333d1c303 -r 7b9786a43a16 amplicon_analysis_pipeline.py
--- a/amplicon_analysis_pipeline.py Wed Oct 17 08:15:19 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,343 +0,0 @@
-#!/usr/bin/env python
-#
-# Wrapper script to run Amplicon_analysis_pipeline.sh
-# from Galaxy tool
-
-import sys
-import os
-import argparse
-import subprocess
-import glob
-
-class PipelineCmd(object):
- def __init__(self,cmd):
- self.cmd = [str(cmd)]
- def add_args(self,*args):
- for arg in args:
- self.cmd.append(str(arg))
- def __repr__(self):
- return ' '.join([str(arg) for arg in self.cmd])
-
-def ahref(target,name=None,type=None):
- if name is None:
- name = os.path.basename(target)
- ahref = "%s" % name
- return ahref
-
-def check_errors():
- # Errors in Amplicon_analysis_pipeline.log
- with open('Amplicon_analysis_pipeline.log','r') as pipeline_log:
- log = pipeline_log.read()
- if "Names in the first column of Metatable.txt and in the second column of Final_name.txt do not match" in log:
- print_error("""*** Sample IDs don't match dataset names ***
-
-The sample IDs (first column of the Metatable file) don't match the
-supplied sample names for the input Fastq pairs.
-""")
- # Errors in pipeline output
- with open('pipeline.log','r') as pipeline_log:
- log = pipeline_log.read()
- if "Errors and/or warnings detected in mapping file" in log:
- with open("Metatable_log/Metatable.log","r") as metatable_log:
- # Echo the Metatable log file to the tool log
- print_error("""*** Error in Metatable mapping file ***
-
-%s""" % metatable_log.read())
- elif "No header line was found in mapping file" in log:
- # Report error to the tool log
- print_error("""*** No header in Metatable mapping file ***
-
-Check you've specified the correct file as the input Metatable""")
-
-def print_error(message):
- width = max([len(line) for line in message.split('\n')]) + 4
- sys.stderr.write("\n%s\n" % ('*'*width))
- for line in message.split('\n'):
- sys.stderr.write("* %s%s *\n" % (line,' '*(width-len(line)-4)))
- sys.stderr.write("%s\n\n" % ('*'*width))
-
-def clean_up_name(sample):
- # Remove extensions and trailing "_L[0-9]+_001" from
- # Fastq pair names
- sample_name = '.'.join(sample.split('.')[:1])
- split_name = sample_name.split('_')
- if split_name[-1] == "001":
- split_name = split_name[:-1]
- if split_name[-1].startswith('L'):
- try:
- int(split_name[-1][1:])
- split_name = split_name[:-1]
- except ValueError:
- pass
- return '_'.join(split_name)
-
-def list_outputs(filen=None):
- # List the output directory contents
- # If filen is specified then will be the filename to
- # write to, otherwise write to stdout
- if filen is not None:
- fp = open(filen,'w')
- else:
- fp = sys.stdout
- results_dir = os.path.abspath("RESULTS")
- fp.write("Listing contents of output dir %s:\n" % results_dir)
- ix = 0
- for d,dirs,files in os.walk(results_dir):
- ix += 1
- fp.write("-- %d: %s\n" % (ix,
- os.path.relpath(d,results_dir)))
- for f in files:
- ix += 1
- fp.write("---- %d: %s\n" % (ix,
- os.path.relpath(f,results_dir)))
- # Close output file
- if filen is not None:
- fp.close()
-
-if __name__ == "__main__":
- # Command line
- print "Amplicon analysis: starting"
- p = argparse.ArgumentParser()
- p.add_argument("metatable",
- metavar="METATABLE_FILE",
- help="Metatable.txt file")
- p.add_argument("fastq_pairs",
- metavar="SAMPLE_NAME FQ_R1 FQ_R2",
- nargs="+",
- default=list(),
- help="Triplets of SAMPLE_NAME followed by "
- "a R1/R2 FASTQ file pair")
- p.add_argument("-g",dest="forward_pcr_primer")
- p.add_argument("-G",dest="reverse_pcr_primer")
- p.add_argument("-q",dest="trimming_threshold")
- p.add_argument("-O",dest="minimum_overlap")
- p.add_argument("-L",dest="minimum_length")
- p.add_argument("-l",dest="sliding_window_length")
- p.add_argument("-P",dest="pipeline",
- choices=["vsearch","uparse","qiime"],
- type=str.lower,
- default="vsearch")
- p.add_argument("-S",dest="use_silva",action="store_true")
- p.add_argument("-H",dest="use_homd",action="store_true")
- p.add_argument("-r",dest="reference_data_path")
- p.add_argument("-c",dest="categories_file")
- args = p.parse_args()
-
- # Build the environment for running the pipeline
- print "Amplicon analysis: building the environment"
- metatable_file = os.path.abspath(args.metatable)
- os.symlink(metatable_file,"Metatable.txt")
- print "-- made symlink to Metatable.txt"
-
- # Link to Categories.txt file (if provided)
- if args.categories_file is not None:
- categories_file = os.path.abspath(args.categories_file)
- os.symlink(categories_file,"Categories.txt")
- print "-- made symlink to Categories.txt"
-
- # Link to FASTQs and construct Final_name.txt file
- sample_names = []
- print "-- making Final_name.txt"
- with open("Final_name.txt",'w') as final_name:
- fastqs = iter(args.fastq_pairs)
- for sample_name,fqr1,fqr2 in zip(fastqs,fastqs,fastqs):
- sample_name = clean_up_name(sample_name)
- print " %s" % sample_name
- r1 = "%s_R1_.fastq" % sample_name
- r2 = "%s_R2_.fastq" % sample_name
- os.symlink(fqr1,r1)
- os.symlink(fqr2,r2)
- final_name.write("%s\n" % '\t'.join((r1,sample_name)))
- final_name.write("%s\n" % '\t'.join((r2,sample_name)))
- sample_names.append(sample_name)
-
- # Reference database
- if args.use_silva:
- ref_database = "silva"
- elif args.use_homd:
- ref_database = "homd"
- else:
- ref_database = "gg"
-
- # Construct the pipeline command
- print "Amplicon analysis: constructing pipeline command"
- pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh")
- if args.forward_pcr_primer:
- pipeline.add_args("-g",args.forward_pcr_primer)
- if args.reverse_pcr_primer:
- pipeline.add_args("-G",args.reverse_pcr_primer)
- if args.trimming_threshold:
- pipeline.add_args("-q",args.trimming_threshold)
- if args.minimum_overlap:
- pipeline.add_args("-O",args.minimum_overlap)
- if args.minimum_length:
- pipeline.add_args("-L",args.minimum_length)
- if args.sliding_window_length:
- pipeline.add_args("-l",args.sliding_window_length)
- if args.reference_data_path:
- pipeline.add_args("-r",args.reference_data_path)
- pipeline.add_args("-P",args.pipeline)
- if ref_database == "silva":
- pipeline.add_args("-S")
- elif ref_database == "homd":
- pipeline.add_args("-H")
-
- # Echo the pipeline command to stdout
- print "Running %s" % pipeline
-
- # Run the pipeline
- with open("pipeline.log","w") as pipeline_out:
- try:
- subprocess.check_call(pipeline.cmd,
- stdout=pipeline_out,
- stderr=subprocess.STDOUT)
- exit_code = 0
- print "Pipeline completed ok"
- except subprocess.CalledProcessError as ex:
- # Non-zero exit status
- sys.stderr.write("Pipeline failed: exit code %s\n" %
- ex.returncode)
- exit_code = ex.returncode
- except Exception as ex:
- # Some other problem
- sys.stderr.write("Unexpected error: %s\n" % str(ex))
- exit_code = 1
-
- # Write out the list of outputs
- outputs_file = "Pipeline_outputs.txt"
- list_outputs(outputs_file)
-
- # Check for log file
- log_file = "Amplicon_analysis_pipeline.log"
- if os.path.exists(log_file):
- print "Found log file: %s" % log_file
- if exit_code == 0:
- # Create an HTML file to link to log files etc
- # NB the paths to the files should be correct once
- # copied by Galaxy on job completion
- with open("pipeline_outputs.html","w") as html_out:
- html_out.write("""
-
-Amplicon analysis pipeline: log files
-
-
-
-""")
- # Look for raw and trimmed FastQC output for each sample
- for sample_name in sample_names:
- fastqc_dir = os.path.join(sample_name,"FastQC")
- quality_boxplots.write("
%s
" % sample_name)
- for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
- quality_boxplots.write("
%s
" % d)
- fastqc_html_files = glob.glob(
- os.path.join(fastqc_dir,d,"*_fastqc.html"))
- if not fastqc_html_files:
- quality_boxplots.write("
No FastQC outputs found
")
- continue
- # Pull out the per-base quality boxplots
- for f in fastqc_html_files:
- boxplot = None
- with open(f) as fp:
- for line in fp.read().split(">"):
- try:
- line.index("alt=\"Per base quality graph\"")
- boxplot = line + ">"
- break
- except ValueError:
- pass
- if boxplot is None:
- boxplot = "Missing plot"
- quality_boxplots.write("
%s
%s
" %
- (os.path.basename(f),
- boxplot))
- quality_boxplots.write("""
-
-""")
-
- # Handle additional output when categories file was supplied
- if args.categories_file is not None:
- # Alpha diversity boxplots
- print "Amplicon analysis: indexing alpha diversity boxplots"
- boxplots_dir = os.path.abspath(
- os.path.join("RESULTS",
- "%s_%s" % (args.pipeline.title(),
- ref_database),
- "Alpha_diversity",
- "Alpha_diversity_boxplot",
- "Categories_shannon"))
- print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir
- boxplot_pdfs = [os.path.basename(pdf)
- for pdf in
- sorted(glob.glob(
- os.path.join(boxplots_dir,"*.pdf")))]
- with open("alpha_diversity_boxplots.html","w") as boxplots_out:
- boxplots_out.write("""
-
-Amplicon analysis pipeline: Alpha Diversity Boxplots (Shannon)
-
-
-
\n")
- for pdf in boxplot_pdfs:
- boxplots_out.write("
%s
\n" % ahref(pdf))
- boxplots_out.write("
\n")
- boxplots_out.write("""
-
-""")
-
- # Finish
- print "Amplicon analysis: finishing, exit code: %s" % exit_code
- sys.exit(exit_code)
diff -r 5ef333d1c303 -r 7b9786a43a16 amplicon_analysis_pipeline.xml
--- a/amplicon_analysis_pipeline.xml Wed Oct 17 08:15:19 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,473 +0,0 @@
-
- analyse 16S rRNA data from Illumina Miseq paired-end reads
-
- amplicon_analysis_pipeline
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- categories_file_in is not None
-
-
-
-
-
- = 1.8) paired-end reads.
-
-Usage
------
-
-1. Preparation of the mapping file and format of unique sample id
-*****************************************************************
-
-Before using the amplicon analysis pipeline it would be necessary to
-follow the steps as below to avoid analysis failures and ensure samples
-are labelled appropriately. Sample names for the labelling are derived
-from the fastq files names that are generated from the sequencing. The
-labels will include everything between the beginning of the name and
-the sample number (from C11 to S19 in Fig. 1)
-
-.. image:: Pipeline_description_Fig1.png
- :height: 46
- :width: 382
-
-**Figure 1**
-
-If analysing 16S data from multiple runs:
-
-The samples from different runs may have identical IDs. For example,
-when sequencing the same samples twice, by chance, these could be at
-the same position in both the runs. This would cause the fastq files
-to have exactly the same IDs (Fig. 2).
-
-.. image:: Pipeline_description_Fig2.png
- :height: 100
- :width: 463
-
-**Figure 2**
-
-In case of identical sample IDs the pipeline will fail to run and
-generate an error at the beginning of the analysis.
-
-To avoid having to change the file names, before uploading the files,
-ensure that the samples IDs are not repeated.
-
-2. To upload the file
-*********************
-
-Click on **Get Data/Upload File** from the Galaxy tool panel on the
-left hand side.
-
-From the pop-up window, choose how to upload the file. The
-**Choose local file** option can be used for files up to 4Gb. Fastq files
-from Illumina MiSeq will rarely be bigger than 4Gb and this option is
-recommended.
-
-After choosing the files click **Start** to begin the upload. The window can
-now be closed and the files will be uploaded onto the Galaxy server. You
-will see the progress on the ``HISTORY`` panel on the right
-side of the screen. The colour will change from grey (queuing), to yellow
-(uploading) and finally green (uploaded).
-
-Once all the files are uploaded, click on the operations on multiple
-datasets icon and select the fastq files that need to be analysed.
-Click on the tab **For all selected...** and on the option
-**Build List of Dataset pairs** (Fig. 3).
-
-.. image:: Pipeline_description_Fig3.png
- :height: 247
- :width: 586
-
-**Figure 3**
-
-Change the filter parameter ``_1`` and ``_2`` to be ``_R1`` and ``_R2``.
-The fastq files forward R1 and reverse R2 should now appear in the
-corresponding columns.
-
-Select **Autopair**. This creates a collection of paired fastq files for
-the forward and reverse reads for each sample. The name of the pairs will
-be the ones used by the pipeline. You are free to change the names at this
-point as long as they are the same used in the Metatable file
-(see section 3).
-
-Name the collection and click on **create list**. This reduces the time
-required to input the forward and reverse reads for each individual sample.
-
-3. Create the Metatable files
-*****************************
-
-Metatable.txt
-~~~~~~~~~~~~~
-
-Click on the list of pairs you just created to see the names of the
-individual pairs. The pair names are the ones used by the pipeline, so
-these are the names that must be used in the Metatable file.
-
-The Metatable file has to be in QIIME mapping file format. A description
-of the format is available on the QIIME website:
-http://qiime.org/documentation/file_formats.html
-
-EXAMPLE::
-
- #SampleID BarcodeSequence LinkerPrimerSequence Disease Gender Description
- Mock-RUN1 TAAGGCGAGCGTAAGA PsA Male Control
- Mock-RUN2 CGTACTAGGCGTAAGA PsA Male Control
- Mock-RUN3 AGGCAGAAGCGTAAGA PsC Female Control
-
-Briefly: the ``LinkerPrimerSequence`` column is empty but it cannot be
-deleted. The header is very important: ``#SampleID``, ``BarcodeSequence``,
-``LinkerPrimerSequence`` and ``Description`` are mandatory. Between
-``LinkerPrimerSequence`` and ``Description`` you can add as many columns
-as you want; for every such column a PCoA plot will be created (see the
-**Results** section). You can create this file in Excel, but it has to be
-saved as ``Text (Tab delimited)``.
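-
-As an illustration only (``Metatable.txt`` here is a hypothetical local
-path, and the check is not part of the tool), the header can be verified
-along these lines before upload::
-
-    mandatory = ["#SampleID", "BarcodeSequence",
-                 "LinkerPrimerSequence", "Description"]
-
-    with open("Metatable.txt") as f:
-        header = f.readline().rstrip("\n").split("\t")
-
-    missing = [col for col in mandatory if col not in header]
-    if missing:
-        print("Missing mandatory columns: %s" % ", ".join(missing))
-    if header[-1] != "Description":
-        print("Description should be the last column")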
-
-During the analysis the Metatable.txt file is checked to ensure that it
-has the correct format. If necessary it will be modified, and the
-corrected version will be available as ``Metatable_corrected.txt`` in the
-history panel. If you are going to use the metatable file for any other
-statistical analyses, remember to use this corrected version, otherwise
-the sample names might not match!
-
-Categories.txt (optional)
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This file is required if you want box plots comparing alpha diversity
-indices (see the **Results** section). The file is a list (without a
-header, in a single column) of categories present in the Metatable.txt
-file. The names used here **must be exactly the same** as the column
-names in the Metatable.txt file. You can create this file in Excel, but
-it has to be saved as ``Text (Tab delimited)``.
-
-EXAMPLE::
-
- Disease
- Gender
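-
-Because mismatched names are a common cause of failures, the categories
-can be cross-checked against the Metatable header before upload; this is
-an illustrative sketch only (file names are hypothetical local paths)::
-
-    with open("Metatable.txt") as f:
-        header = set(f.readline().rstrip("\n").split("\t"))
-
-    with open("Categories.txt") as f:
-        categories = [line.strip() for line in f if line.strip()]
-
-    unknown = [c for c in categories if c not in header]
-    if unknown:
-        print("Categories missing from the Metatable header: %s"
-              % ", ".join(unknown))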
-
-The Metatable and Categories files can be uploaded using **Get Data**, as
-was done with the fastq files.
-
-4. Analysis
-***********
-
-Under **Amplicon_Analysis_Pipeline**
-
- * **Title** Name to distinguish between the runs. It will be shown at
- the beginning of each output file name.
-
- * **Input Metatable.txt file** Select the Metatable.txt file related to
- this analysis
-
- * **Input Categories.txt file (Optional)** Select the Categories.txt file
- related to this analysis
-
- * **Input FASTQ type** Select *Dataset pairs in a collection* and then
- the collection of pairs you created earlier.
-
- * **Forward/Reverse PCR primer sequence** If the PCR primer sequences
- have not been removed by the MiSeq during fastq creation, they have to
- be removed before the analysis: insert the PCR primer sequence in the
- corresponding field. DO NOT include any barcode or adapter sequence.
- If the PCR primers have already been trimmed by the MiSeq and you
- include the sequence in this field, this will lead to an error. Only
- include the sequences if they are still present in the fastq files.
-
- * **Threshold quality below which reads will be trimmed** Choose the
- Phred score used by Sickle to trim the reads at the 3’ end (see the
- note after this list).
-
- * **Minimum length to retain a read after trimming** If the read length
- after trimming is shorter than a user defined length, the read, along
- with the corresponding read pair, will be discarded.
-
- * **Minimum overlap in bp between forward and reverse reads** Choose the
- minimum basepair overlap used by Pandaseq to assemble the reads.
- Default is 10.
-
- * **Minimum length in bp to keep a sequence after overlapping** Choose the
- minimum sequence length used by Pandaseq to keep a sequence after the
- overlapping. This depends on the expected amplicon length. Default is
- 380 (used for V3-V4 16S sequencing; expected length ~440bp)
-
- * **Pipeline to use for analysis** Choose the pipeline to use for OTU
- clustering and chimera removal. The Galaxy tool currently supports
- ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added
- shortly (the tools are already available for the stand-alone pipeline).
-
- * **Reference database** Choose between ``GreenGenes`` and ``Silva``
- databases for taxa assignment.
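-
-As a point of reference (this is general Phred arithmetic, not something
-the tool computes for you), the chosen quality threshold maps to a
-per-base error probability as follows::
-
-    def phred_to_error_prob(q):
-        # A Phred score Q corresponds to an error probability of 10^(-Q/10)
-        return 10 ** (-q / 10.0)
-
-    for q in (10, 20, 30):
-        print("Q%d -> error probability %.3f" % (q, phred_to_error_prob(q)))
-    # Q10 -> 0.100, Q20 -> 0.010, Q30 -> 0.001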
-
-Click on **Execute** to start the analysis.
-
-5. Results
-**********
-
-Results are generated entirely using QIIME scripts and will appear in the
-History panel when the analysis is completed.
-
- * **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format
- (http://biom-format.org/). A sketch showing how this file can be loaded
- outside Galaxy follows this list.
-
- * **Vsearch_OTUs.tree** Phylogenetic tree constructed using
- ``make_phylogeny.py`` (fasttree) QIIME script
- (http://qiime.org/scripts/make_phylogeny.html)
-
- * **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar
- charts at Phylum, Genus and Species level
- (http://qiime.org/scripts/summarize_taxa.html and
- http://qiime.org/scripts/plot_taxa_summary.html)
-
- * **Vsearch_OTUs_count_file** Summary of OTU counts per sample
- (http://biom-format.org/documentation/summarizing_biom_tables.html)
-
- * **Vsearch_table_summary_file** Summary of sequences counts per sample
- (http://biom-format.org/documentation/summarizing_biom_tables.html)
-
- * **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta**
- Fasta file with OTU sequences
-
- * **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap
- (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html)
-
- * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
- format using the weighted UniFrac distance measure. Samples are grouped
- by the column names present in the Metatable file. The samples are
- first rarefied to the minimum sequencing depth
- (http://qiime.org/scripts/beta_diversity_through_plots.html)
-
- * **Vsearch_beta_diversity_unweighted_2D_plots_HTML** PCoA plots in HTML
- format using the unweighted UniFrac distance measure. Samples are grouped
- by the column names present in the Metatable file. The samples are
- first rarefied to the minimum sequencing depth
- (http://qiime.org/scripts/beta_diversity_through_plots.html)
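-
-As an illustration only (it assumes the ``biom-format`` Python package,
-which is not part of this tool, is installed locally), the downloaded
-BIOM file can be loaded outside Galaxy like this::
-
-    import biom
-
-    # Load the file saved from the Vsearch_tax_OTU_table history item
-    table = biom.load_table("Vsearch_tax_OTU_table.biom")
-    print(table.ids())                      # sample IDs
-    print(table.ids(axis="observation"))    # OTU IDs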
-
-Code availability
------------------
-
-**Code is available at** https://github.com/MTutino/Amplicon_analysis
-
-Credits
--------
-
-Pipeline author: Mauro Tutino
-
-Galaxy tool: Peter Briggs
-
-    ]]></help>
-    <citations>
-        <citation type="bibtex">
-            @misc{githubAmplicon_analysis,
-            author = {Tutino, Mauro},
-            year = {2017},
-            title = {Amplicon Analysis Pipeline},
-            publisher = {GitHub},
-            journal = {GitHub repository},
-            url = {https://github.com/MTutino/Amplicon_analysis},
-            }
-        </citation>
-    </citations>
-</tool>
diff -r 5ef333d1c303 -r 7b9786a43a16 install_amplicon_analysis.sh
--- a/install_amplicon_analysis.sh Wed Oct 17 08:15:19 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,425 +0,0 @@
-#!/bin/sh -e
-#
-# Prototype script to setup a conda environment with the
-# dependencies needed for the Amplicon_analysis_pipeline
-# script
-#
-# Handle command line
-usage()
-{
- echo "Usage: $(basename $0) [DIR]"
- echo ""
- echo "Installs the Amplicon_analysis_pipeline package plus"
- echo "dependencies in directory DIR (or current directory "
- echo "if DIR not supplied)"
-}
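-# Example invocation (the target directory here is hypothetical):
-#
-#   sh install_amplicon_analysis.sh /opt/galaxy/tool_deps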
-if [ ! -z "$1" ] ; then
- # Check if help was requested
- case "$1" in
- --help|-h)
- usage
- exit 0
- ;;
- esac
- # Assume it's the installation directory
- cd $1
-fi
-# Versions
-PIPELINE_VERSION=1.2.3
-RDP_CLASSIFIER_VERSION=2.2
-# Directories
-TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
-BIN_DIR=${TOP_DIR}/bin
-CONDA_DIR=${TOP_DIR}/conda
-CONDA_BIN=${CONDA_DIR}/bin
-CONDA_LIB=${CONDA_DIR}/lib
-CONDA=${CONDA_BIN}/conda
-ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
-ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
-#
-# Functions
-#
-# Report failure and terminate script
-fail()
-{
- echo ""
- echo ERROR $@ >&2
- echo ""
- echo "$(basename $0): installation failed"
- exit 1
-}
-#
-# Rewrite the shebangs in the installed conda scripts
-# to remove the full path to conda 'bin' directory
-rewrite_conda_shebangs()
-{
- pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
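- # e.g. "#!${CONDA_BIN}/python" is rewritten to "#!/usr/bin/env python"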
- find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
-}
-#
-# Install conda
-install_conda()
-{
- echo "++++++++++++++++"
- echo "Installing conda"
- echo "++++++++++++++++"
- if [ -e ${CONDA_DIR} ] ; then
- echo "*** $CONDA_DIR already exists ***" >&2
- return
- fi
- local cwd=$(pwd)
- local wd=$(mktemp -d)
- cd $wd
- wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
- bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
- echo Installed conda in ${CONDA_DIR}
- # Update the installation files
- # This is to avoid problems when the length of the installation
- # directory path exceeds the limit for the shebang line in the
- # conda files
- echo ""
- echo -n "Rewriting conda shebangs..."
- rewrite_conda_shebangs
- echo "ok"
- echo -n "Adding conda bin to PATH..."
- PATH=${CONDA_BIN}:$PATH
- echo "ok"
- cd $cwd
- rm -rf $wd/*
- rmdir $wd
-}
-#
-# Create conda environment
-install_conda_packages()
-{
- echo "+++++++++++++++++++++++++"
- echo "Installing conda packages"
- echo "+++++++++++++++++++++++++"
- local cwd=$(pwd)
- local wd=$(mktemp -d)
- cd $wd
- # (collapsed during extraction; heredoc contents are not recoverable)
- cat >environment.yml <
- ${BIN_DIR}/Amplicon_analysis_pipeline.sh <
- ${BIN_DIR}/install_reference_data.sh <
- ${BIN_DIR}/ChimeraSlayer.pl <
- INSTALL.log 2>&1
- echo "ok"
- cd R-3.2.1
- echo -n "Running configure..."
- ./configure --prefix=$INSTALL_DIR --with-x=no --with-readline=no >>INSTALL.log 2>&1
- echo "ok"
- echo -n "Running make..."
- make >>INSTALL.log 2>&1
- echo "ok"
- echo -n "Running make install..."
- make install >>INSTALL.log 2>&1
- echo "ok"
- cd $cwd
- rm -rf $wd/*
- rmdir $wd
- . ${CONDA_BIN}/deactivate
-}
-setup_pipeline_environment()
-{
- echo "+++++++++++++++++++++++++++++++"
- echo "Setting up pipeline environment"
- echo "+++++++++++++++++++++++++++++++"
- # vsearch113
- echo -n "Setting up vsearch113..."
- if [ -e ${BIN_DIR}/vsearch113 ] ; then
- echo "already exists"
- elif [ ! -e ${ENV_DIR}/bin/vsearch ] ; then
- echo "failed"
- fail "vsearch not found"
- else
- ln -s ${ENV_DIR}/bin/vsearch ${BIN_DIR}/vsearch113
- echo "ok"
- fi
- # fasta_splitter.pl
- echo -n "Setting up fasta_splitter.pl..."
- if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
- echo "already exists"
- elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
- echo "failed"
- fail "fasta-splitter.pl not found"
- else
- ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
- echo "ok"
- fi
- # rdp_classifier.jar
- local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
- echo -n "Setting up rdp_classifier.jar..."
- if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
- echo "already exists"
- elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
- echo "failed"
- fail "rdp_classifier.jar not found"
- else
- mkdir -p ${TOP_DIR}/share/rdp_classifier
- ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
- echo "ok"
- fi
- # qiime_config
- echo -n "Setting up qiime_config..."
- if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
- echo "already exists"
- else
- mkdir -p ${TOP_DIR}/qiime
- cat >${TOP_DIR}/qiime/qiime_config <
-
-(remainder lost in extraction; surviving fragments below)
-
- https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis.sh
- sh ./install_amplicon_analysis.sh $INSTALL_DIR
- $INSTALL_DIR/Amplicon_analysis-1.2.3/bin