view confil/confil.py @ 4:eaf51f9aff10 draft

planemo upload for repository https://github.com/COMBAT-TB/confil commit 9e5284a2616de5d869319f9c4436716a3f6656f8-dirty
author sanbi-uwc
date Mon, 04 Mar 2019 03:28:57 -0500
parents 53a61865e86e
children 2b90d0574ea5
line wrap: on
line source

import os

import click

from kraken import kraken_installed, run_kraken

# TODO: Remove
# Hard-coded filesystem location of a Kraken 2 "standard" database.
# It is not referenced by the command defined in this file.
KRAKEN2_DEFAULT_DB = "/tools/databases/kraken2/04092018/standard/"

# Absolute path of the working directory, captured once at import time.
# Used as the default value of the --db option.
OUT_DIR = os.path.abspath(os.path.curdir)

# Presumably the filename suffixes treated as FASTQ input; not used by the
# command defined in this file -- verify against other modules.
fastq_file_extensions = [".fq", ".fastq"]


@click.command()
@click.option(
    '--db',
    type=click.Path(exists=True),
    default=OUT_DIR,
    required=True,
    show_default=True,
    help='Name for Kraken 2 DB'
)
@click.option(
    '--threads',
    default=1,
    show_default=True,
    help='Number of threads'
)
@click.option(
    '--cutoff',
    default=50,
    show_default=True,
    help='Percentage of fragments covered'
)
@click.option(
    '--paired',
    is_flag=True,
    help='The filenames provided have paired-end reads'
)
@click.argument('seqfiles', nargs=-1, required=True)
def confil(db, threads, cutoff, paired, seqfiles):
    """
    Checks sequence for contamination using specified cutoff.
    """
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    #
    # Parameters (all supplied by click from the command line):
    #   db       -- existing path passed through to run_kraken
    #   threads  -- thread count passed through to run_kraken
    #   cutoff   -- percentage echoed to the user and passed to run_kraken
    #   paired   -- flag: the given files are paired-end reads
    #   seqfiles -- one or more sequence file paths
    #
    # Raises ValueError when the number of files does not fit the mode.

    # The command is a silent no-op when the kraken check reports False.
    if not kraken_installed():
        return

    # Everything downstream works with absolute paths.
    seqfiles = list(map(os.path.abspath, seqfiles))
    file_count = len(seqfiles)

    # NOTE(review): with --paired, more than 2 files pass these checks, and
    # without --paired exactly 2 files pass -- confirm this is intended.
    if not paired and file_count > 2:
        too_many = "Expecting no more than 2 FASTQ files. We got {}.\n{}"
        raise ValueError(too_many.format(file_count, seqfiles))

    if paired and file_count < 2:
        too_few = "Expecting 2 paired FASTQ files. We got {}.\n{}"
        raise ValueError(too_few.format(file_count, seqfiles))

    banner = 'Using a cutoff of {}% for contamination!\n'.format(cutoff)
    click.secho(banner, fg='green')

    # run kraken and read/parse report
    run_kraken(
        db=db,
        threads=threads,
        cutoff=cutoff,
        paired=paired,
        seqfiles=seqfiles
    )


# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    confil()