# HG changeset patch # User sanbi-uwc # Date 1552303114 14400 # Node ID 5ec97cccb3feac9959d0e62578b59fb19da94253 # Parent 72cb19a32d8b01a7b485ff22f58f000cace9d9b9 planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2 diff -r 72cb19a32d8b -r 5ec97cccb3fe conda/meta.yaml --- a/conda/meta.yaml Fri Mar 08 13:55:10 2019 -0500 +++ b/conda/meta.yaml Mon Mar 11 07:18:34 2019 -0400 @@ -1,5 +1,5 @@ -{% set version = "0.1.3" %} -{% set sha256 = "8ae72d4c136edff4e755462f6c57c903edb8479ea68d6f0ccdb38f6c56e42d04" %} +{% set version = "0.1.4" %} +{% set sha256 = "1ebbb14e64e057d6b58f4942a7b1dbf9e53f4ab741709b0ecfcb8d358fb8848e" %} package: name: confil @@ -11,7 +11,7 @@ build: script: "{{ PYTHON }} -m pip install . --no-deps -vv" - number: 1 + number: 0 requirements: build: diff -r 72cb19a32d8b -r 5ec97cccb3fe confil.egg-info/PKG-INFO --- a/confil.egg-info/PKG-INFO Fri Mar 08 13:55:10 2019 -0500 +++ b/confil.egg-info/PKG-INFO Mon Mar 11 07:18:34 2019 -0400 @@ -1,9 +1,12 @@ -Metadata-Version: 2.1 +Metadata-Version: 1.0 Name: confil -Version: 0.1.3.dev20190307 +Version: 0.1.2.dev20190304 Summary: Contamination filter Home-page: https://github.com/COMBAT-TB/confil +Author: UNKNOWN +Author-email: UNKNOWN License: UNKNOWN +Description-Content-Type: text/markdown Description: # confil [![Build Status](https://travis-ci.org/COMBAT-TB/confil.svg?branch=master)](https://travis-ci.org/COMBAT-TB/confil) @@ -33,4 +36,3 @@ Keywords: contamination,filter Platform: UNKNOWN -Description-Content-Type: text/markdown diff -r 72cb19a32d8b -r 5ec97cccb3fe confil.egg-info/SOURCES.txt --- a/confil.egg-info/SOURCES.txt Fri Mar 08 13:55:10 2019 -0500 +++ b/confil.egg-info/SOURCES.txt Mon Mar 11 07:18:34 2019 -0400 @@ -3,10 +3,13 @@ setup.py confil/__init__.py confil/confil.py +confil/kraken.py +confil/report.py confil.egg-info/PKG-INFO confil.egg-info/SOURCES.txt confil.egg-info/dependency_links.txt confil.egg-info/entry_points.txt 
confil.egg-info/requires.txt confil.egg-info/top_level.txt +test/test_report.py test/test_runner.py \ No newline at end of file diff -r 72cb19a32d8b -r 5ec97cccb3fe confil.xml --- a/confil.xml Fri Mar 08 13:55:10 2019 -0500 +++ b/confil.xml Mon Mar 11 07:18:34 2019 -0400 @@ -1,6 +1,6 @@ - + - 0.1.3 + 0.1.4 confil @@ -20,9 +20,10 @@ $input_type_conditional.single_input.element_identifier ; then ln -sf "${input_type_conditional.single_input}" '$single_output_file' ; - else - touch '$single_output_file' ; - fi + else + touch '$single_output_file' ; + fi + ####### Paired Collection #elif $input_type == "paired_collection" ln -s "${input_type_conditional.collection_input.forward}" $input_type_conditional.collection_input.forward.element_identifier && @@ -31,15 +32,16 @@ #set report_name = re.sub('_[0-9]+$', '', str(report_name)) + '.tab' if confil --threads \${GALAXY_SLOTS:-1} --cutoff $cutoff --paired - $input_type_conditional.collection_input.forward.element_identifier $input_type_conditional.collection_input.reverse.element_identifier ; - then - ln -sf "${input_type_conditional.collection_input.forward}" '$list_output.forward' - && ln -sf "${input_type_conditional.collection_input.reverse}" '$list_output.reverse' ; - else - touch '$list_output.forward' && touch '$list_output.reverse' ; - fi + $input_type_conditional.collection_input.forward.element_identifier $input_type_conditional.collection_input.reverse.element_identifier ; + then + ln -sf "${input_type_conditional.collection_input.forward}" '$list_output.forward' + && ln -sf "${input_type_conditional.collection_input.reverse}" '$list_output.reverse' ; + else + touch '$list_output.forward' && touch '$list_output.reverse' ; + fi #end if && mv $report_name '$output_report' + ]]> @@ -58,11 +60,11 @@ - - + + input_type_conditional['input_type'] == "single" - + input_type_conditional['input_type'] == "paired_collection" diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/.confil.py.swp Binary file confil/.confil.py.swp has 
changed diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/__init__.pyc Binary file confil/__init__.pyc has changed diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/confil.py --- a/confil/confil.py Fri Mar 08 13:55:10 2019 -0500 +++ b/confil/confil.py Mon Mar 11 07:18:34 2019 -0400 @@ -1,11 +1,9 @@ -import distutils.spawn import os -import re -from shlex import split -from subprocess import PIPE, Popen import click +from .kraken import kraken_installed, run_kraken + # TODO: Remove KRAKEN2_DEFAULT_DB = "/tools/databases/kraken2/04092018/standard/" OUT_DIR = os.path.abspath(os.curdir) @@ -19,88 +17,6 @@ return OUT_DIR -def run_kraken(db, threads, cutoff, paired, seqfiles): - # Using the sample name to track report - seq_name = [os.path.splitext(os.path.basename(seq))[0] - for seq in seqfiles][0] - # remove _ and numbers - seq_name = re.sub('_[0-9]+$', '', seq_name) - # building cmd - cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format( - threads=threads, db=db, seq_name=seq_name) - if paired: - cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name) - cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles)) - click.secho("Executing kraken2: \n{}\n".format( - split(cmd)), fg='bright_yellow') - - # TODO: remove - # test_file = "https://raw.githubusercontent.com/COMBAT-TB/confil/master/test/test_data/test_file.tab" - # out_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name)) - # mock_cmd = 'wget {} -O {}'.format(test_file, out_file) - # cmd = mock_cmd - # click.secho("Executing mock_cmd: \n{}\n".format(split(cmd)), fg='red') - - p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True) - while True: - output = p.stdout.readline() - if output == '' and p.poll() is not None: - break - if output: - click.echo(output) - returncode = p.poll() - if returncode != 0: - error = p.stderr.readline() - raise OSError("Kraken2 launch error:\n{}\n".format(error)) - # parse kraken report - report_file = os.path.join(OUT_DIR, 
"{}.tab".format(seq_name)) - parse_report(report_file=report_file, cutoff=cutoff) - return returncode - - -def parse_report(report_file, cutoff): - file_name = os.path.splitext(os.path.basename(report_file))[0] - hit = None - if os.stat(report_file).st_size > 0 and report_file.endswith(".tab"): - click.secho("Processing {} with cutoff of {}...\n".format( - report_file, cutoff), fg='green') - with open(report_file, 'r') as report: - for line in report: - line = [str(e).strip() for e in line.split('\t')] - if len(line) > 1: - click.secho('{}'.format(line), fg='green') - # Percentage of fragments covered by the clade rooted at this taxon - percentage = int(float(line[0])) - # Number of fragments covered by the clade rooted at this taxon - # num_covered = int(float(line[1])) - # Number of fragments assigned directly to this taxon - # num_assigned = int(float(line[2])) - # NCBI taxonomic ID number - # ncbi_tax = int(float(line[3])) - # Indented scientific name (Mycobacterium\n) - name = str(line[5]).strip() - if percentage < cutoff and 'Mycobacterium' in name: - click.secho('\n{}%: {} is contaminated!\n'.format( - percentage, file_name), fg='red') - raise SystemExit('{}%: {} is contaminated!\n'.format( - percentage, file_name)) - if percentage >= cutoff and 'Mycobacterium' in name: - click.secho('\n{}%: {} is not contaminated!\n'.format( - percentage, file_name), fg='green') - hit = line - break - click.secho('Hit: {}'.format(hit), fg='green') - return hit - - -def kraken_installed(): - # check if `kraken2` is in path - installed = distutils.spawn.find_executable("kraken2") - if not installed: - raise OSError("kraken2 is not installed.") - return installed - - @click.command() @click.option('--db', default=db_path(), required=True, help='Name for Kraken 2 DB', type=click.Path(exists=True), diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/confil.pyc Binary file confil/confil.pyc has changed diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/kraken.py --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/confil/kraken.py Mon Mar 11 07:18:34 2019 -0400 @@ -0,0 +1,58 @@ +import distutils.spawn +import os +import re +from shlex import split +from subprocess import PIPE, Popen + +import click + +from .report import parse_report + +OUT_DIR = os.path.abspath(os.curdir) + + +def kraken_installed(): + # check if `kraken2` is in path + installed = distutils.spawn.find_executable("kraken2") + if not installed: + raise OSError("kraken2 is not installed.") + return installed + + +def run_kraken(db, threads, cutoff, paired, seqfiles): + # Using the sample name to track report + seq_name = [os.path.splitext(os.path.basename(seq))[0] + for seq in seqfiles][0] + # remove _ and numbers + seq_name = re.sub('_[0-9]+$', '', seq_name) + # building cmd + cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format( + threads=threads, db=db, seq_name=seq_name) + if paired: + cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name) + cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles)) + click.secho("Executing kraken2: \n{}\n".format( + split(cmd)), fg='bright_yellow') + + # TODO: remove + # test_file = "https://raw.githubusercontent.com/COMBAT-TB/confil/master/test/test_data/test_file.tab" + # out_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name)) + # mock_cmd = 'wget {} -O {}'.format(test_file, out_file) + # cmd = mock_cmd + # click.secho("Executing mock_cmd: \n{}\n".format(split(cmd)), fg='red') + + p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True) + while True: + output = p.stdout.readline() + if output == '' and p.poll() is not None: + break + if output: + click.echo(output) + returncode = p.poll() + if returncode != 0: + error = p.stderr.readline() + raise OSError("Kraken2 launch error:\n{}\n".format(error)) + # parse kraken report + report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name)) + parse_report(report_file=report_file, cutoff=cutoff) + return returncode diff -r 
72cb19a32d8b -r 5ec97cccb3fe confil/kraken.pyc Binary file confil/kraken.pyc has changed diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/confil/report.py Mon Mar 11 07:18:34 2019 -0400 @@ -0,0 +1,48 @@ +import os + +import click + + +def parse_report(report_file, cutoff): + file_name = os.path.splitext(os.path.basename(report_file))[0] + hit = None + try: + rpt_file = open(report_file, 'r') + except OSError as e: + raise OSError( + "{} does not exist!\n{}".format(report_file, e)) + else: + click.secho("Processing {} with cutoff of {}...\n".format( + report_file, cutoff), fg='green') + with rpt_file as report: + for line in report: + line = [str(e).strip() for e in line.split('\t')] + if len(line) > 1: + click.secho('{}'.format(line), fg='green') + # Percentage of fragments covered by the clade rooted at this taxon + percentage = int(float(line[0])) + # Number of fragments covered by the clade rooted at this taxon + # num_covered = int(float(line[1])) + # Number of fragments assigned directly to this taxon + # num_assigned = int(float(line[2])) + # NCBI taxonomic ID number + # ncbi_tax = int(float(line[3])) + # Indented scientific name (Mycobacterium\n) + name = str(line[5]).strip() + if percentage >= cutoff and name == 'unclassified': + click.secho('\n{}%: {} is unclassified!\n'.format( + percentage, file_name), fg='red') + raise SystemExit('{}%: {} is unclassified!\n'.format( + percentage, file_name)) + if percentage < cutoff and 'Mycobacterium' in name: + click.secho('\n{}%: {} is contaminated!\n'.format( + percentage, file_name), fg='red') + raise SystemExit('{}%: {} is contaminated!\n'.format( + percentage, file_name)) + if percentage >= cutoff and 'Mycobacterium' in name: + click.secho('\n{}%: {} is not contaminated!\n'.format( + percentage, file_name), fg='green') + hit = line + break + click.secho('Hit: {}'.format(hit), fg='green') + return hit diff -r 72cb19a32d8b -r 5ec97cccb3fe confil/report.pyc 
Binary file confil/report.pyc has changed diff -r 72cb19a32d8b -r 5ec97cccb3fe setup.py --- a/setup.py Fri Mar 08 13:55:10 2019 -0500 +++ b/setup.py Mon Mar 11 07:18:34 2019 -0400 @@ -5,7 +5,7 @@ setup( name='confil', - version='0.1.3', + version='0.1.4', url='https://github.com/COMBAT-TB/confil', description='Contamination filter', long_description=long_description, @@ -14,6 +14,14 @@ py_modules=['confil'], packages=find_packages(), include_package_data=True, + license="GPLv3", + classifiers=[ + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + ], install_requires=[ 'click' ], diff -r 72cb19a32d8b -r 5ec97cccb3fe test/test_report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test_report.py Mon Mar 11 07:18:34 2019 -0400 @@ -0,0 +1,25 @@ +import os + +import pytest + +from confil.report import parse_report +from test_runner import TEST_DATA_DIR + +TEST_REPORT = os.path.join(TEST_DATA_DIR, "test_file.tab") + +# test using a cutoff of 50% + + +@pytest.mark.parametrize("test_input, expected", [ + (type(parse_report(TEST_REPORT, 50)), list), + (parse_report(TEST_REPORT, 50)[5], 'Mycobacterium'), + (parse_report(TEST_REPORT, 50)[0], '55.84') +]) +def test_parse_report(test_input, expected): + assert test_input == expected + + +def test_parse_report_exception(): + with pytest.raises(SystemExit): + parse_report(TEST_REPORT, 90) + parse_report(TEST_REPORT, 20) diff -r 72cb19a32d8b -r 5ec97cccb3fe test/test_runner.py --- a/test/test_runner.py Fri Mar 08 13:55:10 2019 -0500 +++ b/test/test_runner.py Mon Mar 11 07:18:34 2019 -0400 @@ -3,7 +3,7 @@ import pytest from click.testing import CliRunner -from confil.confil import confil, parse_report +from confil.confil import confil CURR_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA_DIR = os.path.join(CURR_DIR, "test_data/") 
@@ -22,17 +22,3 @@ def test_runner(cli_runner): result = cli_runner.invoke(confil, ["--paired", FILE_1, FILE_2]) assert result.exit_code == 0 - - -@pytest.mark.parametrize("test_input, expected", [ - (type(parse_report(TEST_REPORT, 50)), list), - (parse_report(TEST_REPORT, 50)[5], 'Mycobacterium'), - (parse_report(TEST_REPORT, 50)[0], '55.84') -]) -def test_parse_report(test_input, expected): - assert test_input == expected - - -def test_parse_report_exception(): - with pytest.raises(SystemExit): - parse_report(TEST_REPORT, 90)