Mercurial > repos > sanbi-uwc > confil

--- a/conda/meta.yaml	Mon Mar 04 07:42:05 2019 -0500
+++ b/conda/meta.yaml	Tue Mar 05 03:49:38 2019 -0500
@@ -3,8 +3,8 @@
   version: {{ environ['VERSION'] }}

 source:
-  fn: confil-0.0.1.tar.gz
-  url: https://github.com/COMBAT-TB/confil/archive/0.0.1.tar.gz
+  fn: confil-0.1.3.tar.gz
+  url: https://github.com/COMBAT-TB/confil/archive/0.1.3.tar.gz

 build:
   script_env:
--- a/confil.xml	Mon Mar 04 07:42:05 2019 -0500
+++ b/confil.xml	Tue Mar 05 03:49:38 2019 -0500
@@ -1,6 +1,8 @@
-<tool id="confil" name="Contamination Filter (confil)" version="0.1.2">
+<tool id="confil" name="Contamination Filter (confil)" version="0.1.3">
     <requirements>
         <requirement type="package" version="7.0">click</requirement>
+        <requirement type="package" version="2.0.7_beta">kraken2</requirement>
+        <requirement type="package" version="dev20190305">confil</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
         #set $input_type = $input_type_conditional.input_type
@@ -12,7 +14,7 @@
         #set report_name = os.path.splitext(os.path.basename($input_type_conditional.single_input.element_identifier))[0]
         #set report_name = re.sub('_[0-9]+$', '', str(report_name)) + '.tab'

-        python $__tool_directory__/confil/confil.py --threads $threads --cutoff $cutoff
+        confil --threads $threads --cutoff $cutoff
         $input_type_conditional.single_input.element_identifier
         && mv $report_name '$output_report'
         && ln -sf "${input_type_conditional.single_input}" '$single_output_file'
@@ -24,7 +26,7 @@
         #set report_name = os.path.splitext(os.path.basename($input_type_conditional.collection_input.forward.element_identifier))[0]
         #set report_name = re.sub('_[0-9]+$', '', str(report_name)) + '.tab'

-        python $__tool_directory__/confil/confil.py --threads $threads --cutoff $cutoff --paired
+        confil --threads $threads --cutoff $cutoff --paired
         $input_type_conditional.collection_input.forward.element_identifier $input_type_conditional.collection_input.reverse.element_identifier
         && mv $report_name '$output_report'
         && ln -sf "${input_type_conditional.collection_input.forward}" '$list_output.forward'
@@ -76,9 +78,9 @@
             <param name="input_type" value="paired_collection" />
             <output name="output_report" ftype='tabular' file="seq.tab" />
             <output_collection name="list_output" type="paired">
-                <element name="forward">
+                <element name="forward" value="seq_1.fastq">
                 </element>
-                <element name="reverse">
+                <element name="reverse" value="seq_2.fastq">
                 </element>
             </output_collection>
         </test>
--- a/confil/confil.py	Mon Mar 04 07:42:05 2019 -0500
+++ b/confil/confil.py	Tue Mar 05 03:49:38 2019 -0500
@@ -1,17 +1,108 @@
+import distutils.spawn
 import os
+import re
+from shlex import split
+from subprocess import PIPE, Popen

 import click

-from kraken import kraken_installed, run_kraken
-
 # TODO: Remove
 KRAKEN2_DEFAULT_DB = "/tools/databases/kraken2/04092018/standard/"
 OUT_DIR = os.path.abspath(os.curdir)
-fastq_file_extensions = ['.fq', '.fastq']
+
+
+def db_path():
+    # Checking DB path
+    if os.path.exists(KRAKEN2_DEFAULT_DB):
+        return KRAKEN2_DEFAULT_DB
+    else:
+        return OUT_DIR
+
+
+def run_kraken(db, threads, cutoff, paired, seqfiles):
+    # Using the sample name to track report
+    seq_name = [os.path.splitext(os.path.basename(seq))[0]
+                for seq in seqfiles][0]
+    # remove _ and numbers
+    seq_name = re.sub('_[0-9]+$', '', seq_name)
+    # building cmd
+    cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format(
+        threads=threads, db=db, seq_name=seq_name)
+    if paired:
+        cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name)
+    cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles))
+    click.secho("Executing kraken2: \n{}\n".format(
+        split(cmd)), fg='bright_yellow')
+
+    # TODO: remove
+    # test_file = "https://raw.githubusercontent.com/COMBAT-TB/confil/master/test/test_data/test_file.tab"
+    # out_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
+    # mock_cmd = 'wget {} -O {}'.format(test_file, out_file)
+    # cmd = mock_cmd
+    # click.secho("Executing mock_cmd: \n{}\n".format(split(cmd)), fg='red')
+
+    p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True)
+    while True:
+        output = p.stdout.readline()
+        if output == '' and p.poll() is not None:
+            break
+        if output:
+            click.echo(output)
+    returncode = p.poll()
+    if returncode != 0:
+        error = p.stderr.readline()
+        raise OSError("Kraken2 launch error:\n{}\n".format(error))
+    # parse kraken report
+    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
+    parse_report(report_file=report_file, cutoff=cutoff)
+    return returncode
+
+
+def parse_report(report_file, cutoff):
+    file_name = os.path.splitext(os.path.basename(report_file))[0]
+    hit = None
+    if os.stat(report_file).st_size > 0 and report_file.endswith(".tab"):
+        click.secho("Processing {} with cutoff of {}...\n".format(
+            report_file, cutoff), fg='green')
+        with open(report_file, 'r') as report:
+            for line in report:
+                line = [str(e).strip() for e in line.split('\t')]
+                if len(line) > 1:
+                    click.secho('{}'.format(line), fg='green')
+                    # Percentage of fragments covered by the clade rooted at this taxon
+                    percentage = int(float(line[0]))
+                    # Number of fragments covered by the clade rooted at this taxon
+                    # num_covered = int(float(line[1]))
+                    # Number of fragments assigned directly to this taxon
+                    # num_assigned = int(float(line[2]))
+                    # NCBI taxonomic ID number
+                    # ncbi_tax = int(float(line[3]))
+                    # Indented scientific name (Mycobacterium\n)
+                    name = str(line[5]).strip()
+                    if percentage < cutoff and 'Mycobacterium' in name:
+                        click.secho('\n{}%: {} is contaminated!\n'.format(
+                            percentage, file_name), fg='red')
+                        raise SystemExit('{}%: {} is contaminated!\n'.format(
+                            percentage, file_name))
+                    if percentage >= cutoff and 'Mycobacterium' in name:
+                        click.secho('\n{}%: {} is not contaminated!\n'.format(
+                            percentage, file_name), fg='green')
+                        hit = line
+                        break
+    click.secho('Hit: {}'.format(hit), fg='green')
+    return hit
+
+
+def kraken_installed():
+    # check if `kraken2` is in path
+    installed = distutils.spawn.find_executable("kraken2")
+    if not installed:
+        raise OSError("kraken2 is not installed.")
+    return installed


 @click.command()
-@click.option('--db', default=OUT_DIR, required=True,
+@click.option('--db', default=db_path(), required=True,
               help='Name for Kraken 2 DB', type=click.Path(exists=True),
               show_default=True)
 @click.option('--threads', default=1, help='Number of threads',
--- a/confil/kraken.py	Mon Mar 04 07:42:05 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-import distutils.spawn
-import os
-import re
-from shlex import split
-from subprocess import PIPE, Popen
-
-import click
-
-from report import parse_report
-
-OUT_DIR = os.path.abspath(os.curdir)
-
-
-def kraken_installed():
-    # check if `kraken2` is in path
-    # TODO remove python
-    installed = distutils.spawn.find_executable("python")
-    if not installed:
-        raise OSError("kraken2 is not installed.")
-    return installed
-
-
-def run_kraken(db, threads, cutoff, paired, seqfiles):
-    # Using the sample name to track report
-    seq_name = [os.path.splitext(os.path.basename(seq))[0]
-                for seq in seqfiles][0]
-    # remove _ and numbers
-    seq_name = re.sub('_[0-9]+$', '', seq_name)
-    # building cmd
-    cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format(
-        threads=threads, db=db, seq_name=seq_name)
-    if paired:
-        cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name)
-    cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles))
-    click.secho("Executing kraken2: \n{}\n".format(
-        split(cmd)), fg='bright_yellow')
-
-    # TODO: remove
-    test_file = "https://raw.githubusercontent.com/COMBAT-TB/confil/master/test/test_data/test_file.tab"
-    out_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
-    mock_cmd = 'wget {} -O {}'.format(test_file, out_file)
-    cmd = mock_cmd
-    click.secho("Executing mock_cmd: \n{}\n".format(split(cmd)), fg='red')
-
-    p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True)
-    while True:
-        output = p.stdout.readline()
-        if output == '' and p.poll() is not None:
-            break
-        if output:
-            click.echo(output)
-    returncode = p.poll()
-    if returncode != 0:
-        error = p.stderr.readline()
-        raise OSError("Kraken2 launch error:\n{}\n".format(error))
-    # parse kraken report
-    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
-    parse_report(report_file=report_file, cutoff=cutoff)
-    return returncode
--- a/confil/report.py	Mon Mar 04 07:42:05 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-import os
-
-import click
-
-
-def parse_report(report_file, cutoff):
-    file_name = os.path.splitext(os.path.basename(report_file))[0]
-    hit = None
-    if os.stat(report_file).st_size > 0 and report_file.endswith(".tab"):
-        click.secho("Processing {} with cutoff of {}...\n".format(
-            report_file, cutoff), fg='green')
-        with open(report_file, 'r') as report:
-            for line in report:
-                line = [str(e).strip() for e in line.split('\t')]
-                if len(line) > 1:
-                    click.secho('{}'.format(line), fg='green')
-                    # Percentage of fragments covered by the clade rooted at this taxon
-                    percentage = int(float(line[0]))
-                    # Number of fragments covered by the clade rooted at this taxon
-                    # num_covered = int(float(line[1]))
-                    # Number of fragments assigned directly to this taxon
-                    # num_assigned = int(float(line[2]))
-                    # NCBI taxonomic ID number
-                    # ncbi_tax = int(float(line[3]))
-                    # Indented scientific name (Mycobacterium\n)
-                    name = str(line[5]).strip()
-                    if percentage < cutoff and 'Mycobacterium' in name:
-                        click.secho('\n{}%: {} is contaminated!\n'.format(
-                            percentage, file_name), fg='red')
-                        raise SystemExit('{}%: {} is contaminated!\n'.format(
-                            percentage, file_name))
-                    if percentage >= cutoff and 'Mycobacterium' in name:
-                        click.secho('\n{}%: {} is not contaminated!\n'.format(
-                            percentage, file_name), fg='green')
-                        hit = line
-                        break
-    click.secho('Hit: {}'.format(hit), fg='green')
-    return hit
--- a/setup.py	Mon Mar 04 07:42:05 2019 -0500
+++ b/setup.py	Tue Mar 05 03:49:38 2019 -0500
@@ -5,7 +5,7 @@

 setup(
     name='confil',
-    version='0.1.2',
+    version='0.1.3',
     url='https://github.com/COMBAT-TB/confil',
     description='Contamination filter',
     long_description=long_description,
--- a/test/test_report.py	Mon Mar 04 07:42:05 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-import os
-
-import pytest
-
-from confil.report import parse_report
-from test_runner import TEST_DATA_DIR
-
-TEST_REPORT = os.path.join(TEST_DATA_DIR, "test_file.tab")
-
-# test using a cutoff of 50%
-
-
-@pytest.mark.parametrize("test_input, expected", [
-    (type(parse_report(TEST_REPORT, 50)), list),
-    (parse_report(TEST_REPORT, 50)[5], 'Mycobacterium'),
-    (parse_report(TEST_REPORT, 50)[0], '55.84')
-])
-def test_parse_report(test_input, expected):
-    assert test_input == expected
-
-
-def test_parse_report_exception():
-    with pytest.raises(SystemExit):
-        parse_report(TEST_REPORT, 90)
--- a/test/test_runner.py	Mon Mar 04 07:42:05 2019 -0500
+++ b/test/test_runner.py	Tue Mar 05 03:49:38 2019 -0500
@@ -3,11 +3,11 @@
 import pytest
 from click.testing import CliRunner

-from confil.confil import confil
+from confil.confil import confil, parse_report

 CURR_DIR = os.path.dirname(os.path.abspath(__file__))
 TEST_DATA_DIR = os.path.join(CURR_DIR, "test_data/")
-
+TEST_REPORT = os.path.join(TEST_DATA_DIR, "test_file.tab")
 FILE_1 = os.path.join(TEST_DATA_DIR, "test_file_1.fastq")
 FILE_2 = os.path.join(TEST_DATA_DIR, "test_file_2.fastq")

@@ -18,6 +18,21 @@
     return runner


+@pytest.mark.skip(reason="No way of currently testing this. It's KRAKEN!")
 def test_runner(cli_runner):
     result = cli_runner.invoke(confil, ["--paired", FILE_1, FILE_2])
     assert result.exit_code == 0
+
+
+@pytest.mark.parametrize("test_input, expected", [
+    (type(parse_report(TEST_REPORT, 50)), list),
+    (parse_report(TEST_REPORT, 50)[5], 'Mycobacterium'),
+    (parse_report(TEST_REPORT, 50)[0], '55.84')
+])
+def test_parse_report(test_input, expected):
+    assert test_input == expected
+
+
+def test_parse_report_exception():
+    with pytest.raises(SystemExit):
+        parse_report(TEST_REPORT, 90)