changeset 12:5ec97cccb3fe draft

planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
author sanbi-uwc
date Mon, 11 Mar 2019 07:18:34 -0400
parents 72cb19a32d8b
children bbf9ab2ebee7
files conda/meta.yaml confil.egg-info/PKG-INFO confil.egg-info/SOURCES.txt confil.xml confil/.confil.py.swp confil/__init__.pyc confil/confil.py confil/confil.pyc confil/kraken.py confil/kraken.pyc confil/report.py confil/report.pyc setup.py test/test_report.py test/test_runner.py
diffstat 15 files changed, 171 insertions(+), 123 deletions(-) [+]
line wrap: on
line diff
--- a/conda/meta.yaml	Fri Mar 08 13:55:10 2019 -0500
+++ b/conda/meta.yaml	Mon Mar 11 07:18:34 2019 -0400
@@ -1,5 +1,5 @@
-{% set version = "0.1.3" %}
-{% set sha256 = "8ae72d4c136edff4e755462f6c57c903edb8479ea68d6f0ccdb38f6c56e42d04" %}
+{% set version = "0.1.4" %}
+{% set sha256 = "1ebbb14e64e057d6b58f4942a7b1dbf9e53f4ab741709b0ecfcb8d358fb8848e" %}
 
 package:
   name: confil
@@ -11,7 +11,7 @@
 
 build:
   script: "{{ PYTHON }} -m pip install . --no-deps -vv"
-  number: 1
+  number: 0
 
 requirements:
   build:
--- a/confil.egg-info/PKG-INFO	Fri Mar 08 13:55:10 2019 -0500
+++ b/confil.egg-info/PKG-INFO	Mon Mar 11 07:18:34 2019 -0400
@@ -1,9 +1,12 @@
-Metadata-Version: 2.1
+Metadata-Version: 1.0
 Name: confil
-Version: 0.1.3.dev20190307
+Version: 0.1.2.dev20190304
 Summary: Contamination filter
 Home-page: https://github.com/COMBAT-TB/confil
+Author: UNKNOWN
+Author-email: UNKNOWN
 License: UNKNOWN
+Description-Content-Type: text/markdown
 Description: # confil
         
         [![Build Status](https://travis-ci.org/COMBAT-TB/confil.svg?branch=master)](https://travis-ci.org/COMBAT-TB/confil)
@@ -33,4 +36,3 @@
         
 Keywords: contamination,filter
 Platform: UNKNOWN
-Description-Content-Type: text/markdown
--- a/confil.egg-info/SOURCES.txt	Fri Mar 08 13:55:10 2019 -0500
+++ b/confil.egg-info/SOURCES.txt	Mon Mar 11 07:18:34 2019 -0400
@@ -3,10 +3,13 @@
 setup.py
 confil/__init__.py
 confil/confil.py
+confil/kraken.py
+confil/report.py
 confil.egg-info/PKG-INFO
 confil.egg-info/SOURCES.txt
 confil.egg-info/dependency_links.txt
 confil.egg-info/entry_points.txt
 confil.egg-info/requires.txt
 confil.egg-info/top_level.txt
+test/test_report.py
 test/test_runner.py
\ No newline at end of file
--- a/confil.xml	Fri Mar 08 13:55:10 2019 -0500
+++ b/confil.xml	Mon Mar 11 07:18:34 2019 -0400
@@ -1,6 +1,6 @@
-<tool id="confil" name="Contamination Filter (confil)" version="@VERSION@+galaxy4">
+<tool id="confil" name="Contamination Filter (confil)" version="@VERSION@+galaxy0">
     <macros>
-        <token name="@VERSION@">0.1.3</token>
+        <token name="@VERSION@">0.1.4</token>
     </macros>
     <requirements>
         <requirement type="package" version="@VERSION@">confil</requirement>
@@ -20,9 +20,10 @@
           $input_type_conditional.single_input.element_identifier ;
         then 
           ln -sf "${input_type_conditional.single_input}" '$single_output_file' ;
-	else
-	  touch '$single_output_file' ;
-	fi
+	    else
+	      touch '$single_output_file' ;
+	    fi
+
         ####### Paired Collection
         #elif $input_type == "paired_collection"
         ln -s "${input_type_conditional.collection_input.forward}" $input_type_conditional.collection_input.forward.element_identifier &&
@@ -31,15 +32,16 @@
         #set report_name = re.sub('_[0-9]+$', '', str(report_name)) + '.tab'
 
         if confil --threads \${GALAXY_SLOTS:-1} --cutoff $cutoff --paired 
-        $input_type_conditional.collection_input.forward.element_identifier $input_type_conditional.collection_input.reverse.element_identifier ;
-	then 
-          ln -sf "${input_type_conditional.collection_input.forward}" '$list_output.forward'
-          && ln -sf "${input_type_conditional.collection_input.reverse}" '$list_output.reverse' ;
-	else
-	  touch '$list_output.forward' && touch '$list_output.reverse' ;
-	fi
+            $input_type_conditional.collection_input.forward.element_identifier $input_type_conditional.collection_input.reverse.element_identifier ;
+	    then 
+            ln -sf "${input_type_conditional.collection_input.forward}" '$list_output.forward'
+            && ln -sf "${input_type_conditional.collection_input.reverse}" '$list_output.reverse' ;
+	    else
+	        touch '$list_output.forward' && touch '$list_output.reverse' ;
+	    fi
         #end if
         && mv $report_name '$output_report'
+
     ]]>
     </command>
     <inputs>
@@ -58,11 +60,11 @@
         <param name="cutoff" type="integer" label="Cutoff percentage" value="90" min="50" max="99" />
     </inputs>
     <outputs>
-        <data name="output_report" format="tabular" label="Kraken2 report" />
-        <data name="single_output_file" format="fastq" label="${tool.name} single output">
+        <data name="output_report" format="tabular" label="${tool.name} on ${on_string} kraken2 report" />
+        <data name="single_output_file" format="fastq" label="${tool.name} on ${on_string} single output">
             <filter>input_type_conditional['input_type'] == "single"</filter>
         </data>
-        <collection name="list_output" type="paired" label="${tool.name} paired output" structured_like="collection_input" inherit_format="true">
+        <collection name="list_output" type="paired" label="${tool.name} on ${on_string} paired output" structured_like="collection_input" inherit_format="true">
             <filter>input_type_conditional['input_type'] == "paired_collection"</filter>
             <data name="forward" format="fastq" />
             <data name="reverse" format="fastq" />
Binary file confil/.confil.py.swp has changed
Binary file confil/__init__.pyc has changed
--- a/confil/confil.py	Fri Mar 08 13:55:10 2019 -0500
+++ b/confil/confil.py	Mon Mar 11 07:18:34 2019 -0400
@@ -1,11 +1,9 @@
-import distutils.spawn
 import os
-import re
-from shlex import split
-from subprocess import PIPE, Popen
 
 import click
 
+from .kraken import kraken_installed, run_kraken
+
 # TODO: Remove
 KRAKEN2_DEFAULT_DB = "/tools/databases/kraken2/04092018/standard/"
 OUT_DIR = os.path.abspath(os.curdir)
@@ -19,88 +17,6 @@
         return OUT_DIR
 
 
-def run_kraken(db, threads, cutoff, paired, seqfiles):
-    # Using the sample name to track report
-    seq_name = [os.path.splitext(os.path.basename(seq))[0]
-                for seq in seqfiles][0]
-    # remove _ and numbers
-    seq_name = re.sub('_[0-9]+$', '', seq_name)
-    # building cmd
-    cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format(
-        threads=threads, db=db, seq_name=seq_name)
-    if paired:
-        cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name)
-    cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles))
-    click.secho("Executing kraken2: \n{}\n".format(
-        split(cmd)), fg='bright_yellow')
-
-    # TODO: remove
-    # test_file = "https://raw.githubusercontent.com/COMBAT-TB/confil/master/test/test_data/test_file.tab"
-    # out_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
-    # mock_cmd = 'wget {} -O {}'.format(test_file, out_file)
-    # cmd = mock_cmd
-    # click.secho("Executing mock_cmd: \n{}\n".format(split(cmd)), fg='red')
-
-    p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True)
-    while True:
-        output = p.stdout.readline()
-        if output == '' and p.poll() is not None:
-            break
-        if output:
-            click.echo(output)
-    returncode = p.poll()
-    if returncode != 0:
-        error = p.stderr.readline()
-        raise OSError("Kraken2 launch error:\n{}\n".format(error))
-    # parse kraken report
-    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
-    parse_report(report_file=report_file, cutoff=cutoff)
-    return returncode
-
-
-def parse_report(report_file, cutoff):
-    file_name = os.path.splitext(os.path.basename(report_file))[0]
-    hit = None
-    if os.stat(report_file).st_size > 0 and report_file.endswith(".tab"):
-        click.secho("Processing {} with cutoff of {}...\n".format(
-            report_file, cutoff), fg='green')
-        with open(report_file, 'r') as report:
-            for line in report:
-                line = [str(e).strip() for e in line.split('\t')]
-                if len(line) > 1:
-                    click.secho('{}'.format(line), fg='green')
-                    # Percentage of fragments covered by the clade rooted at this taxon
-                    percentage = int(float(line[0]))
-                    # Number of fragments covered by the clade rooted at this taxon
-                    # num_covered = int(float(line[1]))
-                    # Number of fragments assigned directly to this taxon
-                    # num_assigned = int(float(line[2]))
-                    # NCBI taxonomic ID number
-                    # ncbi_tax = int(float(line[3]))
-                    # Indented scientific name (Mycobacterium\n)
-                    name = str(line[5]).strip()
-                    if percentage < cutoff and 'Mycobacterium' in name:
-                        click.secho('\n{}%: {} is contaminated!\n'.format(
-                            percentage, file_name), fg='red')
-                        raise SystemExit('{}%: {} is contaminated!\n'.format(
-                            percentage, file_name))
-                    if percentage >= cutoff and 'Mycobacterium' in name:
-                        click.secho('\n{}%: {} is not contaminated!\n'.format(
-                            percentage, file_name), fg='green')
-                        hit = line
-                        break
-    click.secho('Hit: {}'.format(hit), fg='green')
-    return hit
-
-
-def kraken_installed():
-    # check if `kraken2` is in path
-    installed = distutils.spawn.find_executable("kraken2")
-    if not installed:
-        raise OSError("kraken2 is not installed.")
-    return installed
-
-
 @click.command()
 @click.option('--db', default=db_path(), required=True,
               help='Name for Kraken 2 DB', type=click.Path(exists=True),
Binary file confil/confil.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/confil/kraken.py	Mon Mar 11 07:18:34 2019 -0400
@@ -0,0 +1,58 @@
+import distutils.spawn
+import os
+import re
+from shlex import split
+from subprocess import PIPE, Popen
+
+import click
+
+from .report import parse_report
+
+OUT_DIR = os.path.abspath(os.curdir)
+
+
+def kraken_installed():
+    """Return the path of the `kraken2` executable; raise OSError if absent."""
+    kraken_path = distutils.spawn.find_executable("kraken2")
+    if kraken_path:
+        return kraken_path
+    raise OSError("kraken2 is not installed.")
+
+
+def run_kraken(db, threads, cutoff, paired, seqfiles):
+    """Run kraken2 on ``seqfiles`` and parse the report it writes.
+
+    kraken2 is told to write ``<sample>.tab`` (``<sample>`` is the first file's
+    base name minus extension and trailing ``_<digits>``), read from OUT_DIR.
+    Returns kraken2's exit code (0); raises OSError on a non-zero exit.
+    ``parse_report`` is called on the report and may raise SystemExit.
+    """
+    # Using the sample name to track report
+    seq_name = os.path.splitext(os.path.basename(seqfiles[0]))[0]
+    # remove _ and numbers
+    seq_name = re.sub('_[0-9]+$', '', seq_name)
+    # building cmd
+    cmd = "kraken2 --threads {threads} --db {db} --output {seq_name}.out --report {seq_name}.tab ".format(
+        threads=threads, db=db, seq_name=seq_name)
+    if paired:
+        cmd += "--paired --classified-out {}_cseqs#.fq ".format(seq_name)
+    cmd += "{seqfiles}".format(seqfiles=' '.join(seqfiles))
+    click.secho("Executing kraken2: \n{}\n".format(
+        split(cmd)), fg='bright_yellow')
+
+    # universal_newlines=True makes the pipes yield text on Python 2 and 3
+    # (a bytes read never equals '', which made the old read loop endless).
+    # communicate() drains stdout and stderr together, so a full stderr pipe
+    # cannot block kraken2 and the complete error text is available below.
+    p = Popen(split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True,
+              universal_newlines=True)
+    output, error = p.communicate()
+    if output:
+        click.echo(output)
+    returncode = p.returncode
+    if returncode != 0:
+        raise OSError("Kraken2 launch error:\n{}\n".format(error))
+    # parse kraken report
+    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
+    parse_report(report_file=report_file, cutoff=cutoff)
+    return returncode
Binary file confil/kraken.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/confil/report.py	Mon Mar 11 07:18:34 2019 -0400
@@ -0,0 +1,48 @@
+import os
+
+import click
+
+
+def parse_report(report_file, cutoff):
+    """Return the first 'Mycobacterium' row of a kraken2 report, or None.
+
+    Column 0 is the clade percentage (truncated to an int) and column 5 the
+    taxon name. Raises SystemExit when 'unclassified' reaches ``cutoff`` or
+    'Mycobacterium' is below it; OSError when the report cannot be opened.
+    """
+    file_name = os.path.splitext(os.path.basename(report_file))[0]
+    hit = None
+    try:
+        rpt_file = open(report_file, 'r')
+    except OSError as e:
+        raise OSError(
+            "{} does not exist!\n{}".format(report_file, e))
+    click.secho("Processing {} with cutoff of {}...\n".format(
+        report_file, cutoff), fg='green')
+    with rpt_file as report:
+        for line in report:
+            line = [field.strip() for field in line.split('\t')]
+            # Short/blank rows have no name column (index 5); skip them.
+            if len(line) < 6:
+                continue
+            click.secho('{}'.format(line), fg='green')
+            percentage = int(float(line[0]))
+            name = line[5]
+            if percentage >= cutoff and name == 'unclassified':
+                click.secho('\n{}%: {} is unclassified!\n'.format(
+                    percentage, file_name), fg='red')
+                raise SystemExit('{}%: {} is unclassified!\n'.format(
+                    percentage, file_name))
+            if 'Mycobacterium' not in name:
+                continue
+            if percentage < cutoff:
+                click.secho('\n{}%: {} is contaminated!\n'.format(
+                    percentage, file_name), fg='red')
+                raise SystemExit('{}%: {} is contaminated!\n'.format(
+                    percentage, file_name))
+            click.secho('\n{}%: {} is not contaminated!\n'.format(
+                percentage, file_name), fg='green')
+            hit = line
+            break
+    click.secho('Hit: {}'.format(hit), fg='green')
+    return hit
Binary file confil/report.pyc has changed
--- a/setup.py	Fri Mar 08 13:55:10 2019 -0500
+++ b/setup.py	Mon Mar 11 07:18:34 2019 -0400
@@ -5,7 +5,7 @@
 
 setup(
     name='confil',
-    version='0.1.3',
+    version='0.1.4',
     url='https://github.com/COMBAT-TB/confil',
     description='Contamination filter',
     long_description=long_description,
@@ -14,6 +14,14 @@
     py_modules=['confil'],
     packages=find_packages(),
     include_package_data=True,
+    license="GPLv3",
+    classifiers=[
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+    ],
     install_requires=[
         'click'
     ],
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_report.py	Mon Mar 11 07:18:34 2019 -0400
@@ -0,0 +1,25 @@
+import os
+
+import pytest
+
+from confil.report import parse_report
+from test_runner import TEST_DATA_DIR
+
+TEST_REPORT = os.path.join(TEST_DATA_DIR, "test_file.tab")
+
+# test using a cutoff of 50%
+
+
+@pytest.mark.parametrize("test_input, expected", [
+    (type, list),
+    (lambda hit: hit[5], 'Mycobacterium'), (lambda hit: hit[0], '55.84'),
+])
+def test_parse_report(test_input, expected):
+    """Parse inside the test so a parser error fails a case, not collection."""
+    assert test_input(parse_report(TEST_REPORT, 50)) == expected
+
+
+@pytest.mark.parametrize("cutoff", [90, 20])  # NOTE(review): confirm 20 exits
+def test_parse_report_exception(cutoff):
+    with pytest.raises(SystemExit):  # one call per case; a raise ends the block
+        parse_report(TEST_REPORT, cutoff)
--- a/test/test_runner.py	Fri Mar 08 13:55:10 2019 -0500
+++ b/test/test_runner.py	Mon Mar 11 07:18:34 2019 -0400
@@ -3,7 +3,7 @@
 import pytest
 from click.testing import CliRunner
 
-from confil.confil import confil, parse_report
+from confil.confil import confil
 
 CURR_DIR = os.path.dirname(os.path.abspath(__file__))
 TEST_DATA_DIR = os.path.join(CURR_DIR, "test_data/")
@@ -22,17 +22,3 @@
 def test_runner(cli_runner):
     result = cli_runner.invoke(confil, ["--paired", FILE_1, FILE_2])
     assert result.exit_code == 0
-
-
-@pytest.mark.parametrize("test_input, expected", [
-    (type(parse_report(TEST_REPORT, 50)), list),
-    (parse_report(TEST_REPORT, 50)[5], 'Mycobacterium'),
-    (parse_report(TEST_REPORT, 50)[0], '55.84')
-])
-def test_parse_report(test_input, expected):
-    assert test_input == expected
-
-
-def test_parse_report_exception():
-    with pytest.raises(SystemExit):
-        parse_report(TEST_REPORT, 90)