annotate confil/kraken.py @ 12:5ec97cccb3fe draft

planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
author sanbi-uwc
date Mon, 11 Mar 2019 07:18:34 -0400
parents
children bbf9ab2ebee7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
1 import distutils.spawn
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
2 import os
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
3 import re
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
4 from shlex import split
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
5 from subprocess import PIPE, Popen
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
6
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
7 import click
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
8
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
9 from .report import parse_report
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
10
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
# Directory in which run_kraken looks for the kraken2 report file: the
# process working directory, resolved once when this module is imported.
OUT_DIR = os.path.abspath(os.curdir)
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
12
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
13
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
def kraken_installed():
    """Return the full path of the ``kraken2`` executable found on ``PATH``.

    Returns:
        The path to the ``kraken2`` executable as a string.

    Raises:
        OSError: if no ``kraken2`` executable can be located.
    """
    # distutils is deprecated (PEP 632) and removed in Python 3.12, so prefer
    # shutil.which and only fall back to distutils on interpreters that
    # predate it.
    try:
        from shutil import which
    except ImportError:  # Python 2
        from distutils.spawn import find_executable as which

    installed = which("kraken2")
    if not installed:
        raise OSError("kraken2 is not installed.")
    return installed
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
20
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
21
5ec97cccb3fe planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff changeset
def run_kraken(db, threads, cutoff, paired, seqfiles):
    """Run ``kraken2`` on *seqfiles* and parse the report it writes.

    The output (``<name>.out``) and report (``<name>.tab``) files are named
    after the first sequence file: its basename without the last extension
    and without a trailing ``_<digits>`` suffix.

    Args:
        db: value passed to ``kraken2 --db``.
        threads: value passed to ``kraken2 --threads``.
        cutoff: forwarded unchanged to ``parse_report``.
        paired: when true, ``--paired`` and ``--classified-out`` are added.
        seqfiles: input sequence file paths; must not be empty.

    Returns:
        The kraken2 exit status (0, since any other status raises).

    Raises:
        IndexError: if *seqfiles* is empty.
        OSError: if kraken2 cannot be started or exits with a non-zero
            status; the message carries everything kraken2 wrote to stderr.
    """
    seqfiles = list(seqfiles)
    # Using the sample name (taken from the first file) to track the report
    seq_name = os.path.splitext(os.path.basename(seqfiles[0]))[0]
    # remove a trailing _<digits> suffix (e.g. the read-pair number)
    seq_name = re.sub(r'_[0-9]+$', '', seq_name)

    # Build the command as an argument list rather than a string that is
    # re-split later, so paths containing spaces or quotes stay intact.
    cmd = [
        "kraken2",
        "--threads", str(threads),
        "--db", str(db),
        "--output", "{}.out".format(seq_name),
        "--report", "{}.tab".format(seq_name),
    ]
    if paired:
        cmd += ["--paired", "--classified-out",
                "{}_cseqs#.fq".format(seq_name)]
    cmd.extend(seqfiles)
    click.secho("Executing kraken2: \n{}\n".format(cmd), fg='bright_yellow')

    # communicate() drains stdout and stderr together, so the child cannot
    # block on a full stderr pipe, and text mode yields str on Python 3
    # (a bytes-vs-'' comparison would never detect end of output).
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, close_fds=True,
              universal_newlines=True)
    output, error = p.communicate()
    if output:
        click.echo(output)
    returncode = p.returncode
    if returncode != 0:
        raise OSError("Kraken2 launch error:\n{}\n".format(error))

    # parse kraken report
    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
    parse_report(report_file=report_file, cutoff=cutoff)
    return returncode