Mercurial > repos > sanbi-uwc > confil
annotate confil/kraken.py @ 12:5ec97cccb3fe draft
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
author | sanbi-uwc |
---|---|
date | Mon, 11 Mar 2019 07:18:34 -0400 |
parents | |
children | bbf9ab2ebee7 |
rev | line source |
---|---|
12
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
1 import distutils.spawn |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
2 import os |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
3 import re |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
4 from shlex import split |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
5 from subprocess import PIPE, Popen |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
6 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
7 import click |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
8 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
9 from .report import parse_report |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
10 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
# Absolute path of the working directory, captured once when this module is
# imported. run_kraken() joins the report file name onto this directory before
# handing the report to parse_report().
OUT_DIR = os.path.abspath(os.curdir)
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
12 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
13 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
def kraken_installed():
    """Return the path of the ``kraken2`` executable found on ``PATH``.

    The lookup uses ``shutil.which`` when it is available, so only files
    that are actually executable are accepted.  The legacy
    ``distutils.spawn.find_executable`` merely checks that a file with that
    name exists (and looks in the current directory first), which can
    report a ``kraken2`` that cannot be launched.

    Returns:
        The path to the ``kraken2`` executable (a non-empty string).

    Raises:
        OSError: if no ``kraken2`` executable can be located.
    """
    try:
        from shutil import which  # Python 3.3+
    except ImportError:
        # Python 2 has no shutil.which; keep the legacy lookup there.
        from distutils.spawn import find_executable as which

    installed = which("kraken2")
    if not installed:
        raise OSError("kraken2 is not installed.")
    return installed
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
20 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
21 |
5ec97cccb3fe
planemo upload for repository https://github.com/COMBAT-TB/confil commit b1a96c1b50cea70a20d4e606100879da8b6ed1b2
sanbi-uwc
parents:
diff
changeset
|
def run_kraken(db, threads, cutoff, paired, seqfiles):
    """Run ``kraken2`` on the given sequence files and parse its report.

    The sample name is derived from the first entry of ``seqfiles``: its
    base name without the last extension, with a trailing ``_<digits>``
    suffix removed.  kraken2 is asked to write ``<sample>.out`` and
    ``<sample>.tab`` (relative to the current working directory); the
    ``.tab`` report is then looked up under ``OUT_DIR`` and passed to
    ``parse_report`` together with ``cutoff``.

    Args:
        db: value passed to kraken2's ``--db`` option.
        threads: value passed to kraken2's ``--threads`` option.
        cutoff: forwarded unchanged to ``parse_report``.
        paired: when truthy, ``--paired`` and
            ``--classified-out <sample>_cseqs#.fq`` are added.
        seqfiles: non-empty sequence of input file paths; an empty
            sequence raises ``IndexError``.

    Returns:
        The exit status of kraken2 (``0``, since any other status raises).

    Raises:
        OSError: if kraken2 cannot be started or exits with a non-zero
            status; in the latter case the message carries kraken2's
            standard error output.
    """
    # Only the first file names the sample, so there is no need to compute
    # a name for every input.
    seq_name = os.path.splitext(os.path.basename(seqfiles[0]))[0]
    # Strip a trailing "_<digits>" (e.g. the read number of a paired file).
    seq_name = re.sub(r'_[0-9]+$', '', seq_name)

    # Build the argument vector directly instead of formatting a string and
    # re-splitting it: paths containing spaces or quotes stay intact.
    cmd = [
        "kraken2",
        "--threads", str(threads),
        "--db", str(db),
        "--output", "{}.out".format(seq_name),
        "--report", "{}.tab".format(seq_name),
    ]
    if paired:
        cmd += ["--paired",
                "--classified-out", "{}_cseqs#.fq".format(seq_name)]
    cmd += [str(seq) for seq in seqfiles]
    click.secho("Executing kraken2: \n{}\n".format(cmd), fg='bright_yellow')

    # universal_newlines=True yields text on both Python 2 and 3; comparing
    # bytes from the pipe against '' never matches on Python 3.
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, close_fds=True,
              universal_newlines=True)
    # communicate() drains stdout and stderr together, so the child cannot
    # block on a full stderr pipe while we are waiting on stdout.
    stdout, stderr = p.communicate()
    for line in stdout.splitlines():
        click.echo(line)

    returncode = p.returncode
    if returncode != 0:
        raise OSError("Kraken2 launch error:\n{}\n".format(stderr))

    # parse kraken report
    report_file = os.path.join(OUT_DIR, "{}.tab".format(seq_name))
    parse_report(report_file=report_file, cutoff=cutoff)
    return returncode