annotate novo_align.py @ 3:2cadfddb73cd draft

planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
author sanbi-uwc
date Wed, 09 Mar 2016 05:42:36 -0500
parents 7e0286bd9405
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
2
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
3 from __future__ import print_function
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
4 import argparse
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
5 from subprocess import check_call, CalledProcessError
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
6 from json import load, dump, dumps
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
7 from os import environ, mkdir, makedirs, path
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
8 from os.path import isdir, exists
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
9 import shlex
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
10 import sys
3
2cadfddb73cd planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
sanbi-uwc
parents: 0
diff changeset
11 import logging
2cadfddb73cd planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
sanbi-uwc
parents: 0
diff changeset
12 log = logging.getLogger( __name__ )
0
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
13
3
2cadfddb73cd planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
sanbi-uwc
parents: 0
diff changeset
def novo_align(output_filename, index_filename, fwd_file, rev_file):
    """Align a paired-end read set with novoalign and store the result as BAM.

    Runs the equivalent of the shell pipeline
    ``novoalign ... -o SAM <read group> | samtools view -bS - > output``
    without involving a shell: novoalign's stdout is piped straight into
    samtools, whose stdout is written to ``output_filename``.

    Args:
        output_filename: Path of the BAM file to create (overwritten).
        index_filename: Path of the novoalign index passed to ``-d``.
        fwd_file: Forward reads file (first ``-f`` argument).
        rev_file: Reverse reads file (second ``-f`` argument).

    Returns:
        None. If either program exits with a non-zero status, a message is
        printed to stderr and the function returns normally.

    Raises:
        OSError: If an executable cannot be started or the output file
            cannot be opened.
    """
    # Local import: the module header only imports check_call and
    # CalledProcessError from subprocess.
    from subprocess import PIPE, Popen

    # Kept raw so the literal backslash-t sequences reach novoalign exactly as
    # a single-quoted shell argument would deliver them.
    read_group = r'@RG\tID:readgroup\tPU:platform unit\tLB:library'

    # Explicit argument lists: no tokenising of a formatted string, so paths
    # with spaces and the read group (which contains a space) stay intact.
    novoalign_cmd = [
        'novoalign', '-c', '8', '-k',
        '-d', index_filename,
        '-f', fwd_file, rev_file,
        '-i', 'PE', '250,100',
        '-o', 'SAM', read_group,
    ]
    samtools_cmd = ['samtools', 'view', '-bS', '-']

    try:
        with open(output_filename, 'wb') as bam_out:
            aligner = Popen(novoalign_cmd, stdout=PIPE)
            try:
                converter = Popen(samtools_cmd, stdin=aligner.stdout, stdout=bam_out)
            except OSError:
                # samtools could not be started: do not leave novoalign
                # running with nobody reading its output.
                aligner.kill()
                aligner.wait()
                raise
            finally:
                # Drop our copy of the pipe so novoalign receives SIGPIPE if
                # samtools exits early.
                aligner.stdout.close()
            converter_status = converter.wait()
            aligner_status = aligner.wait()
        if aligner_status != 0:
            raise CalledProcessError(aligner_status, novoalign_cmd)
        if converter_status != 0:
            raise CalledProcessError(converter_status, samtools_cmd)
    except CalledProcessError:
        print("Error running the nova-align", file=sys.stderr)
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
31
3
2cadfddb73cd planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
sanbi-uwc
parents: 0
diff changeset
def newSplit(value):
    """Split a command-line string into a list of whitespace-separated tokens.

    Only double quotes group words into a single token (the quote characters
    stay part of the token); single quotes and ``#`` are ordinary characters.

    Args:
        value: The string to tokenise.

    Returns:
        A list of token strings, empty when ``value`` has no tokens.
    """
    tokenizer = shlex.shlex(value)
    tokenizer.commenters = ''          # '#' does not start a comment
    tokenizer.whitespace_split = True  # split on whitespace only
    tokenizer.quotes = '"'             # only double quotes group words
    # Pull tokens until the lexer reports its end-of-input marker.
    return list(iter(tokenizer.get_token, tokenizer.eof))
2cadfddb73cd planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
sanbi-uwc
parents: 0
diff changeset
38
0
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
def main():
    """Parse the command line and run the alignment.

    Expects one positional argument (the output BAM path) and the options
    ``--index_filename``, ``--forward_filename`` and ``--reverse_filename``.
    All three options are required; argparse reports a usage error when one
    is missing.

    ``--index_filename`` is treated as a directory: the index file handed to
    novo_align is the entry inside it that carries the directory's own name,
    i.e. ``<dir>/<basename of dir>``.
    """
    parser = argparse.ArgumentParser(description="Generate a BAM file from the Novo Align tool")
    parser.add_argument('output_filename')
    parser.add_argument('--index_filename', required=True)
    parser.add_argument('--forward_filename', required=True)
    parser.add_argument('--reverse_filename', required=True)
    args = parser.parse_args()

    # Strip trailing slashes first; otherwise the final path component is
    # empty and the derived index path would end in "//".
    index_dir = args.index_filename.rstrip("/")
    index_file_path = path.join(index_dir, path.basename(index_dir))

    novo_align(args.output_filename, index_file_path, args.forward_filename, args.reverse_filename)
0
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
51
7e0286bd9405 planemo upload for repository https://github.com/zipho/novo_align commit e1bedce305f6cec5dcbc72bc8282665517f902e0
sanbi-uwc
parents:
diff changeset
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()