annotate data_manager/bwameth_index_builder.py @ 0:a801458261bb draft

Uploaded
author dpryan79
date Thu, 15 Sep 2016 04:59:28 -0400
parents
children f5a15a12a4c9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a801458261bb Uploaded
dpryan79
parents:
diff changeset
1 #!/usr/bin/env python
a801458261bb Uploaded
dpryan79
parents:
diff changeset
2 # Based heavily on the HISAT2 data manager wrapper
a801458261bb Uploaded
dpryan79
parents:
diff changeset
3
a801458261bb Uploaded
dpryan79
parents:
diff changeset
4 import argparse
a801458261bb Uploaded
dpryan79
parents:
diff changeset
5 import os
a801458261bb Uploaded
dpryan79
parents:
diff changeset
6 import shlex
a801458261bb Uploaded
dpryan79
parents:
diff changeset
7 import subprocess
a801458261bb Uploaded
dpryan79
parents:
diff changeset
8 import sys
a801458261bb Uploaded
dpryan79
parents:
diff changeset
9 from json import dumps, loads
a801458261bb Uploaded
dpryan79
parents:
diff changeset
10
a801458261bb Uploaded
dpryan79
parents:
diff changeset
11 DEFAULT_DATA_TABLE_NAME = "bwameth_indexes"
a801458261bb Uploaded
dpryan79
parents:
diff changeset
12
a801458261bb Uploaded
dpryan79
parents:
diff changeset
13
a801458261bb Uploaded
dpryan79
parents:
diff changeset
def build_hisat_index(data_manager_dict, params, args):
    """Build a bwa-meth index for a FASTA file and record it in the data table.

    The function name is a leftover from the HISAT2 wrapper this script is
    based on; the command actually run is ``bwameth.py index``.

    Args:
        data_manager_dict: dict that receives the new data table entry.
        params: parsed parameter JSON; the target directory is read from
            ``params['output_data'][0]['extra_files_path']``.
        args: parsed command-line namespace providing ``data_table_name``,
            ``fasta_filename``, ``dbkey`` and ``name``.

    Exits the process with the indexer's return code if indexing fails.
    """
    data_table_name = args.data_table_name
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)

    fasta_base_name = os.path.basename(args.fasta_filename)
    sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
    # Link to the absolute path: a relative link target would be resolved
    # relative to target_directory and leave a dangling symlink.
    os.symlink(os.path.abspath(args.fasta_filename), sym_linked_fasta_filename)

    # Keep the command in its own variable so the `args` namespace stays
    # usable for the data table entry below.
    cmd = ['bwameth.py', 'index', sym_linked_fasta_filename]
    proc = subprocess.Popen(args=cmd, shell=False, cwd=target_directory)
    return_code = proc.wait()
    if return_code:
        sys.stderr.write("Error building index.\n")
        sys.exit(return_code)

    # NOTE(review): the original referenced an undefined `sequence_id` here.
    # The FASTA base name (relative to the target directory) is used as the
    # index path instead -- confirm against the data manager configuration.
    data_table_entry = dict(
        value=args.name,
        dbkey=args.dbkey,
        name=args.name,
        path=fasta_base_name,
    )
    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
a801458261bb Uploaded
dpryan79
parents:
diff changeset
30
a801458261bb Uploaded
dpryan79
parents:
diff changeset
31
a801458261bb Uploaded
dpryan79
parents:
diff changeset
32 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
a801458261bb Uploaded
dpryan79
parents:
diff changeset
33 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
a801458261bb Uploaded
dpryan79
parents:
diff changeset
34 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )
a801458261bb Uploaded
dpryan79
parents:
diff changeset
35 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
a801458261bb Uploaded
dpryan79
parents:
diff changeset
36 return data_manager_dict
a801458261bb Uploaded
dpryan79
parents:
diff changeset
37
a801458261bb Uploaded
dpryan79
parents:
diff changeset
38
a801458261bb Uploaded
dpryan79
parents:
diff changeset
def main():
    """Parse the command line, build the index and write the result JSON.

    The file named by ``--output`` is first read as JSON parameters and,
    after the index has been built, overwritten with the JSON-encoded data
    manager dictionary.

    Raises:
        Exception: if ``--dbkey`` is missing, empty or ``'?'``.
    """
    # Parse Command Line
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default=None)
    parser.add_argument('--fasta_filename', default=None)
    parser.add_argument('--dbkey', default=None)
    parser.add_argument('--name', default=None)
    parser.add_argument('--description', default=None)
    parser.add_argument('--data_table_name', default=DEFAULT_DATA_TABLE_NAME)
    args = parser.parse_args()

    filename = args.output
    with open(filename) as handle:
        params = loads(handle.read())
    data_manager_dict = {}

    if args.dbkey in [None, '', '?']:
        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (args.dbkey))

    # build the index (the builder takes the parsed params as well as args)
    build_hisat_index(data_manager_dict, params, args)

    # save info to json file; text mode because dumps() returns a str
    with open(filename, 'w') as handle:
        handle.write(dumps(data_manager_dict))
a801458261bb Uploaded
dpryan79
parents:
diff changeset
62
a801458261bb Uploaded
dpryan79
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()