Mercurial > repos > sanbi-uwc > data_manager_rnastar_index_builder
comparison data_manager/rnastar_index_builder.py @ 12:a42dbab7917c draft
planemo upload for repository https://github.com/pvanheus/data_manager_rnastar_index_builder commit 599e0c3b37869cc016156f7058e16d6c268bc703
author | sanbi-uwc |
---|---|
date | Mon, 15 Feb 2016 06:55:55 -0500 |
parents | 6096741e8d83 |
children | e905fcc6fd15 |
comparison
equal
deleted
inserted
replaced
11:6096741e8d83 | 12:a42dbab7917c |
---|---|
7 from os import environ, mkdir, makedirs | 7 from os import environ, mkdir, makedirs |
8 from os.path import isdir, exists | 8 from os.path import isdir, exists |
9 import shlex | 9 import shlex |
10 import sys | 10 import sys |
11 | 11 |
12 def get_id_name( params, dbkey, fasta_description=None): | 12 |
13 #TODO: ensure sequence_id is unique and does not already appear in location file | 13 def get_id_name(params, dbkey, fasta_description=None): |
14 # TODO: ensure sequence_id is unique and does not already appear in location file | |
14 sequence_id = params['param_dict']['sequence_id'] | 15 sequence_id = params['param_dict']['sequence_id'] |
15 if not sequence_id: | 16 if not sequence_id: |
16 sequence_id = dbkey | 17 sequence_id = dbkey |
17 | 18 |
18 sequence_name = params['param_dict']['sequence_name'] | 19 sequence_name = params['param_dict']['sequence_name'] |
19 if not sequence_name: | 20 if not sequence_name: |
20 sequence_name = fasta_description | 21 sequence_name = fasta_description |
21 if not sequence_name: | 22 if not sequence_name: |
22 sequence_name = dbkey | 23 sequence_name = dbkey |
23 return sequence_id, sequence_name | 24 return sequence_id, sequence_name |
24 | 25 |
26 | |
25 def make_rnastar_index(output_directory, fasta_filename): | 27 def make_rnastar_index(output_directory, fasta_filename): |
26 #STAR | |
27 # --runMode genomeGenerate | |
28 # --genomeDir tempstargenomedir | |
29 # --genomeFastaFiles $input1 | |
30 # --runThreadsN \${GALAXY_SLOTS:-1} | |
31 # --genomeChrBinNbits $advanced_options.chr_bin_nbits | |
32 | |
33 if exists(output_directory) and not isdir(output_directory): | 28 if exists(output_directory) and not isdir(output_directory): |
34 print("Output directory path already exists but is not a directory: {}".format(output_directory), file=sys.stderr) | 29 print("Output directory path already exists but is not a directory: {}".format(output_directory), |
30 file=sys.stderr) | |
35 elif not exists(output_directory): | 31 elif not exists(output_directory): |
36 mkdir(output_directory) | 32 mkdir(output_directory) |
37 | 33 |
38 if 'GALAXY_SLOTS' in environ: | 34 if 'GALAXY_SLOTS' in environ: |
39 nslots = environ['GALAXY_SLOTS'] | 35 nslots = environ['GALAXY_SLOTS'] |
40 else: | 36 else: |
41 nslots = 1 | 37 nslots = 1 |
42 | 38 |
43 # cmdline_str = 'STAR --runMode genomeGenerate --genomeDir {} --genomeFastaFiles {} --runThreadsN {}'.format(output_directory, | 39 cmdline_str = 'STAR --runMode genomeGenerate --genomeDir {} --genomeFastaFiles {} --runThreadsN {}'.format( |
44 # fasta_filename, | 40 output_directory, |
45 # nslots) | 41 fasta_filename, |
46 # cmdline = shlex.split(cmdline_str) | 42 nslots) |
47 cmdline = ('touch', '{}/foo'.format(output_directory)) | 43 cmdline = shlex.split(cmdline_str) |
44 # cmdline = ('touch', '{}/foo'.format(output_directory)) | |
48 try: | 45 try: |
49 check_call(cmdline) | 46 check_call(cmdline) |
50 except CalledProcessError: | 47 except CalledProcessError: |
51 print("Error building RNA STAR index", file=sys.stderr) | 48 print("Error building RNA STAR index", file=sys.stderr) |
52 return(output_directory) | 49 return (output_directory) |
53 | 50 |
54 parser = argparse.ArgumentParser(description="Generate RNA STAR genome index and JSON describing this") | |
55 parser.add_argument('output_filename') | |
56 parser.add_argument('--fasta_filename') | |
57 parser.add_argument('--fasta_dbkey') | |
58 parser.add_argument('--fasta_description', default=None) | |
59 parser.add_argument('--data_table_name', default='rnastar_index') | |
60 args = parser.parse_args() | |
61 | 51 |
62 filename = args.output_filename | 52 def main(): |
53 parser = argparse.ArgumentParser(description="Generate RNA STAR genome index and JSON describing this") | |
54 parser.add_argument('output_filename') | |
55 parser.add_argument('--fasta_filename') | |
56 parser.add_argument('--fasta_dbkey') | |
57 parser.add_argument('--fasta_description', default=None) | |
58 parser.add_argument('--data_table_name', default='rnastar_index') | |
59 args = parser.parse_args() | |
63 | 60 |
64 params = load(open(filename, 'rb')) | 61 filename = args.output_filename |
65 output_directory = params[ 'output_data' ][0]['extra_files_path'] | |
66 makedirs( output_directory ) | |
67 data_manager_dict = {} | |
68 | 62 |
69 make_rnastar_index(output_directory, args.fasta_filename) | 63 params = load(open(filename, 'rb')) |
70 (sequence_id, sequence_name) = get_id_name(params, args.fasta_dbkey, args.fasta_description) | 64 output_directory = params['output_data'][0]['extra_files_path'] |
71 data_table_entry = dict(value=sequence_id, dbkey=args.fasta_dbkey, name=sequence_name, path=output_directory) | 65 makedirs(output_directory) |
72 | 66 |
73 output_datatable_dict = dict(data_tables={args.data_table_name : [data_table_entry]}) | 67 make_rnastar_index(output_directory, args.fasta_filename) |
74 open( filename, 'wb' ).write( dumps( output_datatable_dict ) ) | 68 (sequence_id, sequence_name) = get_id_name(params, args.fasta_dbkey, args.fasta_description) |
69 data_table_entry = dict(value=sequence_id, dbkey=args.fasta_dbkey, name=sequence_name, path=output_directory) | |
70 | |
71 output_datatable_dict = dict(data_tables={args.data_table_name: [data_table_entry]}) | |
72 open(filename, 'wb').write(dumps(output_datatable_dict)) | |
73 | |
74 if __name__ == "__main__": main() |