Mercurial > repos > sanbi-uwc > data_manager_rnastar_index_builder
comparison data_manager/rnastar_index_builder.py @ 0:2883a3b7dc56 draft
planemo upload for repository https://github.com/pvanheus/data_manager_rnastar_index_builder commit 265fa1966ea606ebccea8c2865043c014959c10c-dirty
author | sanbi-uwc |
---|---|
date | Mon, 08 Feb 2016 03:23:34 -0500 |
parents | |
children | aed097239724 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2883a3b7dc56 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 import argparse | |
5 from subprocess import check_call, CalledProcessError | |
6 from json import load, dump | |
7 from os import environ, mkdir | |
8 from os.path import isdir, exists | |
9 import shlex | |
10 import sys | |
11 | |
def get_id_name(params, dbkey, fasta_description=None):
    """Choose the unique id and display name for the new data table entry.

    :param params: parsed Galaxy parameter JSON; must contain a
        ``param_dict`` mapping with ``sequence_id`` and ``sequence_name`` keys
    :param dbkey: genome build key, used as the fallback for both values
    :param fasta_description: optional description used as the display name
        when the user did not supply ``sequence_name``
    :returns: ``(sequence_id, sequence_name)`` tuple
    :raises KeyError: if ``param_dict`` or either expected key is missing
    """
    # TODO: ensure sequence_id is unique and does not already appear in location file
    param_dict = params['param_dict']

    # Empty or missing user input falls back to the dbkey.
    sequence_id = param_dict['sequence_id'] or dbkey

    # Preference order: user-supplied name, then FASTA description, then dbkey.
    sequence_name = param_dict['sequence_name'] or fasta_description or dbkey
    return sequence_id, sequence_name
24 | |
def make_rnastar_index(output_directory, fasta_filename):
    """Build an RNA STAR genome index for a FASTA file.

    Runs the equivalent of::

        STAR --runMode genomeGenerate
             --genomeDir <output_directory>
             --genomeFastaFiles <fasta_filename>
             --runThreadsN ${GALAXY_SLOTS:-1}

    :param output_directory: directory to write the index into; it is
        created if it does not exist yet
    :param fasta_filename: path of the genome FASTA file to index
    :returns: ``output_directory``
    :raises OSError: if ``output_directory`` exists but is not a directory,
        or if the STAR executable cannot be started
    :raises subprocess.CalledProcessError: if STAR exits with a non-zero status
    """
    if exists(output_directory):
        if not isdir(output_directory):
            # Continuing would only make STAR fail later with a less helpful
            # message, so stop here.
            raise OSError(
                "Output directory path already exists but is not a directory: {}".format(output_directory))
    else:
        mkdir(output_directory)

    # The environment supplies GALAXY_SLOTS as a string; default to one thread.
    nslots = environ.get('GALAXY_SLOTS', '1')

    # Build the argument list directly (no shell, no shlex.split) so that
    # paths containing spaces or shell metacharacters are passed through intact.
    cmdline = [
        'STAR',
        '--runMode', 'genomeGenerate',
        '--genomeDir', output_directory,
        '--genomeFastaFiles', fasta_filename,
        '--runThreadsN', str(nslots),
    ]
    try:
        check_call(cmdline)
    except CalledProcessError:
        print("Error building RNA STAR index", file=sys.stderr)
        # Propagate the failure so the caller does not register a broken index.
        raise
    return output_directory
53 | |
def main():
    """Command-line entry point: build the index and write the data table JSON.

    The file named by ``output_filename`` is first read as the JSON parameter
    file (it must contain ``output_data[0]['extra_files_path']`` and the
    ``param_dict`` consumed by ``get_id_name``), and is then overwritten with
    the JSON describing the new data table entry.
    """
    parser = argparse.ArgumentParser(description="Generate RNA STAR genome index and JSON describing this")
    parser.add_argument('output_filename')
    parser.add_argument('--fasta_filename')
    parser.add_argument('--fasta_dbkey')
    parser.add_argument('--fasta_description', default=None)
    parser.add_argument('--data_table_name', default='rnastar_indexes')
    args = parser.parse_args()

    # Text mode works for json.load on both Python 2 and 3; the context
    # manager guarantees the handle is closed before the file is rewritten.
    with open(args.output_filename) as params_file:
        params = load(params_file)

    output_directory = params['output_data'][0]['extra_files_path']

    make_rnastar_index(output_directory, args.fasta_filename)
    sequence_id, sequence_name = get_id_name(params, args.fasta_dbkey, args.fasta_description)
    data_table_entry = dict(value=sequence_id, dbkey=args.fasta_dbkey, name=sequence_name, path=output_directory)

    # dict() accepts at most one positional argument, so the nested mapping
    # must be built with literals: {'data_tables': {<table name>: [<entry>]}}.
    output_datatable_dict = {'data_tables': {args.data_table_name: [data_table_entry]}}

    # json.dump writes text, so open in 'w' (not 'wb'), and write back to the
    # same path that was given on the command line.
    with open(args.output_filename, 'w') as output_file:
        dump(output_datatable_dict, output_file)


if __name__ == '__main__':
    main()