comparison galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.py @ 3:43be74e62bfe draft

Uploaded
author ric
date Thu, 22 Sep 2016 08:57:04 -0400
parents
children
comparison
equal deleted inserted replaced
2:47bf0086e082 3:43be74e62bfe
1 """
2 This tool produces the files used as input by 'prepare_seq_dsample_inputs'.
3
4 It takes an Illumina samplesheet as input and produces as output:
5 the same samplesheet with a new column 'SampleProject'
6 a configuration file
7 The configuration file is a YAML file with the following structure:
8
9 config_parameters:
10 study_label: study_label
11
12 where study_label is mandatory
13 """
14
15 import csv, sys, argparse, logging, yaml
16
17
# Logging configuration: the format strings are handed to logging.basicConfig
# in main(), and LOG_LEVELS is the set of values accepted by --loglevel.
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = [
    'DEBUG',
    'INFO',
    'WARNING',
    'ERROR',
    'CRITICAL',
]
21
def make_parser():
    """Build the command-line parser for this tool.

    '--in-file'/'-i' and '--study' are required; the two output paths and
    the log level have defaults, and '--logfile' is optional.

    Returns the configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs')

    # (flags, keyword options) pairs, registered in the order they appear
    # in the generated help text.
    option_specs = [
        (('--logfile',),
         dict(type=str, help='log file (default=stderr)')),
        (('--loglevel',),
         dict(type=str, choices=LOG_LEVELS, help='logging level',
              default='INFO')),
        (('--in-file', '-i'),
         dict(type=str, required=True, help='input file')),
        (('--samplesheet-output-file',),
         dict(type=str, help='output file containing flowcell samplesheet',
              default='./samplesheet.tsv')),
        (('--config-output-file',),
         dict(type=str, help='output file containing config definitions',
              default='./config_parameters.yaml')),
        (('--study',),
         dict(type=str, required=True,
              help='study label parameter for YAML file')),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    return cli
38
39
def read_samplesheet(in_file):
    """Read a tab-separated samplesheet that has a header row.

    Parameters:
        in_file: path of the tab-delimited file to read.

    Returns:
        A ``(recs, fieldnames)`` tuple: ``recs`` is the list of row mappings
        produced by csv.DictReader (one per data line), ``fieldnames`` is the
        list of header column names, or None when the file is empty.
    """
    if sys.version_info[0] >= 3:
        # The 'U' mode flag was removed in Python 3.11; the csv module asks
        # for text files opened with newline='' and handles '\r', '\n' and
        # '\r\n' line endings itself.
        handle = open(in_file, 'r', newline='')
    else:
        # Python 2: keep the original universal-newlines behaviour.
        handle = open(in_file, 'rU')
    with handle:
        reader = csv.DictReader(handle, delimiter='\t')
        recs = list(reader)
        # Read the header while the file is still open: for an empty file
        # DictReader would otherwise try to read from a closed handle.
        fieldnames = reader.fieldnames
    return recs, fieldnames
46
47
def write_samplesheet_extended(recs, fields, out_file, sample_project_label):
    """Write the samplesheet with a 'SampleProject' column set on every row.

    Parameters:
        recs: iterable of row mappings (as returned by read_samplesheet).
        fields: list of input column names, or None for an empty input.
        out_file: path of the tab-separated file to create.
        sample_project_label: value stored in 'SampleProject' for each row.

    Neither ``fields`` nor the rows in ``recs`` are modified; the column is
    added to copies so the function can safely be called more than once with
    the same arguments.
    """
    # Work on a copy: appending to the caller's list would add the column
    # again on every call. An existing 'SampleProject' column is reused
    # (its values are overwritten) instead of being duplicated in the header.
    columns = list(fields or [])
    if 'SampleProject' not in columns:
        columns.append('SampleProject')

    if sys.version_info[0] >= 3:
        # csv writes text on Python 3; newline='' lets the writer control
        # line endings, matching the bytes produced by 'wb' on Python 2.
        handle = open(out_file, 'w', newline='')
    else:
        handle = open(out_file, 'wb')
    with handle:
        writer = csv.DictWriter(handle, columns, delimiter='\t')
        writer.writeheader()
        for row in recs:
            writer.writerow(dict(row, SampleProject=sample_project_label))
56
57
def write_yaml_config_file(out_file, study_label):
    """Dump the configuration mapping for the given study to a YAML file.

    The document written to ``out_file`` has a single top-level key,
    'config_parameters', holding a mapping with the key 'study_label' set
    to ``study_label``.
    """
    parameters = {'study_label': study_label}
    document = {'config_parameters': parameters}
    with open(out_file, 'w') as handle:
        yaml.dump(document, stream=handle)
62
63
def main(argv):
    """Run the tool from a list of command-line arguments.

    Parses ``argv`` (without the program name), configures logging, reads
    the input samplesheet, writes the extended samplesheet and then the YAML
    configuration file.

    Parameters:
        argv: list of command-line argument strings.
    """
    args = make_parser().parse_args(argv)

    # Log to the requested file when given; otherwise basicConfig falls back
    # to its default stream.
    log_config = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': getattr(logging, args.loglevel),
    }
    if args.logfile:
        log_config['filename'] = args.logfile
    logging.basicConfig(**log_config)
    logger = logging.getLogger('before_prepare_seq_dsample_inputs')

    # Arguments are passed to the logger instead of being %-formatted up
    # front, consistently across every call in this function.
    logger.info('Loading data from file %s', args.in_file)
    recs, out_file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    logger.info('Writing file %s', args.samplesheet_output_file)
    write_samplesheet_extended(recs, out_file_header,
                               args.samplesheet_output_file, args.study)

    logger.info('Writing file %s', args.config_output_file)
    write_yaml_config_file(args.config_output_file, args.study)
87
# Script entry point: forward the command-line arguments (program name
# excluded) to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)