Mercurial > repos > ric > test1
comparison galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.py @ 3:43be74e62bfe draft
Uploaded
author | ric |
---|---|
date | Thu, 22 Sep 2016 08:57:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:47bf0086e082 | 3:43be74e62bfe |
---|---|
1 """ | |
2 This tool produces files used as input by 'prepare_seq_dsample_inputs'. | |
3 | |
4 It needs as input an Illumina samplesheet and produces as output: | |
5 the same samplesheet with a new column 'SampleProject' | |
6 a configuration file | |
7 The configuration file is a YAML file with the following structure: | |
8 | |
9 config_parameters: | |
10 study_label: study_label | |
11 | |
12 where study_label is mandatory | |
13 """ | |
14 | |
15 import csv, sys, argparse, logging, yaml | |
16 | |
17 | |
# Timestamp layout and record layout passed to logging.basicConfig() by main().
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'

# Level names offered as the valid choices for the --loglevel option.
LOG_LEVELS = [
    'DEBUG',
    'INFO',
    'WARNING',
    'ERROR',
    'CRITICAL',
]
21 | |
def make_parser():
    """Build the command-line parser for this tool.

    Recognised options: ``--logfile``, ``--loglevel``, ``--in-file``/``-i``
    (required), ``--samplesheet-output-file``, ``--config-output-file`` and
    ``--study`` (required).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs'
    )
    add_option = cli.add_argument

    # Logging controls.
    add_option('--logfile', type=str, help='log file (default=stderr)')
    add_option('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
               help='logging level')

    # Input samplesheet.
    add_option('--in-file', '-i', type=str, required=True, help='input file')

    # Output locations, both defaulting to the current directory.
    add_option('--samplesheet-output-file', type=str,
               default='./samplesheet.tsv',
               help='output file containing flowcell samplesheet')
    add_option('--config-output-file', type=str,
               default='./config_parameters.yaml',
               help='output file containing config definitions')

    # Study label, used for both the new column and the YAML file.
    add_option('--study', type=str, required=True,
               help='study label parameter for YAML file')

    return cli
38 | |
39 | |
def read_samplesheet(in_file):
    """Load a tab-separated samplesheet into memory.

    Args:
        in_file: path of the tab-delimited file; its first line is used as
            the header.

    Returns:
        A ``(recs, fieldnames)`` tuple: ``recs`` is the list of rows, each a
        mapping from column name to value, and ``fieldnames`` is the list of
        header names (``None`` when the file is empty).
    """
    # The 'U' flag was removed from open() in Python 3.11 and makes the call
    # raise ValueError there.  On Python 3 the csv module wants the file
    # opened with newline='' so it can handle line endings itself; Python 2's
    # built-in open() has no such argument, so keep universal-newline mode.
    if sys.version_info[0] >= 3:
        f = open(in_file, 'r', newline='')
    else:
        f = open(in_file, 'rU')
    with f:
        reader = csv.DictReader(f, delimiter='\t')
        recs = list(reader)
        fieldnames = reader.fieldnames
    return recs, fieldnames
46 | |
47 | |
def write_samplesheet_extended(recs, fields, out_file, sample_project_label):
    """Write the samplesheet with an extra 'SampleProject' column.

    Args:
        recs: iterable of row mappings (column name -> value).
        fields: list of column names for the header, or ``None`` when the
            input had no header; it is not modified.
        out_file: path of the tab-delimited file to create or overwrite.
        sample_project_label: value stored in the 'SampleProject' column of
            every row.

    The input rows are not modified either: each one is copied before the
    label is added.
    """
    # Copy the header so the caller's list does not grow on every call, and
    # tolerate None (what csv.DictReader reports for an empty file).
    out_fields = list(fields or [])
    # Avoid a duplicated header cell when the column already exists; its
    # values are overwritten with the label below.
    if 'SampleProject' not in out_fields:
        out_fields.append('SampleProject')

    # csv writes text on Python 3 (binary mode raises TypeError there) and
    # wants newline='' so it controls the line terminator; Python 2's csv
    # module needs a binary-mode file instead.
    if sys.version_info[0] >= 3:
        f = open(out_file, 'w', newline='')
    else:
        f = open(out_file, 'wb')
    with f:
        writer = csv.DictWriter(f, out_fields, delimiter='\t')
        writer.writeheader()
        for row in recs:
            writer.writerow(dict(row, SampleProject=sample_project_label))
56 | |
57 | |
def write_yaml_config_file(out_file, study_label):
    """Write the YAML configuration file holding the study label.

    The dumped document has a single top-level key, ``config_parameters``,
    whose value is a mapping with one entry, ``study_label``.

    Args:
        out_file: path of the YAML file to create or overwrite.
        study_label: value stored under ``study_label``.
    """
    parameters = {'study_label': study_label}
    document = {'config_parameters': parameters}
    with open(out_file, 'w') as stream:
        yaml.dump(document, stream)
62 | |
63 | |
def main(argv=None):
    """Run the tool: read the samplesheet, then write both output files.

    Args:
        argv: list of command-line arguments without the program name.
            When ``None``, argparse falls back to ``sys.argv[1:]``.

    Steps: configure logging from ``--loglevel``/``--logfile``, load the
    input samplesheet, write it back with the study label as an extra
    column, and write the YAML configuration file with the same label.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    # --loglevel is restricted to LOG_LEVELS, so the name always resolves to
    # one of the logging module's level constants.
    log_level = getattr(logging, args.loglevel)
    kwargs = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': log_level,
    }
    # Without 'filename', basicConfig logs to stderr.
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('before_prepare_seq_dsample_inputs')

    # Arguments are passed to the logger instead of being %-formatted up
    # front, so every call here uses the same (lazy) form.
    logger.info('Loading data from file %s', args.in_file)
    recs, out_file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    logger.info('Writing file %s', args.samplesheet_output_file)
    write_samplesheet_extended(recs, out_file_header,
                               args.samplesheet_output_file, args.study)

    logger.info('Writing file %s', args.config_output_file)
    write_yaml_config_file(args.config_output_file, args.study)
87 | |
# Script entry point: hand the command-line arguments, minus the program
# name, to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)