3
|
"""
This tool produces the files used as input by 'prepare_seq_dsample_inputs'.

It takes an Illumina samplesheet as input and produces as output:
  - the same samplesheet with a new column 'SampleProject'
  - a configuration file

The configuration file is a YAML file with the following structure:

config_parameters:
  study_label: study_label

where study_label is mandatory.
"""
|
|
14
|
|
15 import csv, sys, argparse, logging, yaml
|
|
16
|
|
17
|
|
# Record layout and timestamp layout handed to logging.basicConfig() in main().
LOG_FORMAT = "%(asctime)s|%(levelname)-8s|%(message)s"
LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"

# Values accepted by --loglevel; main() looks each one up as an attribute of
# the logging module.
LOG_LEVELS = [
    "DEBUG",
    "INFO",
    "WARNING",
    "ERROR",
    "CRITICAL",
]
|
|
21
|
|
def make_parser():
    """Build the command-line parser for this tool.

    Registered options:
      --logfile                  log destination (optional)
      --loglevel                 one of LOG_LEVELS, default 'INFO'
      --in-file / -i             input file (required)
      --samplesheet-output-file  default './samplesheet.tsv'
      --config-output-file       default './config_parameters.yaml'
      --study                    study label written to the outputs (required)

    :return: the configured argparse.ArgumentParser
    """
    cli = argparse.ArgumentParser(
        description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs')

    # (flags, keyword settings) pairs, registered in this order so the
    # generated --help text lists them the same way.
    option_table = (
        (('--logfile',),
         dict(type=str, help='log file (default=stderr)')),
        (('--loglevel',),
         dict(type=str, choices=LOG_LEVELS, help='logging level',
              default='INFO')),
        (('--in-file', '-i'),
         dict(type=str, required=True, help='input file')),
        (('--samplesheet-output-file',),
         dict(type=str,
              help='output file containing flowcell samplesheet',
              default='./samplesheet.tsv')),
        (('--config-output-file',),
         dict(type=str,
              help='output file containing config definitions',
              default='./config_parameters.yaml')),
        (('--study',),
         dict(type=str, required=True,
              help='study label parameter for YAML file')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli
|
|
38
|
|
39
|
|
def read_samplesheet(in_file):
    """Load a tab-separated samplesheet into memory.

    The first line of the file is taken as the header row.

    :param in_file: path of the tab-separated file to read
    :return: tuple ``(recs, fieldnames)`` where ``recs`` is a list with one
        dict per data line, keyed by column name, and ``fieldnames`` is the
        list of column names in file order (``None`` for an empty file)
    """
    # The 'U' mode flag was removed in Python 3.11 (open() raises ValueError).
    # On Python 3 the csv module wants the file opened with newline='' so it
    # can handle line endings itself; Python 2 keeps the original 'rU' mode.
    if sys.version_info[0] >= 3:
        handle = open(in_file, 'r', newline='')
    else:
        handle = open(in_file, 'rU')

    with handle:
        reader = csv.DictReader(handle, delimiter='\t')
        recs = list(reader)
        # Read after consuming the rows so the header has been parsed.
        fieldnames = reader.fieldnames
    return recs, fieldnames
|
|
46
|
|
47
|
|
def write_samplesheet_extended(recs, fields, out_file, sample_project_label):
    """Write the samplesheet with an extra 'SampleProject' column.

    Every row gets ``sample_project_label`` as its 'SampleProject' value.
    Neither ``fields`` nor the dicts in ``recs`` are modified.

    :param recs: iterable of row dicts keyed by column name
    :param fields: column names in output order; ``None`` is treated as no
        columns
    :param out_file: path of the tab-separated file to create or overwrite
    :param sample_project_label: value stored in the 'SampleProject' column
    """
    # Work on a copy: appending to the caller's list would add the column
    # again on every call and fails outright when fields is None.
    out_fields = list(fields or [])
    if 'SampleProject' not in out_fields:
        out_fields.append('SampleProject')

    # csv writers need a text-mode file opened with newline='' on Python 3
    # ('wb' raises TypeError there); Python 2 needs the original binary mode.
    if sys.version_info[0] >= 3:
        handle = open(out_file, 'w', newline='')
    else:
        handle = open(out_file, 'wb')

    with handle:
        writer = csv.DictWriter(handle, out_fields, delimiter='\t')
        writer.writeheader()
        for row in recs:
            # Copy the row so the caller's record is left untouched.
            writer.writerow(dict(row, SampleProject=sample_project_label))
|
|
56
|
|
57
|
|
def write_yaml_config_file(out_file, study_label):
    """Write the YAML configuration file.

    The file holds a single mapping::

        config_parameters:
          study_label: <study_label>

    :param out_file: path of the YAML file to create or overwrite
    :param study_label: value stored under config_parameters/study_label
    """
    config_data = {'config_parameters': {'study_label': study_label}}
    with open(out_file, 'w') as f:
        # Force block style: PyYAML releases before 5.1 would otherwise emit
        # the inner mapping inline as "{study_label: ...}".
        yaml.dump(config_data, f, default_flow_style=False)
|
|
62
|
|
63
|
|
def main(argv):
    """Run the tool: read the samplesheet, then write both output files.

    Steps:
      1. parse ``argv`` with the parser from make_parser();
      2. configure logging at the requested level, to --logfile when one is
         given;
      3. read the input samplesheet;
      4. write the samplesheet extended with the study label;
      5. write the YAML configuration file holding the same study label.

    :param argv: command-line arguments without the program name
    """
    args = make_parser().parse_args(argv)

    log_config = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        # --loglevel is restricted to LOG_LEVELS, all attributes of logging.
        'level': getattr(logging, args.loglevel),
    }
    if args.logfile:
        log_config['filename'] = args.logfile
    logging.basicConfig(**log_config)
    logger = logging.getLogger('before_prepare_seq_dsample_inputs')

    # Logger calls pass their values as arguments so formatting is left to
    # the logging module, consistently across all messages.
    logger.info('Loading data from file %s', args.in_file)
    recs, out_file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    logger.info('Writing file %s', args.samplesheet_output_file)
    write_samplesheet_extended(recs, out_file_header,
                               args.samplesheet_output_file, args.study)

    logger.info('Writing file %s', args.config_output_file)
    write_yaml_config_file(args.config_output_file, args.study)
|
|
87
|
|
# Script entry point: hand main() everything after the program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
|