Mercurial > repos > ric > test1
diff galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.py @ 3:43be74e62bfe draft
Uploaded
author | ric |
---|---|
date | Thu, 22 Sep 2016 08:57:04 -0400 |
parents | |
children |
line wrap: on
line diff
"""
This tool produces the files used as input by 'prepare_seq_dsample_inputs'.

It needs as input an Illumina samplesheet (tab-separated, with a header
row) and produces as output:

 * the same samplesheet with a new column 'SampleProject'
 * a configuration file

The configuration file is a YAML file with the following structure:

    config_parameters:
      study_label: study_label

where study_label is mandatory.
"""

import argparse
import csv
import logging
import sys


LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

# Name of the column appended to every samplesheet row.
SAMPLE_PROJECT_FIELD = 'SampleProject'


def make_parser():
    """Build the command line parser for this tool.

    Returns:
        argparse.ArgumentParser: parser exposing ``--logfile``,
        ``--loglevel``, ``--in-file/-i`` (required),
        ``--samplesheet-output-file``, ``--config-output-file`` and
        ``--study`` (required).
    """
    parser = argparse.ArgumentParser(
        description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs')
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('--in-file', '-i', type=str, required=True,
                        help='input file')
    parser.add_argument('--samplesheet-output-file', type=str,
                        help='output file containing flowcell samplesheet',
                        default='./samplesheet.tsv')
    parser.add_argument('--config-output-file', type=str,
                        help='output file containing config definitions',
                        default='./config_parameters.yaml')
    parser.add_argument('--study', type=str, required=True,
                        help='study label parameter for YAML file')
    return parser


def _open_csv(path, mode):
    """Open ``path`` the way the csv module expects on this interpreter.

    ``mode`` is ``'r'`` or ``'w'``.  Python 3 needs a text-mode file opened
    with ``newline=''`` (the ``'U'`` flag was removed in 3.11 and ``'wb'``
    makes the csv writer fail with TypeError); Python 2 needs ``'rU'`` for
    reading and binary mode for writing.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, 'rU' if mode == 'r' else 'wb')


def read_samplesheet(in_file):
    """Read a tab-separated samplesheet with a header row.

    Args:
        in_file: path of the samplesheet to read.

    Returns:
        tuple: ``(recs, fieldnames)`` where ``recs`` is the list of rows
        (one dict per row, keyed by column name) and ``fieldnames`` is the
        list of column names, or ``None`` when the file is empty.
    """
    with _open_csv(in_file, 'r') as f:
        reader = csv.DictReader(f, delimiter='\t')
        recs = list(reader)
        fieldnames = reader.fieldnames
    return recs, fieldnames


def write_samplesheet_extended(recs, fields, out_file, sample_project_label):
    """Write the samplesheet with an extra 'SampleProject' column.

    Neither ``recs`` nor ``fields`` is modified: the header and every row
    are copied before the new column is added.

    Args:
        recs: iterable of row dicts, as returned by ``read_samplesheet``.
        fields: column names of ``recs``; ``None`` is treated as no columns.
        out_file: path of the tab-separated file to create.
        sample_project_label: value stored in the 'SampleProject' column of
            every row (replacing any value already present in that column).
    """
    # Copy so the caller's header list is left untouched, and tolerate the
    # ``None`` that DictReader reports for an empty input file.
    header = list(fields or [])
    if SAMPLE_PROJECT_FIELD not in header:
        header.append(SAMPLE_PROJECT_FIELD)
    with _open_csv(out_file, 'w') as f:
        writer = csv.DictWriter(f, header, delimiter='\t')
        writer.writeheader()
        for row in recs:
            out_row = dict(row)
            out_row[SAMPLE_PROJECT_FIELD] = sample_project_label
            writer.writerow(out_row)


def write_yaml_config_file(out_file, study_label):
    """Write the YAML configuration file holding the study label.

    Args:
        out_file: path of the YAML file to create.
        study_label: value stored under ``config_parameters.study_label``.
    """
    # Imported here so the samplesheet helpers stay importable on systems
    # without PyYAML; only this function needs it.
    import yaml

    config_data = {'config_parameters': {'study_label': study_label}}
    with open(out_file, 'w') as f:
        # Block style matches the structure documented in the module
        # docstring regardless of the installed PyYAML default.
        yaml.dump(config_data, f, default_flow_style=False)


def main(argv):
    """Run the tool.

    Args:
        argv: command line arguments, without the program name.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('before_prepare_seq_dsample_inputs')

    logger.info('Loading data from file %s', args.in_file)
    recs, out_file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    logger.info('Writing file %s', args.samplesheet_output_file)
    write_samplesheet_extended(recs, out_file_header,
                               args.samplesheet_output_file, args.study)

    logger.info('Writing file %s', args.config_output_file)
    write_yaml_config_file(args.config_output_file, args.study)


if __name__ == '__main__':
    main(sys.argv[1:])