Mercurial > repos > ric > test2
comparison galaxy-tools/biobank/utils/prepare_seq_out_inputs.py @ 0:ba6cf6ede027 draft default tip
Uploaded
| author | ric |
|---|---|
| date | Wed, 28 Sep 2016 06:03:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ba6cf6ede027 |
|---|---|
| 1 """ | |
| 2 This tool produces files that can be used as input to import | |
| 3 * SequencerOutput data samples | |
| 4 * SequencerOutput data objects | |
| 5 within OMERO.biobank using import applications. | |
| 6 | |
| 7 Input file must be a tab-separated file with a header row, like | |
| 8 | |
| 9 run_directory path | |
| 10 130418_SN194_0303_BC1NYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/130418_SN194_0303_BC1NYHACXX/raw | |
| 11 160418_SN194_0304_BCAZYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/160418_SN194_0304_BCAZYHACXX/raw | |
| 12 .... | |
| 13 """ | |
| 14 | |
| 15 import csv, sys, argparse, logging | |
| 16 | |
# Layout of each log record: timestamp|level name (left-justified, padded to
# 8 characters)|message.
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
# Date/time pattern used to render the %(asctime)s field of LOG_FORMAT.
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
# Level names accepted by the --loglevel command-line option.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
| 20 | |
| 21 | |
def make_parser():
    """Build the command-line parser for this tool.

    Options:
      --logfile            file receiving log output (stderr when omitted)
      --loglevel           one of LOG_LEVELS, defaults to 'INFO'
      --in-file / -i       required, path of the input file
      --dsamples-out-file  output path for the data samples definitions
      --dobjects-out-file  output path for the data objects definitions
      --study / -s         required, study label copied into both outputs

    Returns:
        The configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(
        description='build sequencer output import files')
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('--in-file', '-i', type=str, required=True,
                        help='input file')
    parser.add_argument('--dsamples-out-file', type=str,
                        default='./seq_out_dsamples.tsv',
                        help='output file containing data samples definitions')
    parser.add_argument('--dobjects-out-file', type=str,
                        default='./seq_out_dobjects.tsv',
                        help='output file containing data objects definitions')
    # Every other option documents itself in --help; the required study
    # option used to be the only one without a description.
    parser.add_argument('--study', '-s', type=str, required=True,
                        help='study label written to the "study" column '
                             'of both output files')
    return parser
| 35 | |
| 36 | |
def _parse_run_directory(run_dir):
    """Split a run directory name into (device, flowcell).

    The name must consist of exactly four '_'-separated fields, e.g.
    '130418_SN194_0303_BC1NYHACXX'.  The second field is returned as the
    device and the fourth field, minus its first character, as the flowcell
    (for the example: 'SN194' and 'C1NYHACXX').

    Raises:
        ValueError: if the name does not have exactly four fields.
    """
    fields = run_dir.split('_')
    if len(fields) != 4:
        raise ValueError(
            'malformed run directory %r: expected 4 "_"-separated fields, '
            'got %d' % (run_dir, len(fields)))
    _, device, _, flowcell = fields
    # The first character of the last field is not part of the flowcell
    # label (presumably the flowcell position prefix -- confirm).
    return device, flowcell[1:]


def write_dsamples_file(records, out_file, study_label):
    """Write the SequencerOutput data samples definitions as a TSV file.

    Args:
        records: iterable of run directory names; one row is written for
            each of them, in iteration order.
        out_file: path of the tab-separated file to create (overwritten if
            it already exists).
        study_label: value stored in the 'study' column of every row.

    Raises:
        ValueError: if a run directory name cannot be parsed.  All names are
            validated before the output file is opened, so a malformed
            record never leaves a partially written file behind.
    """
    out_file_header = ['study', 'label', 'source', 'source_type',
                       'seq_dsample_type', 'status', 'device']
    rows = []
    for run_dir in records:
        device, flowcell = _parse_run_directory(run_dir)
        rows.append({'study': study_label,
                     'label': run_dir,
                     'source': flowcell,
                     'source_type': 'FlowCell',
                     'seq_dsample_type': 'SequencerOutput',
                     'status': 'USABLE',
                     'device': device})
    with open(out_file, 'w') as ofile:
        writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
        writer.writeheader()
        writer.writerows(rows)
| 57 | |
| 58 | |
def write_dobjects_file(records, out_file, study_label):
    """Write the SequencerOutput data objects definitions as a TSV file.

    Args:
        records: iterable of mappings providing the 'path' and
            'run_directory' keys; one row is written per mapping.
        out_file: path of the tab-separated file to create.
        study_label: value stored in the 'study' column of every row.
    """
    columns = ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1']
    with open(out_file, 'w') as out_handle:
        tsv_writer = csv.DictWriter(out_handle, columns, delimiter='\t')
        tsv_writer.writeheader()
        for record in records:
            # mimetype, size and sha1 are fixed values for every row; only
            # the path and the data sample label come from the record.
            row = dict(
                study=study_label,
                path=record['path'],
                data_sample=record['run_directory'],
                mimetype='x-vl/illumina-run-folder',
                size='-1',
                sha1='N.A.',
            )
            tsv_writer.writerow(row)
| 71 | |
| 72 | |
def main(argv):
    """Read the input TSV and write both OMERO.biobank import files.

    Args:
        argv: command-line arguments without the program name (see
            make_parser for the accepted options).

    The input file is read as tab-separated values with a header row; its
    'run_directory' and 'path' columns are used.  One data sample row is
    written per distinct run directory and one data object row per input
    record.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    # --loglevel is restricted to LOG_LEVELS, so the attribute lookup on the
    # logging module always succeeds.
    log_config = {'format': LOG_FORMAT,
                  'datefmt': LOG_DATEFMT,
                  'level': getattr(logging, args.loglevel)}
    if args.logfile:
        log_config['filename'] = args.logfile
    logging.basicConfig(**log_config)
    logger = logging.getLogger('prepare_seq_dsample_inputs')

    with open(args.in_file) as f:
        logger.info('Loading data from file %s', args.in_file)
        recs = list(csv.DictReader(f, delimiter='\t'))

    # Deduplicate run directories while keeping their first-seen order.
    # Iterating a plain set would make the row order of the data samples
    # file arbitrary and possibly different between runs on the same input.
    seen = set()
    run_directories = []
    for rec in recs:
        run_dir = rec['run_directory']
        if run_dir not in seen:
            seen.add(run_dir)
            run_directories.append(run_dir)

    logger.info('Writing DataSample data to file %s', args.dsamples_out_file)
    write_dsamples_file(run_directories, args.dsamples_out_file, args.study)
    logger.info('Writing DataObjects data to file %s', args.dobjects_out_file)
    write_dobjects_file(recs, args.dobjects_out_file, args.study)
| 96 | |
| 97 | |
# Run the tool only when this file is executed as a script, passing main()
# the command-line arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])
