annotate galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.py @ 0:ba6cf6ede027 draft default tip

Uploaded
author ric
date Wed, 28 Sep 2016 06:03:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
1 import csv, sys, argparse, logging
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
2
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
# Layout of every log record: timestamp|LEVEL (padded to 8 chars)|message.
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
# Date pattern used to render %(asctime)s in LOG_FORMAT.
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
# Names accepted by the --loglevel option; each one is looked up as an
# attribute of the logging module to obtain the numeric level.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
6
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
7
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
def make_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser exposing
        * ``--logfile`` / ``--loglevel`` (logging destination and level,
          level defaults to ``INFO``);
        * the required string options ``--study``, ``--label``,
          ``--source``, ``--device``, ``--path``, ``--sample`` and
          ``--genome_reference``;
        * ``--dsample_ofile`` and ``--dobject_ofile``, the two output
          paths, each with a default in the current directory.
    """
    parser = argparse.ArgumentParser(
        description='build aligned seq data sample import files')
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')

    # All record fields are mandatory strings; the help text is simply the
    # option name with underscores shown as spaces.
    required_fields = ('study', 'label', 'source', 'device', 'path',
                       'sample', 'genome_reference')
    for field in required_fields:
        parser.add_argument('--' + field, type=str, required=True,
                            help=field.replace('_', ' '))

    # NOTE(review): the default file names say "genome_variations" although
    # the script builds aligned-seq records; kept unchanged because callers
    # may rely on these defaults -- confirm before renaming.
    parser.add_argument('--dsample_ofile', type=str,
                        default='./genome_variations_dsample.tsv',
                        help='output file containing data samples definitions')
    parser.add_argument('--dobject_ofile', type=str,
                        default='./genome_variations_dobject.tsv',
                        help='output file containing data objects definitions')

    return parser
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
33
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
def _write_single_row_tsv(path, header, row):
    """Write a tab-separated file made of the header line and one record."""
    with open(path, 'w') as ofile:
        writer = csv.DictWriter(ofile, header, delimiter='\t')
        writer.writeheader()
        writer.writerow(row)


def main(argv):
    """Write the data-sample and data-object import files.

    Args:
        argv: list of command-line arguments (program name excluded), as
            understood by the parser returned by ``make_parser()``.

    Side effects:
        Configures the root logger, then (over)writes the files named by
        ``--dsample_ofile`` and ``--dobject_ofile``; each receives a header
        line followed by exactly one tab-separated record.
    """
    args = make_parser().parse_args(argv)

    log_config = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': getattr(logging, args.loglevel),
    }
    # Without a 'filename' entry basicConfig logs to stderr.
    if args.logfile:
        log_config['filename'] = args.logfile
    logging.basicConfig(**log_config)
    logger = logging.getLogger('prepare_aligned_seq_dsample_import')

    # Data sample definition: source type, sample type and status are fixed
    # values, everything else comes from the command line.
    _write_single_row_tsv(
        args.dsample_ofile,
        ['study', 'label', 'source', 'source_type', 'seq_dsample_type',
         'status', 'device', 'sample', 'genome_reference'],
        {
            'study': args.study,
            'label': args.label,
            'source': args.source,
            'device': args.device,
            'source_type': 'Tube',
            'seq_dsample_type': 'AlignedSeqDataSample',
            'status': 'USABLE',
            'sample': args.sample,
            'genome_reference': args.genome_reference,
        },
    )
    logger.info('Done writing file %s', args.dsample_ofile)

    # Data object definition: refers to the data sample above through its
    # label; size and sha1 are written as placeholder values.
    _write_single_row_tsv(
        args.dobject_ofile,
        ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1'],
        {
            'study': args.study,
            'path': args.path,
            'data_sample': args.label,
            'mimetype': 'x-vl/bam',
            'size': '-1',
            'sha1': 'N.A.',
        },
    )
    logger.info('Done writing file %s', args.dobject_ofile)
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
74
ba6cf6ede027 Uploaded
ric
parents:
diff changeset
if __name__ == '__main__':
    # Strip the program name: main() expects only the actual arguments.
    main(sys.argv[1:])