annotate galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.py @ 3:43be74e62bfe draft

Uploaded
author ric
date Thu, 22 Sep 2016 08:57:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
43be74e62bfe Uploaded
ric
parents:
diff changeset
1 """
43be74e62bfe Uploaded
ric
parents:
diff changeset
2 This tool produces files that can be used as input to import
43be74e62bfe Uploaded
ric
parents:
diff changeset
3 * samples
43be74e62bfe Uploaded
ric
parents:
diff changeset
4 * flowcells
43be74e62bfe Uploaded
ric
parents:
diff changeset
5 * lanes
43be74e62bfe Uploaded
ric
parents:
diff changeset
6 * laneslots
43be74e62bfe Uploaded
ric
parents:
diff changeset
7 within OMERO.biobank using import applications.
43be74e62bfe Uploaded
ric
parents:
diff changeset
8 If the optional 'study-output-file' parameter is given as input, the
43be74e62bfe Uploaded
ric
parents:
diff changeset
9 script will produce the input file for a new study definition.
43be74e62bfe Uploaded
ric
parents:
diff changeset
10 If the optional 'tubes-subsamples-output-file' is given, the script
43be74e62bfe Uploaded
ric
parents:
diff changeset
11 will generate another file with tubes definitions where each tube is
43be74e62bfe Uploaded
ric
parents:
diff changeset
12 produced applying a specific laboratory protocol to an existing
43be74e62bfe Uploaded
ric
parents:
diff changeset
13 tube. Existing tubes are the ones in tubes-out-file, new tubes' labels
43be74e62bfe Uploaded
ric
parents:
diff changeset
14 are created using the pattern <tube_label>::<protocol>
43be74e62bfe Uploaded
ric
parents:
diff changeset
15 The config_parameters field must point to a YAML configuration file
43be74e62bfe Uploaded
ric
parents:
diff changeset
16 with the following structure:
43be74e62bfe Uploaded
ric
parents:
diff changeset
17
43be74e62bfe Uploaded
ric
parents:
diff changeset
18 config_parameters:
43be74e62bfe Uploaded
ric
parents:
diff changeset
19 study_label: study_label
43be74e62bfe Uploaded
ric
parents:
diff changeset
20 namespace: namespace
43be74e62bfe Uploaded
ric
parents:
diff changeset
21
43be74e62bfe Uploaded
ric
parents:
diff changeset
22 where study_label is mandatory
43be74e62bfe Uploaded
ric
parents:
diff changeset
23 """
43be74e62bfe Uploaded
ric
parents:
diff changeset
24
43be74e62bfe Uploaded
ric
parents:
diff changeset
25 import csv, sys, argparse, logging, yaml
43be74e62bfe Uploaded
ric
parents:
diff changeset
26 # Needed to import flowcell data
43be74e62bfe Uploaded
ric
parents:
diff changeset
27 from bioblend.galaxy import GalaxyInstance
43be74e62bfe Uploaded
ric
parents:
diff changeset
28 import nglimsclient, os
43be74e62bfe Uploaded
ric
parents:
diff changeset
29
43be74e62bfe Uploaded
ric
parents:
diff changeset
# Logging configuration: the level names offered by the --loglevel option,
# and the timestamp / record layout handed to logging.basicConfig() in main().
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
43be74e62bfe Uploaded
ric
parents:
diff changeset
33
43be74e62bfe Uploaded
ric
parents:
diff changeset
def make_parser():
    """Build the argument parser for the command-line interface.

    ``--in-file`` and ``--config-parameters`` are required.  The tube,
    flowcell, lane and laneslot output files default to ``*_def.tsv``
    files in the current directory; the study and tubes-subsamples output
    files have no default.
    """
    cli = argparse.ArgumentParser(description='split sequencing samplesheet')

    # Logging controls.
    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
    cli.add_argument('--loglevel', type=str, default='INFO',
                     choices=LOG_LEVELS, help='logging level')

    cli.add_argument('--in-file', '-i', type=str, help='input file',
                     required=True)

    # Output files that always get written: (option, help text, default path).
    defaulted_outputs = (
        ('--tubes-out-file',
         'output file containing tube definitions',
         './tubes_def.tsv'),
        ('--flowcells-out-file',
         'output file containing flowcell definitions',
         './flowcells_def.tsv'),
        ('--lanes-out-file',
         'output file containing lane definitions',
         './lanes_def.tsv'),
        ('--laneslots-out-file',
         'output file containing laneslot definitions',
         './laneslots_def.tsv'),
    )
    for option, help_text, default_path in defaulted_outputs:
        cli.add_argument(option, type=str, help=help_text,
                         default=default_path)

    cli.add_argument('--config-parameters', type=str, required=True,
                     help='a YAML configuration file containing study label '
                          'and labels namespace, namespace is optional')

    # Optional outputs: only produced when the option is given.
    cli.add_argument('--study-output-file', type=str,
                     help='output file containing study definition')
    cli.add_argument('--tubes-subsamples-output-file', type=str,
                     help='output file containing tubes subsamples '
                          '(samples produced applying a laboratory protocol '
                          'to existing samples)')
    return cli
43be74e62bfe Uploaded
ric
parents:
diff changeset
62
43be74e62bfe Uploaded
ric
parents:
diff changeset
63
43be74e62bfe Uploaded
ric
parents:
diff changeset
def get_samplesheet_translator(samplesheet_type='default'):
    """Return the mapping from internal field names to samplesheet columns.

    Only the 'default' samplesheet layout is defined; any other
    ``samplesheet_type`` raises ``KeyError``.
    """
    default_columns = dict(
        flowcell_id='FCID',
        tube_id='SampleID',
        lane_id='Lane',
        sample_tag='Index',
        protocol='Recipe',
        operator='Operator',
        sample_project='SampleProject',
    )
    layouts = {'default': default_columns}
    return layouts[samplesheet_type]
43be74e62bfe Uploaded
ric
parents:
diff changeset
74
43be74e62bfe Uploaded
ric
parents:
diff changeset
def add_namespace(namespace, label, separator='|'):
    """Return ``label`` prefixed by ``namespace``, joined with ``separator``."""
    pieces = (namespace, label)
    return separator.join(pieces)
43be74e62bfe Uploaded
ric
parents:
diff changeset
77
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_tubes_file(records, study_label, translator, ofile,
                     namespace=None, logger=None):
    """Write the tab-separated tube definitions file.

    One row is written for every distinct tube id found in ``records``,
    in sorted order so that the output is reproducible.

    Args:
        records: samplesheet rows, each a dict keyed by column name.
        study_label: value written in the 'study' column of every row.
        translator: mapping from internal field names to samplesheet
            column names; only 'tube_id' is used here.
        ofile: path of the file to write (opened in 'w' mode).
        namespace: optional prefix applied to each label with
            add_namespace; labels are left untouched when it is empty.
        logger: accepted for signature parity with the other writers;
            not used.
    """
    ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
                    'vessel_status', 'source', 'source_type']
    tube_column = translator['tube_id']
    # Strip the cell value, not the column name: padded ids such as ' t1 '
    # must collapse onto 't1', as they do in the other writers.
    tube_ids = sorted({r[tube_column].strip() for r in records})
    with open(ofile, 'w') as out_file:
        writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
        writer.writeheader()
        for tube_id in tube_ids:
            label = add_namespace(namespace, tube_id) if namespace else tube_id
            writer.writerow({'study': study_label,
                             'label': label,
                             'vessel_type': 'Tube',
                             'vessel_content': 'DNA',
                             'vessel_status': 'UNKNOWN',
                             'source': 'None',
                             'source_type': 'NO_SOURCE'})
43be74e62bfe Uploaded
ric
parents:
diff changeset
94
43be74e62bfe Uploaded
ric
parents:
diff changeset
95
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_subsamples_file(records, study_label, translator, ofile,
                          namespace=None, logger=None):
    """Write the tab-separated tube subsamples definitions file.

    One row is written for every distinct (tube id, protocol) pair found
    in ``records``.  Each subsample is labelled ``<tube_id>::<protocol>``
    and names the original tube as its source.  Rows are sorted by
    (tube id, protocol) so the output does not depend on set iteration
    order.

    Args:
        records: samplesheet rows, each a dict keyed by column name.
        study_label: value written in the 'study' column of every row.
        translator: mapping from internal field names to samplesheet
            column names; 'tube_id' and 'protocol' are used here.
        ofile: path of the file to write (opened in 'w' mode).
        namespace: optional prefix applied to both the label and the
            source with add_namespace.
        logger: accepted for signature parity with the other writers;
            not used.
    """
    ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
                    'vessel_status', 'source', 'source_type', 'options']
    tube_column = translator['tube_id']
    protocol_column = translator['protocol']
    subsamples = sorted({(r[tube_column].strip(), r[protocol_column].strip())
                         for r in records})
    with open(ofile, 'w') as out_file:
        writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
        writer.writeheader()
        for tube_id, protocol in subsamples:
            label = '%s::%s' % (tube_id, protocol)
            source = tube_id
            if namespace:
                label = add_namespace(namespace, label)
                source = add_namespace(namespace, source)
            writer.writerow({'study': study_label,
                             'label': label,
                             'vessel_type': 'Tube',
                             'vessel_content': 'DNA',
                             'vessel_status': 'UNKNOWN',
                             'source': source,
                             'source_type': 'Tube',
                             'options': 'protocol=%s' % protocol})
43be74e62bfe Uploaded
ric
parents:
diff changeset
115
43be74e62bfe Uploaded
ric
parents:
diff changeset
116
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_flowcells_file(records, study_label, translator, ofile,
                         namespace=None, logger=None):
    """Write the tab-separated flowcell definitions file.

    One row is written for every distinct flowcell id found in
    ``records``, in sorted order so the output does not depend on set
    iteration order.  The same value is used for both 'label' and
    'barcode'; every flowcell is written as 'INSTOCK' with '8' slots.

    Args:
        records: samplesheet rows, each a dict keyed by column name.
        study_label: value written in the 'study' column of every row.
        translator: mapping from internal field names to samplesheet
            column names; only 'flowcell_id' is used here.
        ofile: path of the file to write (opened in 'w' mode).
        namespace: optional prefix applied to label and barcode with
            add_namespace.
        logger: accepted for signature parity with the other writers;
            not used.
    """
    ofile_fields = ['study', 'label', 'barcode', 'container_status',
                    'number_of_slots']
    flowcell_column = translator['flowcell_id']
    flowcell_ids = sorted({r[flowcell_column].strip() for r in records})
    with open(ofile, 'w') as out_file:
        writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
        writer.writeheader()
        for flowcell_id in flowcell_ids:
            if namespace:
                flowcell_id = add_namespace(namespace, flowcell_id)
            writer.writerow({'study': study_label,
                             'label': flowcell_id,
                             'barcode': flowcell_id,
                             'container_status': 'INSTOCK',
                             'number_of_slots': '8'})
43be74e62bfe Uploaded
ric
parents:
diff changeset
131
43be74e62bfe Uploaded
ric
parents:
diff changeset
132
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_lanes_file(records, study_label, translator, ofile,
                     namespace=None, logger=None):
    """Write the tab-separated lane definitions file.

    One row is written for every distinct (flowcell id, lane id) pair
    found in ``records``.  Rows are sorted so the output does not depend
    on set iteration order; lane ids are compared as strings, so '10'
    sorts before '2'.

    Args:
        records: samplesheet rows, each a dict keyed by column name.
        study_label: value written in the 'study' column of every row.
        translator: mapping from internal field names to samplesheet
            column names; 'flowcell_id' and 'lane_id' are used here.
        ofile: path of the file to write (opened in 'w' mode).
        namespace: optional prefix applied to the flowcell id with
            add_namespace; the slot is never namespaced.
        logger: accepted for signature parity with the other writers;
            not used.
    """
    ofile_fields = ['study', 'flow_cell', 'slot', 'container_status']
    flowcell_column = translator['flowcell_id']
    lane_column = translator['lane_id']
    lanes = sorted({(r[flowcell_column].strip(), r[lane_column].strip())
                    for r in records})
    with open(ofile, 'w') as out_file:
        writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
        writer.writeheader()
        for flowcell_id, lane_id in lanes:
            if namespace:
                flowcell_id = add_namespace(namespace, flowcell_id)
            writer.writerow({'study': study_label,
                             'flow_cell': flowcell_id,
                             'slot': lane_id,
                             'container_status': 'INSTOCK'})
43be74e62bfe Uploaded
ric
parents:
diff changeset
147
43be74e62bfe Uploaded
ric
parents:
diff changeset
148
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_laneslots_file(records, study_label, translator, ofile,
                         subsamples_enabled=False,
                         namespace=None, logger=None):
    """Write the tab-separated laneslot definitions file.

    For every record the adapter is looked up in the flowcell details
    returned by NGLIMS (entries of ``fc_data['details']`` matched on
    'name' == tube id and 'lane' == int(lane id)).  One row is written
    per distinct laneslot, in sorted order.  With no records only the
    header is written and NGLIMS is not contacted.

    Args:
        records: samplesheet rows, each a dict keyed by column name.  The
            flowcell id of the first record is the one looked up in
            NGLIMS (all records are assumed to share it).
        study_label: value written in the 'study' column of every row.
        translator: mapping from internal field names to samplesheet
            column names.
        ofile: path of the file to write (opened in 'w' mode).
        subsamples_enabled: when true the source of each laneslot is the
            subsample label ``<tube_id>::<protocol>`` instead of the tube
            id itself.
        namespace: optional prefix applied to the flowcell id and to the
            source with add_namespace.
        logger: optional logger used for a debug message.

    Raises:
        SystemExit: if NGLIMS_GALAXY_HOST or NGLIMS_GALAXY_API_KEY is not
            set in the environment.
        RuntimeError: if the flowcell is not known to NGLIMS, or if no
            adapter is reported for a tube/lane pair in ``records``.
    """
    # logger defaults to None, so it must be checked before use.
    if logger is not None:
        logger.debug('subsamples_enabled: %r' % subsamples_enabled)
    ofile_fields = ['study', 'lane', 'tag', 'content', 'source',
                    'source_type', 'options']
    # Get NGLIMS host and key
    try:
        galaxy_host = os.environ['NGLIMS_GALAXY_HOST']
        api_key = os.environ['NGLIMS_GALAXY_API_KEY']
    except KeyError as ke:
        msg = 'No environment variables %s set to configure access to the Galaxy server' % ke
        sys.exit(msg)

    laneslots_def = set()
    if records:
        # Get flowcell label (assuming label is the same for all records)
        nglims_fc_id = records[0][translator['flowcell_id']].strip()
        # Get flowcell details from nglims
        gi = nglimsclient.setup(GalaxyInstance(galaxy_host, api_key))
        if not gi.nglims.exists_flowcell_id(nglims_fc_id):
            raise RuntimeError('Flowcell %s not found in NGLIMS' % nglims_fc_id)
        fc_details = gi.nglims.flowcell_complete_details(nglims_fc_id)['details']

        for r in records:
            # Strip before adding the namespace so the lane reference is
            # built the same way as the flow_cell column of the lanes file.
            flowcell_label = r[translator['flowcell_id']].strip()
            if namespace:
                flowcell_label = add_namespace(namespace, flowcell_label)
            tube_id = r[translator['tube_id']].strip()
            lane_id = r[translator['lane_id']].strip()
            protocol = r[translator['protocol']].strip()
            if subsamples_enabled:
                source_tube_id = '%s::%s' % (tube_id, protocol)
            else:
                source_tube_id = tube_id
            # Identify adapter
            lane_number = int(lane_id)
            adapters = [d['adapter'] for d in fc_details
                        if d['name'] == tube_id and d['lane'] == lane_number]
            if not adapters:
                raise RuntimeError(
                    'No adapter found in NGLIMS for sample %s on lane %s of flowcell %s'
                    % (tube_id, lane_id, nglims_fc_id))
            laneslots_def.add(('%s:%s' % (flowcell_label, lane_id),
                               r[translator['sample_tag']].strip(),
                               source_tube_id,
                               protocol,
                               r[translator['operator']].strip(),
                               r[translator['sample_project']].strip(),
                               adapters[0]))

    with open(ofile, 'w') as out_file:
        writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
        writer.writeheader()
        # Sorted so the row order does not depend on set iteration order.
        for lane, tag, source, protocol, operator, project, adapter in sorted(laneslots_def):
            if namespace:
                source = add_namespace(namespace, source)
            writer.writerow({'study': study_label,
                             'lane': lane,
                             'tag': tag,
                             'content': 'DNA',
                             'source': source,
                             'source_type': 'Tube',
                             'options': 'protocol=%s,operator=%s,sample_project=%s,adapter=%s' %
                                        (protocol, operator, project, adapter)})
43be74e62bfe Uploaded
ric
parents:
diff changeset
199
43be74e62bfe Uploaded
ric
parents:
diff changeset
200
43be74e62bfe Uploaded
ric
parents:
diff changeset
def write_study_file(study_label, records, translator, ofile, logger=None):
    """Write the study definition file: a header plus one row for the study.

    Only the 'label' column is filled in; 'description' is left empty.
    ``records``, ``translator`` and ``logger`` are accepted but not used.
    """
    columns = ['label', 'description']
    study_row = {'label': study_label}
    with open(ofile, 'w') as handle:
        study_writer = csv.DictWriter(handle, fieldnames=columns,
                                      delimiter='\t')
        study_writer.writeheader()
        study_writer.writerow(study_row)
43be74e62bfe Uploaded
ric
parents:
diff changeset
207
43be74e62bfe Uploaded
ric
parents:
diff changeset
208
43be74e62bfe Uploaded
ric
parents:
diff changeset
def main(argv):
    """Read the samplesheet and write every OMERO.biobank import file.

    Parses ``argv``, configures logging, loads the tab-separated
    samplesheet and the YAML configuration, then writes the tube,
    flowcell, lane and laneslot definition files.  The study file and
    the tubes subsamples file are written only when their option is
    given.

    Args:
        argv: command-line arguments, without the program name.

    Raises:
        RuntimeError: if the configuration file has no
            'config_parameters' mapping ('Bad configuration file') or
            that mapping has no 'study_label' ('No study_label provided').
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('prepare_seq_dsample_inputs')

    # NOTE(review): the 'U' mode flag is Python 2 universal-newline mode; it
    # was removed in Python 3.11 and must be replaced when the script is
    # ported.
    with open(args.in_file, 'rU') as f:
        logger.info('Loading data from file %s' % args.in_file)
        reader = csv.DictReader(f, delimiter='\t')
        recs = list(reader)
    translator = get_samplesheet_translator()

    with open(args.config_parameters) as cfgf:
        # safe_load builds plain Python objects only; the configuration is
        # a simple mapping, so nothing richer is needed.
        conf = yaml.safe_load(cfgf)
    # An empty file loads as None, so check the type before looking up keys.
    if not isinstance(conf, dict) or 'config_parameters' not in conf:
        raise RuntimeError('Bad configuration file')
    # 'config_parameters:' with nothing under it loads as None.
    config_parameters = conf['config_parameters'] or {}
    try:
        study_label = config_parameters['study_label']
    except KeyError:
        raise RuntimeError('No study_label provided')
    namespace = config_parameters.get('namespace')

    if args.study_output_file:
        logger.info('Writing Study definition file %s' % args.study_output_file)
        write_study_file(study_label, recs, translator, args.study_output_file, logger)
        logger.info('Done writing file %s' % args.study_output_file)

    logger.info('Writing Tube definitions file %s' % args.tubes_out_file)
    write_tubes_file(recs, study_label, translator,
                     args.tubes_out_file, namespace,
                     logger)
    logger.info('Done writing file %s' % args.tubes_out_file)

    # argparse always sets this attribute (None when the option is absent),
    # so test its value rather than its presence in the namespace.
    subsamples_enabled = bool(args.tubes_subsamples_output_file)
    if subsamples_enabled:
        logger.info('Writing Tubes\' subsamples definitions file %s'
                    % args.tubes_subsamples_output_file)
        write_subsamples_file(recs, study_label, translator,
                              args.tubes_subsamples_output_file,
                              namespace, logger)
        logger.info('Done writing file %s' % args.tubes_subsamples_output_file)

    logger.info('Writing FlowCell definitions file %s' % args.flowcells_out_file)
    write_flowcells_file(recs, study_label, translator,
                         args.flowcells_out_file, namespace,
                         logger)
    logger.info('Done writing file %s' % args.flowcells_out_file)

    logger.info('Writing Lane definitions file %s' % args.lanes_out_file)
    write_lanes_file(recs, study_label, translator,
                     args.lanes_out_file, namespace,
                     logger)
    logger.info('Done writing file %s' % args.lanes_out_file)

    logger.info('Writing LaneSlot definitions file %s' % args.laneslots_out_file)
    write_laneslots_file(recs, study_label, translator,
                         args.laneslots_out_file,
                         subsamples_enabled,  # laneslots point at subsamples only if they were written
                         namespace,
                         logger)
    logger.info('Done writing file %s' % args.laneslots_out_file)


if __name__ == '__main__':
    main(sys.argv[1:])