Mercurial > repos > ric > test1
diff galaxy-tools/biobank/utils/prepare_illumina_import_inputs.py @ 3:43be74e62bfe draft
Uploaded
author | ric |
---|---|
date | Thu, 22 Sep 2016 08:57:04 -0400 |
parents | |
children |
line wrap: on
line diff
"""Split a GenomeStudio samplesheet into TSV files for the OMERO import.

The samplesheet rows are grouped by ``SentrixBarcode_A`` and, for every
barcode, records are written to four tab-separated files: the array of
arrays definitions, the bead chip array definitions, the bead chip measure
definitions (one per channel) and the bead chip measures definitions.
"""
import argparse
import csv
import re
import sys
from copy import deepcopy

from bl.core.io.illumina import GenomeStudioSampleSheetReader as gsr
from bl.vl.utils import LOG_LEVELS, get_logger

# Array labels such as "R01C02": two-digit row and column, case-insensitive.
# Compiled once at import time instead of on every call.
ICHIPCORDS_PATTERN = re.compile(r'^r(\d{2})c(\d{2})$', re.IGNORECASE)


def make_parser():
    """Build the command line parser of the script.

    Returns:
        argparse.ArgumentParser: parser accepting the logging options, the
        input samplesheet, the four output file paths and the study label.
    """
    # The text has to be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # turns the whole sentence into the program name of the usage line.
    parser = argparse.ArgumentParser(
        description='Split GenomeStudio samplesheet in TSV files to import data within OMERO')
    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('--input-file', type=str, required=True,
                        help='GenomeStudio samplesheet')
    parser.add_argument('--arrays-out-file', type=str,
                        help='output file containing IlluminaArrayOfArrays definitions',
                        default='./array_of_arrays.tsv')
    parser.add_argument('--bead-chip-out-file', type=str,
                        help='output file containing IlluminaBeadChipArray definitions',
                        default='./bead_chip.tsv')
    parser.add_argument('--array-measure-out-file', type=str,
                        help='output file containing IlluminaBeadChipMeasure definitions',
                        default='./array_measure.tsv')
    parser.add_argument('--array-measures-out-file', type=str,
                        help='output file containing IlluminaBeadChipMeasures definitions',
                        default='./array_measures.tsv')
    parser.add_argument('--study', type=str, required=True,
                        help='Study label that will be used in the import procedure')
    return parser


def get_assay_type_enum(manifest_file):
    """Turn a manifest file name into an assay type label.

    Surrounding whitespace is stripped, every ``.bpm`` occurrence is
    removed, dashes and spaces become underscores and the result is
    upper-cased (e.g. ``'Human Omni-1.bpm'`` -> ``'HUMAN_OMNI_1'``).
    """
    return manifest_file.strip().replace('.bpm', '').replace('-', '_').replace(' ', '_').upper()


def prepare_array_of_arrays_input(barcode, study, elements):
    """Build the array of arrays record for a single barcode.

    Args:
        barcode: barcode of the array, also used as its label.
        study: study label copied into the record.
        elements: dicts with an ``array_label`` key in the ``RxxCyy`` form.

    Returns:
        dict: record with ``barcode``, ``rows``, ``columns``, ``label`` and
        ``study``; ``rows`` and ``columns`` are the highest row and column
        numbers found among the array labels.

    Raises:
        ValueError: if ``elements`` is empty or a label is not ``RxxCyy``.
    """
    rows = []
    cols = []
    for x in elements:
        m = ICHIPCORDS_PATTERN.match(x['array_label'])
        if m is None:
            # Fail with the offending label instead of an AttributeError
            # raised on the missing match object.
            raise ValueError('Invalid array label %r for array %s'
                             % (x['array_label'], barcode))
        rows.append(int(m.group(1)))
        cols.append(int(m.group(2)))
    if not rows:
        raise ValueError('No elements found for array %s' % barcode)
    return {
        'barcode': barcode,
        'rows': max(rows),
        'columns': max(cols),
        'label': barcode,
        'study': study,
    }


def barcodes_to_labels(elements, wells_map, strict_mapping, logger):
    """Replace the ``source`` of each element using ``wells_map``.

    Elements whose source is not a key of ``wells_map`` are dropped and a
    warning is logged for each of them; the input dicts are not modified.

    Args:
        elements: dicts with a ``source`` key.
        wells_map: mapping from the current source value to the new one.
        strict_mapping: when true, the process exits if any element could
            not be mapped.
        logger: logger used for the warning and critical messages.

    Returns:
        list: copies of the mapped elements with the new ``source``.
    """
    mapped_elements = []
    for e in elements:
        if e['source'] in wells_map:
            new_el = deepcopy(e)
            new_el['source'] = wells_map[e['source']]
            mapped_elements.append(new_el)
        else:
            logger.warning('Unable to map well %s', e['source'])

    if strict_mapping and len(mapped_elements) < len(elements):
        # Mapped count first, total second, matching the message wording.
        msg = 'Mapped %d records of %d' % (len(mapped_elements), len(elements))
        logger.critical(msg)
        sys.exit(msg)
    return mapped_elements


def prepare_bead_chip_array_input(array_barcode, assay_type, study, elements):
    """Build one bead chip array record for each element of an array."""
    return [{
        'illumina_array': array_barcode,
        'label': x['array_label'],
        'source': x['source'],
        'bead_chip_assay_type': assay_type,
        'study': study,
    } for x in elements]


def prepare_bead_chip_measure_input(array_barcode, study, elements,
                                    device='generic_illumina_scanner',
                                    status='USABLE'):
    """Build the per-channel measure records of an array.

    Two records are produced for each element, one for the ``Grn`` and one
    for the ``Red`` channel; all the ``Grn`` records come first.
    """
    return [
        {
            'label': '%s_%s_%s' % (array_barcode, x['array_label'], channel),
            'source': '%s:%s' % (array_barcode, x['array_label']),
            'scanner': device,
            'status': status,
            'study': study,
        }
        for channel in ('Grn', 'Red')
        for x in elements
    ]


def prepare_bead_chip_array_measures_input(array_barcode, study, elements):
    """Build the records pairing the red and green measures of each element.

    The ``red_channel`` and ``green_channel`` values use the same label
    format produced by ``prepare_bead_chip_measure_input``.
    """
    return [{
        'study': study,
        'label': '%s_%s' % (array_barcode, x['array_label']),
        'red_channel': '%s_%s_Red' % (array_barcode, x['array_label']),
        'green_channel': '%s_%s_Grn' % (array_barcode, x['array_label']),
        'source': '%s:%s' % (array_barcode, x['array_label']),
    } for x in elements]


def main(argv):
    """Read the samplesheet and write the four TSV import files.

    Args:
        argv: command line arguments, without the program name.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('prepare_illumina_import_inputs', level=args.loglevel,
                        filename=args.logfile)

    logger.info('Processing file %s', args.input_file)
    with open(args.input_file) as in_file:
        reader = gsr(in_file)
        assay_type = get_assay_type_enum(reader.header['A'])
        # Group the samplesheet rows by array barcode.
        arrays_map = {}
        for r in reader:
            arrays_map.setdefault(r['SentrixBarcode_A'], []).append({
                'source': r['Sample_ID'],
                'array_label': r['SentrixPosition_A'],
            })

    with open(args.arrays_out_file, 'w') as array_file,\
            open(args.bead_chip_out_file, 'w') as chip_file,\
            open(args.array_measures_out_file, 'w') as measures_file,\
            open(args.array_measure_out_file, 'w') as measure_file:
        arrays_writer = csv.DictWriter(array_file,
                                       ['study', 'label', 'barcode', 'rows', 'columns'],
                                       delimiter='\t')
        arrays_writer.writeheader()
        chip_writer = csv.DictWriter(chip_file,
                                     ['study', 'illumina_array', 'label', 'source',
                                      'bead_chip_assay_type'],
                                     delimiter='\t')
        chip_writer.writeheader()
        measure_writer = csv.DictWriter(measure_file,
                                        ['study', 'label', 'source', 'scanner', 'status'],
                                        delimiter='\t')
        measure_writer.writeheader()
        measures_writer = csv.DictWriter(measures_file,
                                         ['study', 'label', 'red_channel', 'green_channel',
                                          'source'],
                                         delimiter='\t')
        measures_writer.writeheader()
        # items() rather than iteritems(): the latter only exists on
        # Python 2, while items() works on both major versions.
        for k, v in arrays_map.items():
            arrays_writer.writerow(prepare_array_of_arrays_input(k, args.study, v))
            chip_writer.writerows(prepare_bead_chip_array_input(k, assay_type, args.study, v))
            measure_writer.writerows(prepare_bead_chip_measure_input(k, args.study, v))
            measures_writer.writerows(prepare_bead_chip_array_measures_input(k, args.study, v))
    logger.info('Job completed')


if __name__ == '__main__':
    main(sys.argv[1:])