# HG changeset patch # User ric # Date 1474551045 14400 # Node ID 8924f77c59ee50d521183a21f0337b5033188565 # Parent 8d405ac8a8372dc13da3e36ef12c1fba3391c70f Deleted selected files diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/build_miniped.py --- a/galaxy-tools/biobank/tools/build_miniped.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,155 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -""" -A rough example of basic pedigree info generation. -""" - -import csv, argparse, sys, os - -from bl.vl.kb import KnowledgeBase as KB -from bl.vl.kb.drivers.omero.ehr import EHR -import bl.vl.individual.pedigree as ped -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import LOG_LEVELS, get_logger - -DIAGNOSIS_ARCH = 'openEHR-EHR-EVALUATION.problem-diagnosis.v1' -DIAGNOSIS_FIELD = 'at0002.1' -T1D_ICD10 = 'icd10-cm:E10' -MS_ICD10 = 'icd10-cm:G35' -NEFRO_ICD10 = 'icd10-cm:E23.2' - -PLINK_MISSING = -9 -PLINK_UNAFFECTED = 1 -PLINK_AFFECTED = 2 - -FIELDS = ["fam_label", "ind_label", "fat_label", "mot_label", "gender", "t1d_status", "ms_status", "nefro_status"] - - -def make_parser(): - parser = argparse.ArgumentParser(description='build the first columns of a ped file from VL') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS, - help='logging level', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero password') - parser.add_argument('-S', '--study', type=str, required=True, - help='a list of comma separated studies used to retrieve individuals that will be written to ped file') - parser.add_argument('--ofile', type=str, help='output file path', - required=True) - return parser - -def build_families(individuals, logger): - # Individuals with only one parent will be considered like founders - # for i in individuals: - # if ((i.mother is None) or (i.father is None)): - # i.mother = None - # i.father = None - logger.info("individuals: %d" % len(individuals)) - #logger.info("individuals: with 0 or 2 parents: %d" % len(not_one_parent)) - logger.info("analyzing pedigree") - founders, non_founders, dangling, couples, children = ped.analyze( - individuals - ) - logger.info("splitting into families") - return ped.split_disjoint(individuals, children) - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('build_miniped', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - logger.debug('Loading all individuals from omero') - all_inds = kb.get_objects(kb.Individual) # store all inds to cache - logger.debug('%d individuals loaded' % len(all_inds)) - studies = [kb.get_study(s) for s in args.study.split(',')] - # Removing None values - studies = set(studies) - try: - studies.remove(None) - except KeyError: - pass - studies = list(studies) - if len(studies) == 0: - logger.error('No matches found for labels %s, stopping program' % args.study) - sys.exit(2) - enrolled_map = {} - for study in studies: - logger.info('Loading enrolled individuals for study %s' % study.label) - enrolled = kb.get_enrolled(study) - logger.debug('%d individuals loaded' % len(enrolled)) - for en in enrolled: - if en.individual.id not in enrolled_map: - enrolled_map[en.individual.id] = ('%s:%s' % (en.study.label, en.studyCode), - en.individual) - else: - logger.debug('Individual %s already mapped' % en.individual.id) - logger.debug('Loading EHR records') - ehr_records = kb.get_ehr_records() - logger.debug('%s EHR records loaded' % len(ehr_records)) - ehr_records_map = {} - for r in ehr_records: - ehr_records_map.setdefault(r['i_id'], []).append(r) - affection_map = {} - for ind_id, ehr_recs in ehr_records_map.iteritems(): - affection_map[ind_id] = dict(t1d=PLINK_UNAFFECTED, ms=PLINK_UNAFFECTED, - nefro=PLINK_UNAFFECTED) - ehr = EHR(ehr_recs) - if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, T1D_ICD10): - affection_map[ind_id]['t1d'] = PLINK_AFFECTED - if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, MS_ICD10): - affection_map[ind_id]['ms'] = PLINK_AFFECTED - if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, NEFRO_ICD10): - affection_map[ind_id]['nefro'] = PLINK_AFFECTED - - immuno_inds = [i for (ind_id, (st_code, i)) in enrolled_map.iteritems()] - families = build_families(immuno_inds, logger) - logger.info("found %d families" % len(families)) - - def resolve_label(i): - try: - return enrolled_map[i.id][0] - except KeyError: - return i.id - - def resolve_pheno(i): - try: - immuno_affection = affection_map[i.id] - except KeyError: - return PLINK_MISSING, PLINK_MISSING, PLINK_MISSING - return immuno_affection["t1d"], immuno_affection["ms"], immuno_affection["nefro"] - - kb.Gender.map_enums_values(kb) - gender_map = lambda x: 2 if x == kb.Gender.FEMALE else 1 - - logger.info("writing miniped") - with open(args.ofile, "w") as f: - writer = csv.DictWriter(f, FIELDS, delimiter="\t", lineterminator="\n") - for k, fam in enumerate(families): - fam_label = "FAM_%d" % (k+1) - for i in fam: - r = {} - r["fam_label"] = fam_label - r["ind_label"] = resolve_label(i) - r["fat_label"] = 0 if (i.father is None or i.father not in fam) else resolve_label(i.father) - r["mot_label"] = 0 if (i.mother is None or i.mother not in fam) else resolve_label(i.mother) - r["gender"] = gender_map(i.gender) - r["t1d_status"], r["ms_status"], r["nefro_status"] = resolve_pheno(i) - writer.writerow(r) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/build_miniped.xml --- a/galaxy-tools/biobank/tools/build_miniped.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ - - - Build a reduced ped file from Omero server - - - build_miniped.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - #if $study - --study=${study} - #end if - --ofile=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - It will output a tsv files with a column of codes for each groups of samples. - - The labels of the columns are: - - family - - individual enrollment code (STUDY:CODE) - - father enrollment code (STUDY:CODE) - - mother enrollment code (STUDY:CODE) - - gender - - T1D affection status - - MS affection status - - Nefro affection status - - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/check_merge_individuals.py --- a/galaxy-tools/biobank/tools/check_merge_individuals.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -import sys, csv, argparse, os -from collections import Counter - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import LOG_LEVELS, get_logger - - -def make_parser(): - parser = argparse.ArgumentParser(description='check data that will be passed to the merge_individuals tool') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero password') - parser.add_argument('--in_file', type=str, required=True, - help='input file') - parser.add_argument('--out_file', type=str, required=True, - help='output file') - return parser - - -def get_invalid_vids(records, logger): - records_map = {} - invalid_vids = [] - - for rec in records: - for k,v in rec.iteritems(): - records_map.setdefault(k, []).append(v) - # Check for duplicated sources - ct = Counter() - for x in records_map['source']: - ct[x] += 1 - for k, v in ct.iteritems(): - if v > 1: - logger.error('ID %s appears %d times as source, this ID has been marked as invalid' % (k, v)) - invalid_vids.append(k) - # Check for VIDs that appear bots in 'source' and 'target' fields - sources = set(records_map['source']) - targets = set(records_map['target']) - commons = sources.intersection(targets) - for c in commons: - logger.error('ID %s appears both in \'source\' and \'target\' columns, this ID has been marked as invalid' % c) - invalid_vids.append(c) - - return set(invalid_vids) - - -def check_row(row, individuals, logger): - try: - source = individuals[row['source']] - logger.debug('%s is a valid Individual ID' % source.id) - target = individuals[row['target']] - logger.debug('%s is a valid Individual ID' % target.id) - return True - except KeyError, ke: - logger.error('%s is not a valid Individual ID' % ke) - return False - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('check_merge_individuals', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Preloading all individuals') - inds = kb.get_objects(kb.Individual) - logger.info('Loaded %d individuals' % len(inds)) - inds_map = {} - for i in inds: - inds_map[i.id] = i - - with open(args.in_file) as infile, open(args.out_file, 'w') as outfile: - reader = csv.DictReader(infile, delimiter='\t') - records = [row for row in reader] - invalid_vids = get_invalid_vids(records, logger) - - writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t') - writer.writeheader() - - for record in records: - if record['source'] in invalid_vids or record['target'] in invalid_vids: - logger.error('Skipping record %r because at least one ID was marked as invalid' % record) - else: - if check_row(record, inds_map, logger): - writer.writerow(record) - logger.debug('Record %r written in output file' % record) - - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/check_merge_individuals.xml --- a/galaxy-tools/biobank/tools/check_merge_individuals.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ - - - Verify data that will be passed to the merge_individuals tool - - - check_merge_individuals.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --in_file=${input1} - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - - -.. class:: infomark - -**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* - ------ - -It will check merging individuals info using informations from a file like this:: - - source target - V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327 - - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/check_update_parents_data.py --- a/galaxy-tools/biobank/tools/check_update_parents_data.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -import sys, csv, argparse, logging, os -from collections import Counter - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import get_logger, LOG_LEVELS - - -def make_parser(): - parser = argparse.ArgumentParser(description='check data that will be passed to the update_parents tool') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero password') - parser.add_argument('--in_file', type=str, required=True, - help='input file') - parser.add_argument('--out_file', type=str, required=True, - help='output file') - return parser - - -def check_row(row, individuals_map, kb, logger): - logger.debug('Checking record %r' % row) - try: - ind = individuals_map[row['individual']] - logger.info('%s is a valid Individual ID' % ind.id) - if row['father'] != 'None': - father = individuals_map[row['father']] - logger.info('%s is a valid Individual ID' % father.id) - check_gender(father, kb.Gender.MALE) - logger.info('Gender check passed') - else: - logger.info('None value, no check required') - if row['mother'] != 'None': - mother = individuals_map[row['mother']] - logger.info('%s is a valid Individual ID' % mother.id) - check_gender(mother, kb.Gender.FEMALE) - logger.info('Gender check passed') - else: - logger.info('None value, no check required') - return True - except KeyError, ke: - logger.error('%s is not a valid Individual ID, rejecting row' % ke) - return False - except ValueError, ve: - logger.error(ve) - return False - - -def check_gender(individual, gender): - if individual.gender.enum_label() != gender.enum_label(): - raise ValueError('Gender for individual %s is %s, expected %s, rejecting row' % (individual.id, - individual.gender.enum_label(), - gender.enum_label())) - else: - pass - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('check_update_parents_data', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Preloading all individuals from the system') - inds = kb.get_objects(kb.Individual) - logger.info('%d individuals loaded' % len(inds)) - inds_lookup = {} - for i in inds: - inds_lookup[i.id] = i - - with open(args.in_file) as infile, open(args.out_file, 'w') as outfile: - reader = csv.DictReader(infile, delimiter='\t') - records = list(reader) - logger.info('Check for duplicated in \'individual\' column') - recs_by_ind = {} - for rec in records: - recs_by_ind.setdefault(rec['individual'], []).append(rec) - ct = Counter() - duplicated = [] - for k,v in recs_by_ind.iteritems(): - if len(v) > 1: - duplicated.append(k) - for dupl in duplicated: - logger.info('Individual %s is a duplicated' % dupl) - for r in recs_by_ind.pop(dupl): - logger.info('Removing record %r' % r) - good_records = sum(recs_by_ind.itervalues(), []) - logger.info('Duplicated check completed') - writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t') - writer.writeheader() - logger.info('Checking records') - for row in good_records: - if check_row(row, inds_lookup, kb, logger): - writer.writerow(row) - logger.debug('Record %r written in output file' % row) - logger.info('Records check completed') - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/check_update_parents_data.xml --- a/galaxy-tools/biobank/tools/check_update_parents_data.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ - - - Verify data that will be passed to the update_parents tool - - - check_update_parents_data.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --in_file=${input1} - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - - -.. class:: infomark - -**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* - ------ - -It will check parental info of individual using informations from a file like this:: - - individual father mother - V08E18411BC66F4987BCA43EFC6F636224 None None - - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/convert_sam.py --- a/galaxy-tools/biobank/tools/convert_sam.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.snp_manager.main import main - -main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/convert_sam.xml --- a/galaxy-tools/biobank/tools/convert_sam.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ - - converter - - convert_sam.py --logfile ${log_file} convert_sam -i ${input_file} - -o ${output_file} --reftag ${dbkey} --output-format ${output_type} - ## FIXME: find a way to import the default from the relevant module - --flank-size 125 - - - - - - - - - - - - - - - - - - -**What it does** - -This tool converts SAM alignment data to VL marker alignment or Galaxy -extract genomic DNA input. - -Expects single-end BWA alignment data produced by the previous steps -in the workflow (see markers_to_fastq). - -**NOTE:** if the marker_alignment output format is selected, the -Database/Build property must be set in the input SAM file. - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/drop_parental_info.py --- a/galaxy-tools/biobank/tools/drop_parental_info.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -import sys, csv, argparse, os - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import get_logger, LOG_LEVELS - - -def make_parser(): - parser = argparse.ArgumentParser(description='set parents of the selected individuals to None') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero password') - parser.add_argument('--in_file', type=str, required=True, - help='list of the individuals') - parser.add_argument('--out_file', type=str, required=True, - help='output file') - return parser - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('drop_parental_info', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Retrieving individuals') - inds = kb.get_objects(kb.Individual) - logger.info('Retrieved %d individuals' % len(inds)) - inds_lookup = {} - for i in inds: - inds_lookup[i.id] = i - - with open(args.in_file) as in_file: - reader = csv.DictReader(in_file, delimiter='\t') - records = [] - for row in reader: - try: - # The 'individual' : inds_lookup[row['individual']].id - # is quite redundant but is a usefull check in order - # to filter wrong VIDs - record = {'individual' : inds_lookup[row['individual']].id, - 'father' : 'None', - 'mother' : 'None'} - records.append(record) - except KeyError, ke: - logger.warning('Individual with VID %s does not exist, skipping line' % ke) - - with open(args.out_file, 'w') as out_file: - writer = csv.DictWriter(out_file, ['individual', 'father', 'mother'], - delimiter = '\t') - writer.writeheader() - writer.writerows(records) - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/drop_parental_info.xml --- a/galaxy-tools/biobank/tools/drop_parental_info.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ - - - Remove parental info of individuals - - - drop_parental_info.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --in_file=${input1} - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - - - -It will remove parental info of individual using informations from a file like this:: - - individual - V08E18411BC66F4987BCA43EFC6F636224 - -and build a tsv file like this:: - - individual father mother - V08E18411BC66F4987BCA43EFC6F636224 None None - ------ - -.. class:: warningmark - -Note that galaxy don't recognize a tsv file with just one column like a tabular file, so need to be converted by hand - - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/enrollments_by_platewells.py --- a/galaxy-tools/biobank/tools/enrollments_by_platewells.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -""" -From a list of platewells retrieves the enrollments code of the connected individual -""" -import argparse -import csv -import sys - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import LOG_LEVELS, get_logger - -def make_parser(): - parser = argparse.ArgumentParser(description='From platewells 2 enrollments code') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS, - help='logging level', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero password') - parser.add_argument('-S', '--study', type=str, required=True, - help='a study used to retrieve individuals') - parser.add_argument('--ifile', type=str, required=True, - help='list of platewells used to fetch data') - parser.add_argument('--ofile', type=str, help='output file path', - required=True) - return parser - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('pws2enrolls', level=args.loglevel, - filename=args.logfile) - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - study = kb.get_study(args.study) - enrolled_map = {e.individual.id:e for e in kb.get_enrolled(study)} - logger.info('Loaded {} enrolled individuals for study {}'.format(len(enrolled_map), study.label)) - plates = kb.get_objects(kb.TiterPlate) - logger.info('Loaded {} plates'.format(len(plates))) - pws_map = {':'.join([w.container.barcode, w.label]):w for w in kb.get_objects(kb.PlateWell) - if w.container.barcode} - logger.info('Loaded {} platewells'.format(len(pws_map))) - #records = [] - of=open(args.ofile, 'w') - writer=csv.DictWriter(of, ['platewell', 'status', 'enrollment'], - delimiter='\t', quotechar='"', restval='None') - writer.writeheader() - with open(args.ifile, 'r') as f: - reader=csv.DictReader(f, delimiter='\t') - logger.info('Searching individuals connected to the platewells') - for r in reader: - ind=kb.dt.get_connected(pws_map[r['platewell']], aklass = kb.Individual, - direction = kb.dt.DIRECTION_INCOMING) - try: - record = {'platewell': r['platewell'], - 'status': pws_map[r['platewell']].status.enum_label(), - 'enrollment': ':'.join([study.label,enrolled_map[ind[0].id].studyCode])} - except KeyError as e: - logger.warning('not enrolled {}'.format(r['platewell'])) - record = {'platewell': r['platewell'], - 'status': pws_map[r['platewell']].status.enum_label(), - 'enrollment': ':'.join([study.label,'not_enrolled'])} - writer.writerow(record) - of.close() - - -if __name__ == "__main__": - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/enrollments_by_platewells.xml --- a/galaxy-tools/biobank/tools/enrollments_by_platewells.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ - - - From a list of platewells retrieves the enrollments code of the connected individual - - - enrollments_by_platewells.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ifile=$input - --ofile=${output} - #if str($study_label) != 'no_study' - --study=${study_label} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool, from a list of platewells retrieves the enrollments code of the connected individual. - -Input file must be in TABULAR format and like:: - - platewell - A9033P3B:C09 - A9033P3B:G09 - A9033P3B:G10 - A9033P3B:H05 - A9033WRT:E08 - A9033WRT:E10 - A9033WRT:F03 - A9033WRT:F04 - ... - -Output file will be like:: - - platewell status enrollment - A9033P3B:C09 DISCARDED AUTOIMMUNITY:X3M6XP517 - A9033P3B:G09 DISCARDED AUTOIMMUNITY:RYMRK2NLJ - A9033P3B:G10 DISCARDED AUTOIMMUNITY:OV13V99M9 - A9033P3B:H05 DISCARDED AUTOIMMUNITY:OV13ZQK19 - A9033WRT:E08 DISCARDED AUTOIMMUNITY:7GMWNX9M8 - A9033WRT:E10 DISCARDED AUTOIMMUNITY:R3MKP0GL4 - A9033WRT:F03 DISCARDED AUTOIMMUNITY:N1VD2Q915 - A9033WZT:A04 CONTENTUSABLE AUTOIMMUNITY:210JRG4MW - ... - - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/flowcell_samplesheet.xml --- a/galaxy-tools/biobank/tools/flowcell_samplesheet.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,125 +0,0 @@ - - - Build the samplesheet for a given FlowCell - - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=$__user_email__ - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ofile=${outfile} - flowcell_samplesheet - #if $namespace.ignore_namespace - --flowcell=${flowcell} - --ignore_namespace - #else - #if $namespace.namespace_value - -- flowcell=${namespace.namespace_value}|${flowcell} - #else - --flowcell="${flowcell}" - #end if - #end if - #if $remove_namespaces - --remove_namespaces - #end if - #if $add_sample_label - --sample_label - #end if - --separator=${csv_separator} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Export data related to a FlowCell identified by the Flowcell ID field -into a csv file like - - FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator - - foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe - - foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe - - foofc1,2,v021441,hg19,TAGCTT,,WG,john doe - - ... - -If the checkbox "Add sample labels" is enabled, the output file will -have a new column at the end of each row with the Label of the sample -like - - FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleLabel - - foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe,foosmp1 - - foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe,foosmp2 - - foofc1,2,v021441,hg19,TAGCTT,,WG,john doe,foosmp3 - - ... - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_kinship_input.xml --- a/galaxy-tools/biobank/tools/get_kinship_input.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ - - - Build input files for kinship MR application - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=galaxy - #if $enable_debug - --loglevel=DEBUG - #else - --loglevel=$__app__.config.vl_loglevel - #end if - --logfile=${logfile} - --ofile=${output_gen} - extract_gt - #if str($study) != 'no_study' - --study=${study} - #end if - --out_samples_list=${output_sl} - --marker_set=${mset} - #if $transpose_output - --transpose_output - #end if - --compress_output - --compression_level=${compression_level} - #if $ignore_duplicated - --ignore_duplicated - #end if - #if str($data_collection) != 'no_collection' - --data_collection=${data_collection} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_parents_from_sibling.py --- a/galaxy-tools/biobank/tools/get_parents_from_sibling.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -''' -From a file like this -individual sibling -V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327 -V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445 -V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B -.... - -retrieve indivual's parents using sibling informations and build a file like - -individual father mother -V08E18411BC66F4987BCA43EFC6F636224 V027DE334753424F07B81A70053EF5B873 V035222CAEE0474AFEBB9A161D4B64914E -V0FAE2B10F690041509739A3F4B314DC8F V0E966B53BDCC942C09D6B6D96DE98F4F4 V0F7B6926C6FBE4F0BB38BBC6CFB13A825 -.... - -''' - -import sys, csv, argparse, logging, os - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import get_logger, LOG_LEVELS - - -def make_parser(): - parser = argparse.ArgumentParser(description='retrieve parents information using sibling') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero passwd') - parser.add_argument('--in_file', type=str, required=True, - help='input file with individual-sibling couples') - parser.add_argument('--out_file', type=str, required=True, - help='output file with parents information') - return parser - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('get_parents_from_sibling', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Retrieving individuals') - inds = kb.get_objects(kb.Individual) - logger.info('Retrieved %d individuals' % len(inds)) - inds_lookup = {} - for ind in inds: - inds_lookup[ind.id] = ind - - with open(args.in_file) as infile: - reader = csv.DictReader(infile, delimiter='\t') - records = [] - for row in reader: - try: - sib = inds_lookup[row['sibling']] - rec = {'individual' : row['individual'], - 'father' : sib.father.id if sib.father else 'None', - 'mother' : sib.mother.id if sib.mother else 'None'} - logger.info('Individual %s, father: %s - mother: %s' % (row['individual'], - rec['father'], - rec['mother'])) - records.append(rec) - except KeyError: - logger.error('Unable to find individual %s' % row['sibling']) - - logger.info('Retrieved parents for %d individuals' % len(records)) - - with open(args.out_file, 'w') as outfile: - writer = csv.DictWriter(outfile, ['individual', 'father', 'mother'], - delimiter='\t') - writer.writeheader() - writer.writerows(records) - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_parents_from_sibling.xml --- a/galaxy-tools/biobank/tools/get_parents_from_sibling.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ - - - Retrieve individual's parents using sibling informations - - - get_parents_from_sibling.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --in_file=${input1} - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - - -.. class:: infomark - -**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* - ------ - -It will retrieve indivual's parents using sibling informations from a file like this:: - - individual sibling - V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327 - V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445 - V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B - -and build a tsv file like this:: - - individual father mother - V08E18411BC66F4987BCA43EFC6F636224 None None - V0FAE2B10F690041509739A3F4B314DC8F V07282522B89FC4F7CA08094537A13C0D1 V09D459311D1254095AE9F00B45E5A101E - V0382EF862AA4B475697C95D3777043239 V04CD9561F753F4853838E2E96819AAAC0 V0382EF862AA4B475697C95D3777043239 - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_studies_details.py --- a/galaxy-tools/biobank/tools/get_studies_details.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -''' -''' - -import argparse, csv, sys - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import get_logger, LOG_LEVELS - - -def make_parser(): - parser = argparse.ArgumentParser(description='retrieve studies details') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero passwd') - parser.add_argument('--out_file', type=str, required=True, - help='output file with studies details') - return parser - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('get_studies', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Retrieving studies') - studies = kb.get_objects(kb.Study) - logger.info('Retrieved {} studies'.format(len(studies))) - records = [] - for s in studies: - enr = kb.get_enrolled(s) - rec = {'label': s.label, - 'description': s.description, - 'enrolled': len(enr)} - records.append(rec) - - with open(args.out_file, 'w') as outfile: - writer = csv.DictWriter(outfile, ['label', 'description', 'enrolled'], - delimiter='\t') - # writer.writeheader() - writer.writerows(sorted(records, key=lambda key: key['label'])) - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_studies_details.xml --- a/galaxy-tools/biobank/tools/get_studies_details.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ - - - Retrieve studies details - - - get_studies_details.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - -It will produce a list of the studies known by the biobank server - - - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_titerplates_details.py --- a/galaxy-tools/biobank/tools/get_titerplates_details.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -''' -''' - -import argparse, csv, sys - -from bl.vl.kb import KnowledgeBase as KB -import bl.vl.utils.ome_utils as vlu -from bl.vl.utils import get_logger, LOG_LEVELS - - -def make_parser(): - parser = argparse.ArgumentParser(description='retrieve titerplates details') - parser.add_argument('--logfile', type=str, help='log file (default=stderr)') - parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, - help='logging level (default=INFO)', default='INFO') - parser.add_argument('-H', '--host', type=str, help='omero hostname') - parser.add_argument('-U', '--user', type=str, help='omero user') - parser.add_argument('-P', '--passwd', type=str, help='omero passwd') - parser.add_argument('--out_file', type=str, required=True, - help='output file with titerplates details') - return parser - - -def main(argv): - parser = make_parser() - args = parser.parse_args(argv) - - logger = get_logger('get_titerplates', level=args.loglevel, - filename=args.logfile) - - try: - host = args.host or vlu.ome_host() - user = args.user or vlu.ome_user() - passwd = args.passwd or vlu.ome_passwd() - except ValueError, ve: - logger.critical(ve) - sys.exit(ve) - - kb = KB(driver='omero')(host, user, passwd) - - logger.info('Retrieving titerplates') - titps = kb.get_objects(kb.TiterPlate) - logger.info('Retrieved {} titerplates'.format(len(titps))) - records = [] - for tp in titps: - if type(tp) == kb.TiterPlate: - rec = {'label': tp.label, - 'barcode': tp.barcode, - 'status': tp.status.enum_label()} - records.append(rec) - - with open(args.out_file, 'w') as outfile: - writer = csv.DictWriter(outfile, ['label', 'barcode', 'status'], - delimiter='\t') - writer.writeheader() - writer.writerows(sorted(records, key=lambda key: key['label'])) - -if __name__ == '__main__': - main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/get_titerplates_details.xml --- a/galaxy-tools/biobank/tools/get_titerplates_details.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ - - - Retrieve titerplates details - - - get_titerplates_details.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --out_file=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - - - - - - - - - - - - - - - - - - - - - - -It will produce a list of the titerplates known by the biobank server - - - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/global_stats.xml --- a/galaxy-tools/biobank/tools/global_stats.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ - - - Provide global statistics for a given study. - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=galaxy - --ofile=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - global_stats - #if str($study) != 'all_known_studies' - --study=${study} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - It will output a tsv files with the following columns:: - - study diagnosis technology gender counts - BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 MALE 1 - BSTUDY icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1 - BSTUDY local:at0.3 AffymetrixCelGENOMEWIDESNP_6 MALE 2 - BSTUDY icd10-cm:G35;icd10-cm:E10;icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 MALE 1 - BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1 - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/gstudio_datasheet.xml --- a/galaxy-tools/biobank/tools/gstudio_datasheet.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ - - - Build a Genome Studio datasheet for the given plate - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=$__user_email__ - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ofile=${outfile} - gstudio_datasheet - #if str($plate) != 'no_plate' - --plate=${plate} - --manifest=${manifest} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Build a Genome Studio datasheet for the given plate - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/kb_query.py --- a/galaxy-tools/biobank/tools/kb_query.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.kb_query.main import main as kb_query - -kb_query(sys.argv[1:]) - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/lookup_index.py --- a/galaxy-tools/biobank/tools/lookup_index.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.snp_manager.main import main - -main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/lookup_index.xml --- a/galaxy-tools/biobank/tools/lookup_index.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ - - lookup dbSNP index - - lookup_index.py --logfile ${log_file} lookup_index -i ${input_file} - -o ${output_file} - --index-file "${ filter( lambda x: str( x[0] ) == str( $indices ), $__app__.tool_data_tables[ 'dbsnp_indexes' ].get_fields() )[0][-1] }" - - - - - - - - - - - - - - - -**What it does** - -This tool reads a Galaxy genome segment extractor output in interval -format and performs a lookup in the selected dbSNP index to get the -true rs label. It outputs a new marker definitions file with the true -rs labels and masks. - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/map_to_collection.xml --- a/galaxy-tools/biobank/tools/map_to_collection.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ - - - Map a list of objects (vessels or data samples) to the specified - collection. - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=galaxt - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ofile=${outfile} - map_to_collection - --ifile=${infile} - #if str($field_label) != '' - --field_label=${field_label} - #end if - #if str($collection_selector.collection_type) != 'no_coll_selected' - --collection_type=${collection_selector.collection_type} - #if str($collection_selector.collection_type) == 'DataCollection' - #if str($collection_selector.dcoll_label) != 'no_label_selected' - --collection_label=${collection_selector.dcoll_label} - #end if - #elif str($collection_selector.collection_type) == 'VesselsCollection' - #if str($collection_selector.vcoll_label) != 'no_label_selected' - --collection_label=${collection_selector.vcoll_label} - #end if - #end if - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/map_vid.py --- a/galaxy-tools/biobank/tools/map_vid.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.kb_query.main import main as kb_query - -def main(argv): - selected_column, new_column_name, input_file = argv[:3] - selected_column = int(selected_column) - 1 - new_column_name = new_column_name.strip() - - # with open(input_file) as f: - # l = f.readline().strip() - # Backport to 2.6 - fi = open(input_file) - l = fi.readline().strip() - fi.close() - - column_names = l.split('\t') - column_name = column_names[selected_column] - - argv = argv[3:] + ['--column=%s,%s' % (column_name, new_column_name)] - kb_query(argv) - -main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/map_vid.xml --- a/galaxy-tools/biobank/tools/map_vid.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ - - - Map labels of objects known to Omero/VL to their VID - - - map_vid.py - ${selected_column} - ${new_column_name} - ${input1} - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --operator=galaxy - --ofile=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - map_vid - --ifile=${input1} - --source-type=${source_type.source_type} - #if $source_type.source_type == 'Individual' - #if str($source_type.study) != 'use_provided' - --study=${source_type.study} - #end if - #end if - #if $strict_mapping - --strict-mapping - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -The tool resolves VIDs for the given column and rename the column -iteself with a new label. Usually to map the items' VIDs the simple -item label is necessary but in some cases a special syntax is needed: - -* for Individual items, if no default study is provided, the pattern - to be used is **STUDY:STUDY_LABEL**. If a default study is provided, - the column must contain only the STUDY_LABEL - -* for PlateWell items the pattern is **PLATE_LABEL:WELL_LABEL** - -* for DataCollectionItem items the pattern is - **DATA_COLLECTION_LABEL:ITEM_LABEL** - - - - - - - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/markers_to_fastq.py --- a/galaxy-tools/biobank/tools/markers_to_fastq.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.snp_manager.main import main - -main(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/markers_to_fastq.xml --- a/galaxy-tools/biobank/tools/markers_to_fastq.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - converter - - markers_to_fastq.py --logfile ${log_file} markers_to_fastq - -i ${input_file} -o ${output_file} - - - - - - - - - -**What it does** - -This tool converts VL marker definitions to fastq data. - -VL marker definitions files have the following format (spaces are tabs):: - - label rs_label mask - SNP_A-1780419 rs6576700 GGATACATTTTATTGC[A/G]CTTGCAGAGTATTTTT - SNP_A-1780418 rs17054099 GGATACATTACCCAAA[C/T]GGTCACAGGTCAAAGG - SNP_A-1780415 rs7730126 GGATACATCCCCCCCA[A/G]AAAATGAGAATAAAGC - ... - -Where "label" is a unique identifier, "rs_label" is the dbSNP label -and "mask" is the SNP's mask in the -LEFT_FLANK[ALLELE_A/ALLELE_B/...]RIGHT_FLANK format. One fastq record -is generated for each allele in the mask. The string "None" in the -rs_label column means there is no rs label for the marker. - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/plate_dsamples_details.xml --- a/galaxy-tools/biobank/tools/plate_dsamples_details.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ - - - Retrieve wells and connected data samples related to a known plate - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=galaxy - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ofile=${output} - plate_data_samples - #if str($plate) != 'select_one' - --plate=${plate} - #end if - #if $fetch_all - --fetch_all - #end if - #if str($vcoll_label) != 'no_collection' - --vessels_collection=${vcoll_label} - #end if - #if $vessel_types - --ignore_types=${vessel_types} - #end if - #if str($study_label) != 'no_study' - --map_study=${study_label} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Using of the the selectable plates barcode, the tool will generate a -report file for the plate like:: - - PLATE_barcode PLATE_label WELL_label WELL_status DATA_SAMPLE_label - XXYYZZKK test_plate A01 CONTENTUSABLE a01_test_sample - XXYYZZKK test_plate A02 CONTENTUSABLE X - XXYYZZKK test_plate A03 UNKNOWN OR EMPTY X - XXYYZZKK test_plate A04 CONTENTUSABLE a04_test_sample - XXYYZZKK test_plate A05 DISCARDED X - ... - -For each plate, all wells will be generated in the output file, even -the ones not actually recorded into the system, these wells will be -marked wit a 'UNKOWN OR EMPTY' status. - -For each well, the tool performs a query in order to find if at least -one data sample is directly connected to the well itself; if at least -one is found, the label of the data sample will be placed in the -DATA_SAMPLE_label column, if no data sample is connected to the well a -'X' will be placed. - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/query.xml --- a/galaxy-tools/biobank/tools/query.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,95 +0,0 @@ - - - Provides a simplified environment to perform complex queries to - BIOBANK. - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --operator=galaxy - --ofile=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - query - --group=$study - --code-file=$code_file - - - - - - - - - - - - - - - - - - - - - - - - - - - ${str($query_code).replace('__sq__', "'").replace('__cr____cn__', '\n')} - - - - - - - - -The following is an example of a query that will dump family relations -within the group:: - - writeheader('study', 'id', 'gender', 'father', 'mother') - for i in Individuals(group): - writerow(group.id, enum_label(i.gender), - i.father.id if i.father else 'None', - i.mother.id if i.mother else 'None') - - -The next example will prepare a file that could be used to define a -data collection and then as the input for a genotyping run:: - - writeheader('dc_id', 'gender', 'data_sample', - 'path', 'mimetype', 'size', 'sha1') - for i in Individuals(group): - for d in DataSamples(i, 'AffymetrixCel'): - for o in DataObjects(d): - writerow(group.id, enum_label(i.gender), d.id, - o.path, o.mimetype, o.size, o.sha1) - -In the examples above, '''group''' (actually a study) corresponds to -the group whose label is assigned by the '''--group''' flag. - -**Note** This is clearly an extremely dangerous tool. - - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/select_sub_group.xml --- a/galaxy-tools/biobank/tools/select_sub_group.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ - - - Selects groups of individuals. - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=${advanced_configuration.vl_host} - --user=${advanced_configuration.vl_user} - --passwd=${advanced_configuration.vl_passwd} - #end if - --operator=galaxy - --ofile=${output1} - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - selector - #if str($study) != 'use_all' - --study=$study - #end if - --group-label=$group_label - --total-number=$total_number - --male-fraction=$male_fraction - --reference-disease=$reference_diagnosis - --control-fraction=$control_fraction - #if str($required_datasample) != 'unselect' - --required-datasample=$required_datasample - #end if - #if int($seed) != 0 - --seed=$seed - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -It will select a group of individuals from a specific group (from all -avalable individuals, if no group is selected). The selection is -controlled by the following parameters: - - * total number of individuals selected - * male fraction - * reference disease - * control fraction - * presence of specific datasets - -The results will be presented as a file that can be used to generate a -new group (actually a study). The file will have the following columns:: - - study label individual - XXX 0001 V20940239409 - XXX 0002 V20940239509 - XXX 0003 V20940239609 - XXX 0004 V20940239709 - ... - - where study is the name of the new study - - - \ No newline at end of file diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/snp_manager.py --- a/galaxy-tools/biobank/tools/snp_manager.py Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# BEGIN_COPYRIGHT -# END_COPYRIGHT - -import sys -from bl.vl.app.snp_manager.main import main as snp_manager - -snp_manager(sys.argv[1:]) diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/snp_manager.xml --- a/galaxy-tools/biobank/tools/snp_manager.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ - - get true rs label and mask from dbSNP - - snp_manager.py - - - - - - - - - - - - - - - - FIXME - - diff -r 8d405ac8a837 -r 8924f77c59ee galaxy-tools/biobank/tools/vessels_by_individual.xml --- a/galaxy-tools/biobank/tools/vessels_by_individual.xml Thu Sep 22 09:30:00 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ - - - Retrieve all vessels related to individuals passed with the input - file. Vessel type and a Vessel Collection can be used as filters. - - - kb_query.py - #if $advanced_configuration.configuration_level == 'advanced' - --host=$advanced_configuration.vl_host - --user=$advanced_configuration.vl_user - --passwd=$advanced_configuration.vl_passwd - #end if - --operator=galaxy - --loglevel=$__app__.config.vl_loglevel - --logfile=${logfile} - --ofile=${outfile} - vessels_by_individual - --ifile=${infile} - #if str($collection_label) != 'no_collection' - --vessels_collection=${collection_label} - #end if - #if str($vessel_type) != 'no_type' - --vessel_type=${vessel_type} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file