# HG changeset patch
# User ric
# Date 1474549024 14400
# Node ID 43be74e62bfeefc12ab28d228ee0b4bbfea3c695
# Parent 47bf0086e08287dbfd6dfabde6fd38a7df89ec6c
Uploaded
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/bin/clean_stuff.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/bin/clean_stuff.sh Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Purge deleted Galaxy entities older than n_days by running cleanup_datasets.py with -2 .. -5.
+GalaxyDir=/SHARE/USERFS/els7/users/sequencing/galaxy-dist
+n_days=15
+status=0
+echo "Purging deleted galaxy entities older than ${n_days} days"
+
+# Stop if the Galaxy tree is unreachable: the relative paths below must not resolve elsewhere.
+cd "${GalaxyDir}" || exit 1
+for i in 2 3 4 5 ; do
+    python scripts/cleanup_datasets/cleanup_datasets.py --days="${n_days}" "-${i}" --remove_from_disk universe_wsgi.ini || status=1
+done
+
+# Exit non-zero if any cleanup pass failed instead of always reporting success.
+echo "Done"
+exit "${status}"
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/exporter/export_titer_plates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/exporter/export_titer_plates.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,67 @@
+import logging, csv, argparse, sys, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.kb.drivers.omero.utils as vlu
+# Logging setup: record layout, timestamp format and the level names accepted by --loglevel.
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+# Column names (and header row) of the tab-separated output written by main().
+CSV_FIELDS = ['label', 'barcode', 'rows', 'columns', 'plate_status']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='dump all TiterPlate objects to a TSV file')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--ofile', type=str, help='output file',
+ required=True)
+ return parser
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+ logger = logging.getLogger()
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logging.info('Loading TiterPlate objects')
+ plates = kb.get_objects(kb.TiterPlate)
+ logging.info('Loaded %d objects' % len(plates))
+
+ with open(args.ofile, 'w') as ofile:
+ writer = csv.DictWriter(ofile, CSV_FIELDS, delimiter='\t')
+ writer.writeheader()
+ for pl in plates:
+ logger.debug('Dumping plate %d/%d' % (plates.index(pl) + 1,
+ len(plates)))
+ writer.writerow({'label' : pl.label,
+ 'barcode' : pl.barcode,
+ 'rows' : pl.rows,
+ 'columns' : pl.columns,
+ 'plate_status' : pl.status.enum_label()})
+ logger.info('Job done')
+
+
+
+# Script entry point: pass the command-line arguments (minus the program name) to main().
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/biosample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/biosample.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,247 @@
+
+ import BioSample definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ biosample
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type ${source_type}
+ #end if
+ #if str($vessel_type_selector.vessel_type) != 'use_provided'
+ --vessel-type ${vessel_type_selector.vessel_type}
+ #end if
+ #if str($vessel_content) != 'use_provided'
+ --vessel-content=${vessel_content}
+ #end if
+ #if str($vessel_status) != 'use_provided'
+ --vessel-status=${vessel_status}
+ #end if
+ #if str($vessel_type_selector.vessel_type) == 'IlluminaBeadChipArray'
+ #if str($vessel_type_selector.assay_type) != 'use_provided'
+ --bead-chip-assay-type=${vessel_type_selector.assay_type}
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+A biosample record will have, at least, the following fields::
+
+ label source
+ I001-bs-2 V932814892
+ I002-bs-2 V932814892
+ I003-bs-2 None
+
+Where label is the label of the biosample container. If a 'None' value
+has been passed in the source column, the biosample will be imported
+as a new unlinked object into the biobanks. Another example, this time
+involving DNA samples::
+
+ label source used_volume current_volume activation_date
+ I001-dna V932814899 0.3 0.2 17/03/2007
+ I002-dna V932814900 0.22 0.2 21/01/2004
+
+A special case is when records refer to biosamples contained in plate
+wells. In this case, an additional column must be present with the VID
+of the corresponding TiterPlate object. For instance::
+
+ plate label source
+ V39030 A01 V932814892
+ V39031 A02 V932814893
+ V39032 A03 V932814894
+
+where the label column is now the label of the well position.
+
+If row and column (optional) are provided, the program will use them;
+if they are not provided, it will infer them from label (e.g., J01 ->
+row=10, column=1). Missing labels will be generated as::
+
+ '%s%03d' % (chr(row+ord('A')-1), column)
+
+A badly formed label will result in the rejection of the record; the
+same will happen if label, row and column are inconsistent. The well
+will be filled by current_volume material produced by removing
+used_volume material taken from the bio material contained in the
+vessel identified by source. row and column are base 1.
+
+If the sample is an IlluminaBeadChipArray the plate column used in the
+PlateWell case will become an illumina_array column and a new column, named
+bead_chip_assay_type, is required::
+
+ illumina_array label source bead_chip_assay_type
+ V1351235 R01C01 V412441 HUMANEXOME_12V1_B
+ V1351235 R01C02 V351151 HUMANEXOME_12V1_B
+ V1351235 R02C01 V345115 HUMANEXOME_12V1_B
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/birth_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/birth_data.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,82 @@
+
+ import birth data within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ birth_data
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study individual timestamp birth_date birth_place
+ ASTUDY V1234 1310057541608 12/03/1978 006171
+ ASTUDY V14112 1310057541608 25/04/1983 006149
+ ASTUDY V1241 1310057541608 12/03/2001 006172
+ .....
+
+where birth_place is a valid ISTAT code for an Italian city or a
+foreign Country and birth_date must have the dd/mm/YYYY format.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/data_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_collection.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,125 @@
+
+ import DataCollection definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_collection
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($data_sample_type) != 'use_provided'
+ --data_sample-type=${data_sample_type}
+ #end if
+ #if str($label)
+ --label=${label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study label data_sample
+ BSTUDY dc-01 V0390290
+ BSTUDY dc-01 V0390291
+ BSTUDY dc-02 V0390292
+ BSTUDY dc-02 V390293
+ ...
+
+This will create new DataCollection(s), whose label is defined by the
+label column, and link to it, using DataCollectionItem objects,
+the DataSample object(s) identified by data_sample (a VID).
+
+Records that point to an unknown DataSample will abort the data
+collection loading. Previously seen collections will be noisily
+ignored. It is not legal to use the importer to add items to a
+previously known collection.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/data_object.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_object.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,111 @@
+
+ import DataObject definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_object
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($mimetype) != 'use_provided'
+ --mimetype=${mimetype}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study path data_sample mimetype size sha1
+
+ TEST01 file:/share/fs/v039303.cel V2902 x-vl/affymetrix-cel 39090 E909090
+ ....
+
+Records that point to an unknown data sample will be noisily
+ignored. The same will happen to records that have the same path as a
+previously seen data_object.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/data_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_sample.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,166 @@
+
+ import DataSample definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_sample
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type=${source_type}
+ #end if
+ #if str($device_type) != 'use_provided'
+ --device-type=${device_type}
+ #end if
+ #if str($scanner) != 'use_provided'
+ --scanner=${scanner}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study label source device device_type scanner options
+ ASTUDY foo01 v03909 v9309 Chip v99020 celID=0009099090
+ ASTUDY foo02 v03909 v99022 Scanner v99022 conf1=...,conf2=...
+ ....
+
+In this example, the first line corresponds to a dataset obtained by
+using chip v9309 on scanner v99020, while the second datasample has
+been obtained using a technology directly using a scanner, e.g., an
+Illumina HiSeq 2000. The '''scanner''' column is there as a
+convenience to support a more detailed description of a chip-based
+acquisition.
+
+The general strategy is to decide what data objects should be
+instantiated by looking at the chip column and at its corresponding
+maker,model,release.
+
+The optional column '''scanner''', the vid of the scanner device, is
+used in cases, such as Affymetrix genotyping, where it is relevant.
+
+It is also possible to import DataSample(s) that are the results of
+processing other DataSample(s). Here is an example::
+
+ study label source device device_type options
+ ASTUDY foo01 v03909 v99021 SoftwareProgram conf1=...,conf2=...
+ ASTUDY foo02 v03909 v99021 SoftwareProgram conf1=...,conf2=...
+ ....
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/device.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/device.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,129 @@
+
+ import Device definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ device
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($device_type) != 'use_provided'
+ --device-type=${device_type}
+ #end if
+ #if str($maker)
+ --maker=${maker}
+ #end if
+ #if str($model)
+ --model=${model}
+ #end if
+ #if str($release)
+ --release=${release}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study device_type label barcode maker model release location
+ BSTUDY Scanner pula01 8989898 Affymetrix GeneChip Scanner 3000 7G Pula bld. 5
+ BSTUDY Chip chip001 8329482 Affymetrix Genome-Wide Human SNP Array 6.0 None
+
+All devices have a type, a label, an optional barcode, a maker, a
+model, a release and an optional physical location. In the example
+above, in the first line we have defined a scanner, which is
+physically located in the building 5 lab in Pula. The second line
+defines a chip.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/diagnosis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/diagnosis.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,79 @@
+
+ import diagnosis data within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ diagnosis
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ study individual timestamp diagnosis
+ ASTUDY V899 1310057541608 icd10-cm:E10
+ ASTUDY V899 1310057541608 icd10-cm:G35
+ ASTUDY V1806 1310057541608 exclusion-problem_diagnosis
+ ...
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/enrollment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/enrollment.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,108 @@
+
+ Create new enrollments for existing individuals within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__) == 1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if str($advanced_configuration.configuration_level) == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ enrollment
+ #if str($study_label) != 'use_provided'
+ --study=$study_label
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Import of new enrollments related to existing individuals.
+An enrollment is characterized by the following fields::
+
+ source study label
+ V044DE795E7F9F42FEB9855288CF577A77 xxx id1
+ V06C59B915C0FD47DABE6AE02C731780AF xxx id2
+ V01654DCFC5BB640C0BB7EE088194E629D xxx id3
+
+where source must be the VID of an existing Individual object, study a
+label of an existing Study object and label the enrollment code for
+the patient in the study.
+
+The enrollment sub-operation will retrieve the source individual from
+the DB, create a new enrollment related to it and output the VIDs of
+newly created enrollments. It is not possible to create two
+enrollments with the same code related to the same study, nor is it
+possible to enroll a patient twice in the same study, even with
+different codes.
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/group.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/group.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,75 @@
+
+ Create a new group within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if str($advanced_configuration.configuration_level) == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input1
+ --ofile=$output1
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ group
+ #if str($group_label) != ''
+ --group=$group_label
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will create a new group of individuals from a file with the following columns::
+
+ study label individual
+ foo I0000 V06C59B915C0FD47DABE6AE02C731780AF
+ foo I0001 V0B718B77691B145BFA8901FCCF6B37998
+ ...
+
+where the column study is optional (it can be provided via the
+group_label param). Labels should be unique within the file and the
+individual field should contain VIDs of existing (within omero/vl)
+Individual objects.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/illumina_bead_chip_measures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/illumina_bead_chip_measures.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,127 @@
+
+ import IlluminaBeadChipMeasures definitions within OMERO
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ illumina_bead_chip_measures
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source_type=${source_type}
+ #end if
+ #if str($action_category) != 'use_provided'
+ --action_category=${action_category}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read a tsv file with the following columns::
+
+ study label red_channel green_channel source source_type
+ ASTUDY CHIP_01_R01C01 V1415151235513 V135135661356161 V351351351551 IlluminaBeadChipArray
+ ASTUDY CHIP_01_R01C02 V2346262462462 V112395151351623 V135113513223 IlluminaBeadChipArray
+ ASTUDY CHIP_01_R02C01 V1351362899135 V913977551235981 V100941215192 IlluminaBeadChipArray
+
+This will create new IlluminaBeadChipMeasures whose labels are defined in the
+label column.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/importer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/importer.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.importer.main import main
+# Launcher: forward the command-line arguments (minus the program name) to the imported main().
+main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/individual.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/individual.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,99 @@
+
+ import individual definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=$logfile
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ individual
+ #if str($study) != 'use_provided'
+ --study $study
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will import a stream of new individual definitions defined by the
+following columns::
+
+ label gender father mother
+ id2 male id4 id5
+ id3 female None None
+ ....
+
+It is not possible to import the same individual twice: the related
+file rows will be noisily ignored.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/laneslot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/laneslot.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,127 @@
+
+ import LaneSlot definitions within omero.biobank
+
+ importer.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=${in_file}
+ --ofile=${out_file}
+ --report_file=${report_file}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ laneslot
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source_type=${source_type}
+ #end if
+ #if str($content) != 'use_provided'
+ --content=${content}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+A lane slot record will have the following fields::
+
+ lane tag content source
+ V123411 ATCACG DNA V4512415
+ V123411 CGATGT DNA V1415512
+ V412511 DNA V1909012
+ V661251 TGACCA DNA V1123111
+ V661251 CTTGTA DNA V1211141
+ ....
+
+The content column is optional if its value is passed as a script input;
+the tag column is optional too.
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/marker_alignment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/marker_alignment.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,140 @@
+
+ import marker alignments within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ marker_alignment
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($ref_genome)
+ --ref-genome ${ref_genome}
+ #end if
+ #if str($message)
+ --message ${message}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ marker_vid ref_genome chromosome pos strand allele copies
+ V0909090 hg18 10 82938938 True A 1
+ V0909091 hg18 1 82938999 True A 2
+ V0909092 hg18 1 82938938 True B 2
+ ...
+
+Since pos is relative to 5', if the marker has been aligned on the
+other strand, it is the responsibility of the aligner app to report
+the actual distance from 5', while, at the same time, registering that
+the SNP has actually been aligned on the other strand.
+
+The chromosome field is an integer field with values in the [1, 26]
+range, with 23-26 representing, respectively, the X chromosome, the Y
+chromosome, the pseudoautosomal regions (XY) and the mitochondrial DNA
+(MT).
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/marker_definition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/marker_definition.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,119 @@
+
+ import Marker definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input1
+ --ofile=$output1
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ marker_definition
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ --source ${source}
+ --context ${context}
+ --release ${release}
+ --ref-genome ${ref_genome}
+ --dbsnp-build ${dbsnp_build}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ label rs_label mask strand allele_a allele_b
+ SNP_A-1780419 rs6576700 [A/G] TOP A G
+ ...
+
+Where label is supposed to be the unique label for this marker in the
+(source, context, release) context, rs_label is the dbSNP db label for
+this snp (it could be the string ``None`` if not defined or not
+known). The column mask contains the SNP definition. The strand column
+could either be the actual 'illumina style' strand used to define the
+alleles in the alleles columns, or the string 'None', which means that
+the alleles in the allele column are defined wrt the mask in the
+mask column.
+
+It will, for each row, convert the mask to the TOP strand following
+Illumina conventions and then save a record for it in VL. The saved
+tuple is (source, context, release, label, rs_label, TOP_mask). There
+are no collision controls.
+
+It will output a tsv file with the following columns::
+
+ study label type vid
+ ASTUDY SNP_A-xxx Marker V000002222
+ ...
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/markers_set.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/markers_set.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,102 @@
+
+ import markers set definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ markers_set
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($label)
+ --label ${label}
+ #end if
+ #if str($maker) != 'use_provided'
+ --maker ${maker}
+ #end if
+ #if str($model) != 'use_provided'
+ --model ${model}
+ #end if
+ #if str($release)
+ --release ${release}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read in a tsv file with the following columns::
+
+ marker_vid marker_indx allele_flip
+ V902909090 0 False
+ V902909091 1 False
+ V902909092 2 True
+ ...
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/samples_container.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/samples_container.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,287 @@
+
+ import samples container definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ samples_container
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($container_type_selector.container_type) != 'use_provided'
+ --container-type=${container_type_selector.container_type}
+ #if str($container_type_selector.container_type) == 'TiterPlate'
+ #if str($container_type_selector.plate_shape) != 'use_provided'
+ --plate-shape=${container_type_selector.plate_shape}
+ #end if
+ #elif str($container_type_selector.container_type) == 'FlowCell'
+ #if str($container_type_selector.flow_cell_slots) != 'use_provided'
+ --number-of-slots=${container_type_selector.flow_cell_slots}
+ #end if
+ #elif str($container_type_selector.container_type) == 'IlluminaArrayOfArrays'
+ #if str($container_type_selector.ill_shape) != 'use_provided'
+ --plate-shape=${container_type_selector.ill_shape}
+ #end if
+ #if str($container_type_selector.ill_slots) != 'use_provided'
+ --number_of_slots=${container_type_selector.ill_slots}
+ #end if
+ #if str($container_type_selector.array_type) != 'use_provided'
+ --illumina-array-type=${container_type_selector.array_type}
+ #end if
+ #if str($container_type_selector.array_class) != 'use_provided'
+ --illumina-array-class=${container_type_selector.array_class}
+ #end if
+ #if str($container_type_selector.assay_type) != 'use_provided'
+ --illumina-assay-type=${container_type_selector.assay_type}
+ #end if
+ #end if
+ #end if
+ #if str($container_status) != 'use_provided'
+ --container-status=${container_status}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+A container record will have the following fields::
+
+ label container_status creation_date
+ A_CONTAINER USABLE 13/02/2012
+ B_CONTAINER INSTOCK 12/01/2001
+ C_CONTAINER USABLE 25/04/2012
+ ....
+
+The creation_date column is optional: if not specified, the current date
+will be set as the object's creation date. The container_status column
+is also optional if its value is passed as an input parameter.
+
+
+When importing new containers, special fields can be included in the
+CSV file depending on the type of the objects that you want to
+import.
+
+For TITER PLATES objects the syntax can be the following::
+
+ label barcode container_status rows columns
+ A_TITERPLATE XXYYZZ111 INSTOCK 8 12
+ B_TITERPLATE XXYYZZ112 INSTOCK 8 12
+ C_TITERPLATE XXYYZZ113 READY 8 12
+ ....
+
+rows and columns values can be optional if these values are passed as
+input parameters, barcode column is optional.
+
+For ILLUMINA ARRAY OF ARRAYS objects the syntax can be the following::
+
+ label barcode container_status rows columns illumina_array_type illumina_array_class illumina_assay_type
+ A_ILLARRAY XXYYZZ111 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+ B_ILLARRAY XXYYZZ112 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+ C_ILLARRAY XXYYZZ113 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+
+rows, columns, illumina_array_type, illumina_array_class and illumina_assay_type
+can be optional if these values are passed as input parameters, barcode column
+is optional.
+
+For FLOW CELL objects the syntax can be the following::
+
+ label barcode container_status number_of_slots
+ A_FLOWCELL XXYYZZ221 INSTOCK 8
+ B_FLOWCELL XXYYZZ222 INSTOCK 8
+ C_FLOWCELL XXYYZZ223 INSTOCK 8
+ ....
+
+The number_of_slots column is optional if this value is passed as an
+input parameter; the barcode column is optional.
+
+For LANE objects the syntax can be the following::
+
+ flow_cell slot container_status
+ V112441441 1 INSTOCK
+ V112441441 2 INSTOCK
+ V112441441 3 INSTOCK
+ V351145519 1 INSTOCK
+ V351145519 2 INSTOCK
+ ....
+
+for Lane objects, no label column has to be provided, the importer
+will automatically calculate the labels for each imported object.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/sequencing_data_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/sequencing_data_sample.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,198 @@
+
+
+ Import sequencing related DataSample definitions within omero/vl
+
+
+ importer.py
+ --operator=galaxy
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ seq_data_sample
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type=${source_type}
+ #end if
+ #if str($seq_dsample_type) != 'use_provided'
+ --seq-dsample-type=${seq_dsample_type}
+ #end if
+ #if str($dsample_status) != 'use_provided'
+ --status=${dsample_status}
+ #end if
+ #if str($device) != 'use_provided'
+ --device=${device}
+ #end if
+ #if str($history) != 'None'
+ --history=${history}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will read a tsv file with the following columns::
+
+ study label source source_type seq_dsample_type status device
+ FOOBAR seq_out_1 V012141 FlowCell SequencerOutput USABLE V123141
+ FOOBAR seq_out_2 V012141 FlowCell SequencerOutput USABLE V123141
+ FOOBAR seq_out_3 V1AD124 FlowCell SequencerOutput USABLE V123141
+ ...
+
+where
+ * seq_dsample_type can assume one of the following values: SequencerOutput, RawSeqDataSample, SeqDataSample
+ * source_type can assume one of the following values: FlowCell, SequencerOutput, RawSeqDataSample
+
+study, source_type, seq_dsample_type, status and device columns can be
+overwritten by using command line options.
+
+A special case of the previous file is when seq_dsample_type is
+SeqDataSample, in this case a mandatory sample column is required,
+this column has to contain IDs of Tube objects.
+The file will look like this::
+
+ study label source source_type seq_dsample_type status device sample
+ FOOBAR seq_dsample_1 V041241 SequencerOutput SeqDataSample USABLE VBB2351 V124AA41
+ FOOBAR seq_dsample_2 V051561 SequencerOutput SeqDataSample USABLE VBB2351 V4151AAE
+ FOOBAR seq_dsample_3 V151561 SequencerOutput SeqDataSample USABLE VBB2351 V15199CD
+ ...
+
+A file containing an export of the Galaxy history that produced the
+data that are going to be imported can be passed as input parameter;
+history details must be represented as a string serialized in JSON
+format.
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/study.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/study.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,83 @@
+
+ import study definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ study
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Will import a stream of new study definitions defined by the following
+tab-separated columns. A typical file will look like the following::
+
+ label description
+ BSTUDY A basically empty description of BSTUDY
+ CSTUDY A basically empty description of CSTUDY
+ ....
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/unauthorized_access.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/unauthorized_access.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,6 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+
+sys.exit("You are not authorized to use this tool")  # a string argument is printed to stderr and the process exits with status 1
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/importer/vessels_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/vessels_collection.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,111 @@
+
+ import VesselsCollection definitions within omero/vl
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #importer.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ vessels_collection
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($vessel_type) != 'use_provided'
+ --vessel_type=${vessel_type}
+ #end if
+ #if str($label)
+ --label=${label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+TODO: add doc here...
+
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/all_enrollments.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/all_enrollments.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,82 @@
+import csv, os, sys, argparse
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def make_parser():  # Build the argparse parser for the all_enrollments command line.
+ parser = argparse.ArgumentParser(description='Retrieve all enrollments')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('--host', type=str, help='omero hostname')  # host/user/passwd are optional; main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('--user', type=str, help='omero user')
+ parser.add_argument('--passwd', type=str, help='omero password')
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)  # the only mandatory option
+ return parser  # the configured ArgumentParser; parsing is done by the caller
+
+
+def main(argv):  # Entry point: write one tab-separated row per individual holding its enrollment code in each study.
+ parser = make_parser()
+ args = parser.parse_args(argv)  # argv is the argument list without the program name
+
+ # This is a temporary hack!!!
+ to_be_ignored = ['IMMUNOCHIP_DISCARDED', 'CASI_MS_CSM_TMP',
+ 'CASI_MS_CSM_CODES']  # study labels filtered out of the export below
+
+ logger = get_logger('all_enrollments', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()  # command-line value wins; otherwise ask the vlu helper
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:  # Python 2 "except X, name" syntax
+ logger.critical(ve)
+ sys.exit(ve)  # abort, using the error as the exit message
+
+ try:
+ out_file_path = args.ofile
+ except IndexError:  # NOTE(review): an attribute read cannot raise IndexError and --ofile is required by the parser, so this handler looks unreachable
+ logger.error('Mandatory field missing.')
+ parser.print_help()
+ sys.exit(2)
+
+ # Create the KnowledgeBase object
+ kb = KB(driver='omero')(host, user, passwd)
+
+ # Retrieve all studies from omero
+ studies = kb.get_objects(kb.Study)
+ studies = [s for s in studies if s.label not in to_be_ignored]
+ logger.info('Retrieved %d studies from database' % len(studies))
+
+ csv_header = ['individual_uuid']  # first column; one extra column is appended per study that has enrollments
+ enrolls_map = {}  # individual omero_id -> {column name: value} row dict
+ # For each study, retrieve all enrollments
+ for s in studies:
+ logger.info('Retrieving enrollments for study %s' % s.label)
+ enrolls = kb.get_enrolled(s)
+ logger.info('%s enrollments retrieved' % len(enrolls))
+ if len(enrolls) > 0:
+ logger.debug('Building lookup dictionary....')
+ csv_header.append(s.label) # Add study label to CSV header
+ for e in enrolls:
+ enrolls_map.setdefault(e.individual.omero_id, {})['individual_uuid'] = e.individual.id  # create the row on first sight of this individual
+ enrolls_map[e.individual.omero_id][s.label] = e.studyCode  # the study column holds the enrollment code
+ else:
+ logger.debug('No enrollments found, skip study %s' % s.label)  # studies without enrollments get no column
+
+ # Write to CSV file
+ logger.debug('Writing CSV file %s' % out_file_path)
+ with open(out_file_path, 'w') as f:
+ writer = csv.DictWriter(f, csv_header,
+ delimiter='\t', quotechar='"',
+ restval = 'None')  # studies an individual is not enrolled in are written as the literal string 'None'
+ writer.writeheader()
+ for k, v in enrolls_map.iteritems():  # iteritems() is Python 2 only; row order follows dict iteration order
+ writer.writerow(v)
+
+if __name__ == '__main__':  # run only when executed as a script
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/all_enrollments.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/all_enrollments.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,45 @@
+
+
+ Retrieve all enrollments codes from Omero server
+
+
+ all_enrollments.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It will output a tsv file with the following columns:
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/build_miniped.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/build_miniped.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,155 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+"""
+A rough example of basic pedigree info generation.
+"""
+
+import csv, argparse, sys, os
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.kb.drivers.omero.ehr import EHR
+import bl.vl.individual.pedigree as ped
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+DIAGNOSIS_ARCH = 'openEHR-EHR-EVALUATION.problem-diagnosis.v1'  # archetype passed to EHR.matches() in main()
+DIAGNOSIS_FIELD = 'at0002.1'  # field passed to EHR.matches() in main()
+T1D_ICD10 = 'icd10-cm:E10'  # value matched to set the t1d status
+MS_ICD10 = 'icd10-cm:G35'  # value matched to set the ms status
+NEFRO_ICD10 = 'icd10-cm:E23.2'  # value matched to set the nefro status
+
+PLINK_MISSING = -9  # written for individuals that have no EHR records at all
+PLINK_UNAFFECTED = 1  # default for individuals that have EHR records
+PLINK_AFFECTED = 2  # set when the corresponding diagnosis matches
+
+FIELDS = ["fam_label", "ind_label", "fat_label", "mot_label", "gender", "t1d_status", "ms_status", "nefro_status"]  # output column order for the csv.DictWriter in main()
+
+
+def make_parser():  # Build the argparse parser for the build_miniped command line.
+ parser = argparse.ArgumentParser(description='build the first columns of a ped file from VL')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional; main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-S', '--study', type=str, required=True,
+ help='a list of comma separated studies used to retrieve individuals that will be written to ped file')  # main() splits this value on ','
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)
+ return parser  # the configured ArgumentParser; parsing is done by the caller
+
+def build_families(individuals, logger):  # Group individuals into families; returns the result of ped.split_disjoint().
+ # Disabled: individuals with only one parent would be considered like founders
+ # for i in individuals:
+ # if ((i.mother is None) or (i.father is None)):
+ # i.mother = None
+ # i.father = None
+ logger.info("individuals: %d" % len(individuals))
+ #logger.info("individuals: with 0 or 2 parents: %d" % len(not_one_parent))
+ logger.info("analyzing pedigree")
+ founders, non_founders, dangling, couples, children = ped.analyze(
+ individuals
+ )  # only 'children' is used below; the other four results are ignored
+ logger.info("splitting into families")
+ return ped.split_disjoint(individuals, children)  # main() calls len() on this and iterates it, treating each item as one family
+
+
+def main(argv):  # Entry point: write a tab-separated pedigree file (columns in FIELDS order) for the individuals enrolled in the given studies.
+ parser = make_parser()
+ args = parser.parse_args(argv)  # argv is the argument list without the program name
+
+ logger = get_logger('build_miniped', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()  # command-line value wins; otherwise ask the vlu helper
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:  # Python 2 "except X, name" syntax
+ logger.critical(ve)
+ sys.exit(ve)  # abort, using the error as the exit message
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.debug('Loading all individuals from omero')
+ all_inds = kb.get_objects(kb.Individual) # store all inds to cache
+ logger.debug('%d individuals loaded' % len(all_inds))  # all_inds is only used for this log line
+ studies = [kb.get_study(s) for s in args.study.split(',')]
+ # Removing None values
+ studies = set(studies)  # also collapses studies listed more than once
+ try:
+ studies.remove(None)
+ except KeyError:  # no None entry was present
+ pass
+ studies = list(studies)
+ if len(studies) == 0:
+ logger.error('No matches found for labels %s, stopping program' % args.study)
+ sys.exit(2)
+ enrolled_map = {}  # individual id -> ('STUDY:CODE' label, individual)
+ for study in studies:
+ logger.info('Loading enrolled individuals for study %s' % study.label)
+ enrolled = kb.get_enrolled(study)
+ logger.debug('%d individuals loaded' % len(enrolled))
+ for en in enrolled:
+ if en.individual.id not in enrolled_map:  # the first enrollment seen for an individual supplies its label
+ enrolled_map[en.individual.id] = ('%s:%s' % (en.study.label, en.studyCode),
+ en.individual)
+ else:
+ logger.debug('Individual %s already mapped' % en.individual.id)
+ logger.debug('Loading EHR records')
+ ehr_records = kb.get_ehr_records()
+ logger.debug('%s EHR records loaded' % len(ehr_records))
+ ehr_records_map = {}  # r['i_id'] -> list of EHR records
+ for r in ehr_records:
+ ehr_records_map.setdefault(r['i_id'], []).append(r)
+ affection_map = {}  # same key -> {'t1d': ..., 'ms': ..., 'nefro': ...} status codes
+ for ind_id, ehr_recs in ehr_records_map.iteritems():  # iteritems() is Python 2 only
+ affection_map[ind_id] = dict(t1d=PLINK_UNAFFECTED, ms=PLINK_UNAFFECTED,
+ nefro=PLINK_UNAFFECTED)  # start as unaffected, upgraded below on a match
+ ehr = EHR(ehr_recs)
+ if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, T1D_ICD10):
+ affection_map[ind_id]['t1d'] = PLINK_AFFECTED
+ if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, MS_ICD10):
+ affection_map[ind_id]['ms'] = PLINK_AFFECTED
+ if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, NEFRO_ICD10):
+ affection_map[ind_id]['nefro'] = PLINK_AFFECTED
+
+ immuno_inds = [i for (ind_id, (st_code, i)) in enrolled_map.iteritems()]  # just the individual objects of everyone enrolled
+ families = build_families(immuno_inds, logger)
+ logger.info("found %d families" % len(families))
+
+ def resolve_label(i):  # 'STUDY:CODE' label if enrolled in a selected study, else the individual's id
+ try:
+ return enrolled_map[i.id][0]
+ except KeyError:
+ return i.id
+
+ def resolve_pheno(i):  # (t1d, ms, nefro) status codes; all PLINK_MISSING when no EHR records exist for i.id
+ try:
+ immuno_affection = affection_map[i.id]  # NOTE(review): assumes r['i_id'] values are Individual ids - confirm
+ except KeyError:
+ return PLINK_MISSING, PLINK_MISSING, PLINK_MISSING
+ return immuno_affection["t1d"], immuno_affection["ms"], immuno_affection["nefro"]
+
+ kb.Gender.map_enums_values(kb)
+ gender_map = lambda x: 2 if x == kb.Gender.FEMALE else 1  # anything that is not FEMALE is written as 1
+
+ logger.info("writing miniped")
+ with open(args.ofile, "w") as f:
+ writer = csv.DictWriter(f, FIELDS, delimiter="\t", lineterminator="\n")  # no header row is written
+ for k, fam in enumerate(families):
+ fam_label = "FAM_%d" % (k+1)  # families are numbered from 1 in iteration order
+ for i in fam:
+ r = {}
+ r["fam_label"] = fam_label
+ r["ind_label"] = resolve_label(i)
+ r["fat_label"] = 0 if (i.father is None or i.father not in fam) else resolve_label(i.father)  # 0 when the father is unknown or outside this family
+ r["mot_label"] = 0 if (i.mother is None or i.mother not in fam) else resolve_label(i.mother)  # same rule for the mother
+ r["gender"] = gender_map(i.gender)
+ r["t1d_status"], r["ms_status"], r["nefro_status"] = resolve_pheno(i)
+ writer.writerow(r)
+
+
+if __name__ == "__main__":  # run only when executed as a script
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/build_miniped.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/build_miniped.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,77 @@
+
+
+ Build a reduced ped file from Omero server
+
+
+ build_miniped.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ #if $study
+ --study=${study}
+ #end if
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It will output a tsv file with a column of codes for each group of samples.
+
+ The labels of the columns are:
+
+ family
+
+ individual enrollment code (STUDY:CODE)
+
+ father enrollment code (STUDY:CODE)
+
+ mother enrollment code (STUDY:CODE)
+
+ gender
+
+ T1D affection status
+
+ MS affection status
+
+ Nefro affection status
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/check_merge_individuals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_merge_individuals.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,104 @@
+import sys, csv, argparse, os
+from collections import Counter
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def make_parser():  # Build the argparse parser for the check_merge_individuals command line.
+ parser = argparse.ArgumentParser(description='check data that will be passed to the merge_individuals tool')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional; main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file')  # read by main() as tab-separated with a header row
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')  # receives the records that pass every check
+ return parser  # the configured ArgumentParser; parsing is done by the caller
+
+
+def get_invalid_vids(records, logger):  # Return the set of IDs used more than once as 'source' or present in both 'source' and 'target'.
+ records_map = {}  # column name -> list of that column's values over all records
+ invalid_vids = []
+
+ for rec in records:
+ for k,v in rec.iteritems():  # iteritems() is Python 2 only
+ records_map.setdefault(k, []).append(v)
+ # Check for duplicated sources
+ ct = Counter()
+ for x in records_map['source']:  # NOTE(review): raises KeyError when records is empty or has no 'source' column
+ ct[x] += 1
+ for k, v in ct.iteritems():
+ if v > 1:
+ logger.error('ID %s appears %d times as source, this ID has been marked as invalid' % (k, v))
+ invalid_vids.append(k)
+ # Check for VIDs that appear both in 'source' and 'target' fields
+ sources = set(records_map['source'])
+ targets = set(records_map['target'])
+ commons = sources.intersection(targets)
+ for c in commons:
+ logger.error('ID %s appears both in \'source\' and \'target\' columns, this ID has been marked as invalid' % c)
+ invalid_vids.append(c)
+
+ return set(invalid_vids)  # a set, so an ID flagged by both checks appears once
+
+
+def check_row(row, individuals, logger):  # True when both row['source'] and row['target'] are keys of individuals, else False.
+ try:
+ source = individuals[row['source']]  # individuals maps Individual id -> object (built in main())
+ logger.debug('%s is a valid Individual ID' % source.id)
+ target = individuals[row['target']]
+ logger.debug('%s is a valid Individual ID' % target.id)
+ return True
+ except KeyError, ke:  # Python 2 syntax; a row lacking the 'source'/'target' column lands here too
+ logger.error('%s is not a valid Individual ID' % ke)
+ return False
+
+
+def main(argv):  # Entry point: copy to --out_file only the source/target records that pass all validity checks.
+ parser = make_parser()
+ args = parser.parse_args(argv)  # argv is the argument list without the program name
+
+ logger = get_logger('check_merge_individuals', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()  # command-line value wins; otherwise ask the vlu helper
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:  # Python 2 "except X, name" syntax
+ logger.critical(ve)
+ sys.exit(ve)  # abort, using the error as the exit message
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Preloading all individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Loaded %d individuals' % len(inds))
+ inds_map = {}  # Individual id -> Individual, used by check_row()
+ for i in inds:
+ inds_map[i.id] = i
+
+ with open(args.in_file) as infile, open(args.out_file, 'w') as outfile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ records = [row for row in reader]  # read everything first: the duplicate checks need the whole file
+ invalid_vids = get_invalid_vids(records, logger)
+
+ writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t')  # output keeps the input's columns
+ writer.writeheader()
+
+ for record in records:
+ if record['source'] in invalid_vids or record['target'] in invalid_vids:  # one flagged ID rejects the whole record
+ logger.error('Skipping record %r because at least one ID was marked as invalid' % record)
+ else:
+ if check_row(record, inds_map, logger):
+ writer.writerow(record)
+ logger.debug('Record %r written in output file' % record)
+
+
+if __name__ == '__main__':  # run only when executed as a script
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/check_merge_individuals.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_merge_individuals.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,58 @@
+
+
+ Verify data that will be passed to the merge_individuals tool
+
+
+ check_merge_individuals.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will check the individuals to be merged using information from a file like this::
+
+ source target
+ V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/check_update_parents_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_update_parents_data.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,113 @@
+import sys, csv, argparse, logging, os
+from collections import Counter
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():  # Build the argparse parser for the check_update_parents_data command line.
+ parser = argparse.ArgumentParser(description='check data that will be passed to the update_parents tool')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional; main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file')  # read by main() as tab-separated with a header row
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')  # receives the records that pass every check
+ return parser  # the configured ArgumentParser; parsing is done by the caller
+
+
+def check_row(row, individuals_map, kb, logger):  # True when the individual and any non-'None' parent exist and the parents have the expected gender.
+ logger.debug('Checking record %r' % row)
+ try:
+ ind = individuals_map[row['individual']]  # individuals_map maps Individual id -> object (built in main())
+ logger.info('%s is a valid Individual ID' % ind.id)
+ if row['father'] != 'None':  # the literal string 'None' means "no father given"
+ father = individuals_map[row['father']]
+ logger.info('%s is a valid Individual ID' % father.id)
+ check_gender(father, kb.Gender.MALE)  # raises ValueError on mismatch, handled below
+ logger.info('Gender check passed')
+ else:
+ logger.info('None value, no check required')
+ if row['mother'] != 'None':  # same convention for the mother
+ mother = individuals_map[row['mother']]
+ logger.info('%s is a valid Individual ID' % mother.id)
+ check_gender(mother, kb.Gender.FEMALE)
+ logger.info('Gender check passed')
+ else:
+ logger.info('None value, no check required')
+ return True
+ except KeyError, ke:  # Python 2 syntax; an unknown ID or a missing column in row both land here
+ logger.error('%s is not a valid Individual ID, rejecting row' % ke)
+ return False
+ except ValueError, ve:  # gender mismatch reported by check_gender()
+ logger.error(ve)
+ return False
+
+
+def check_gender(individual, gender):  # Raise ValueError unless individual.gender has the same enum label as gender; returns None.
+ if individual.gender.enum_label() != gender.enum_label():  # compared by label rather than by object
+ raise ValueError('Gender for individual %s is %s, expected %s, rejecting row' % (individual.id,
+ individual.gender.enum_label(),
+ gender.enum_label()))
+ else:
+ pass  # labels match: nothing to do
+
+
+def main(argv):  # Entry point: drop records whose 'individual' is duplicated, then write the records that pass check_row().
+ parser = make_parser()
+ args = parser.parse_args(argv)  # argv is the argument list without the program name
+
+ logger = get_logger('check_update_parents_data', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()  # command-line value wins; otherwise ask the vlu helper
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:  # Python 2 "except X, name" syntax
+ logger.critical(ve)
+ sys.exit(ve)  # abort, using the error as the exit message
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Preloading all individuals from the system')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('%d individuals loaded' % len(inds))
+ inds_lookup = {}  # Individual id -> Individual, used by check_row()
+ for i in inds:
+ inds_lookup[i.id] = i
+
+ with open(args.in_file) as infile, open(args.out_file, 'w') as outfile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ records = list(reader)  # read everything first: the duplicate check needs the whole file
+ logger.info('Check for duplicated in \'individual\' column')
+ recs_by_ind = {}  # 'individual' value -> list of records carrying it
+ for rec in records:
+ recs_by_ind.setdefault(rec['individual'], []).append(rec)
+ ct = Counter()  # NOTE(review): ct is never used afterwards
+ duplicated = []
+ for k,v in recs_by_ind.iteritems():  # iteritems() is Python 2 only
+ if len(v) > 1:
+ duplicated.append(k)
+ for dupl in duplicated:
+ logger.info('Individual %s is a duplicated' % dupl)
+ for r in recs_by_ind.pop(dupl):  # pop() removes every record of a duplicated individual, not just the extras
+ logger.info('Removing record %r' % r)
+ good_records = sum(recs_by_ind.itervalues(), [])  # flatten the remaining one-record lists; order follows dict iteration, not the input file
+ logger.info('Duplicated check completed')
+ writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t')  # output keeps the input's columns
+ writer.writeheader()
+ logger.info('Checking records')
+ for row in good_records:
+ if check_row(row, inds_lookup, kb, logger):
+ writer.writerow(row)
+ logger.debug('Record %r written in output file' % row)
+ logger.info('Records check completed')
+
+if __name__ == '__main__':  # run only when executed as a script
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/check_update_parents_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_update_parents_data.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,58 @@
+
+
+ Verify data that will be passed to the update_parents tool
+
+
+ check_update_parents_data.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will check the parental info of individuals using information from a file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/convert_sam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/convert_sam.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+
+main(sys.argv[1:])  # hand this script's arguments (minus the program name) to the snp_manager main()
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/convert_sam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/convert_sam.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,37 @@
+
+ converter
+
+ convert_sam.py --logfile ${log_file} convert_sam -i ${input_file}
+ -o ${output_file} --reftag ${dbkey} --output-format ${output_type}
+ ## FIXME: find a way to import the default from the relevant module
+ --flank-size 125
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool converts SAM alignment data to VL marker alignment or Galaxy
+extract genomic DNA input.
+
+Expects single-end BWA alignment data produced by the previous steps
+in the workflow (see markers_to_fastq).
+
+**NOTE:** if the marker_alignment output format is selected, the
+Database/Build property must be set in the input SAM file.
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/drop_parental_info.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/drop_parental_info.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,77 @@
+"""
+Build a TSV file that sets the parents of the listed individuals to None.
+
+The input is a TAB-separated file with an 'individual' column holding the
+VIDs; the output has the columns 'individual', 'father' and 'mother', with
+the last two always set to the string 'None'.
+"""
+import sys, csv, argparse, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+    """Return the argparse parser describing the tool's command line."""
+    parser = argparse.ArgumentParser(
+        description='set parents of the selected individuals to None')
+    add = parser.add_argument
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+        help='logging level (default=INFO)')
+    add('-H', '--host', type=str, help='omero hostname')
+    add('-U', '--user', type=str, help='omero user')
+    add('-P', '--passwd', type=str, help='omero password')
+    add('--in_file', type=str, required=True,
+        help='list of the individuals')
+    add('--out_file', type=str, required=True, help='output file')
+    return parser
+
+
+def main(argv):
+    """Run the tool on the argument list *argv* (program name excluded)."""
+    args = make_parser().parse_args(argv)
+    logger = get_logger('drop_parental_info', level=args.loglevel,
+                        filename=args.logfile)
+
+    # Command line values take precedence over the vlu.ome_* fallbacks; a
+    # ValueError raised while resolving them aborts the run.
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as err:
+        logger.critical(err)
+        sys.exit(err)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    logger.info('Retrieving individuals')
+    individuals = kb.get_objects(kb.Individual)
+    logger.info('Retrieved %d individuals' % len(individuals))
+    known = dict((ind.id, ind) for ind in individuals)
+
+    records = []
+    with open(args.in_file) as in_file:
+        for row in csv.DictReader(in_file, delimiter='\t'):
+            try:
+                # Reading the VID back from the lookup table is redundant,
+                # but it is a useful check to filter out wrong VIDs.
+                vid = known[row['individual']].id
+            except KeyError as missing:
+                logger.warning('Individual with VID %s does not exist, skipping line' % missing)
+                continue
+            records.append({'individual': vid,
+                            'father': 'None',
+                            'mother': 'None'})
+
+    with open(args.out_file, 'w') as out_file:
+        writer = csv.DictWriter(out_file, ['individual', 'father', 'mother'],
+                                delimiter='\t')
+        writer.writeheader()
+        writer.writerows(records)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/drop_parental_info.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/drop_parental_info.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,64 @@
+
+
+ Remove parental info of individuals
+
+
+ drop_parental_info.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+It will remove the parental info of individuals using information from a file like this::
+
+ individual
+ V08E18411BC66F4987BCA43EFC6F636224
+
+and build a tsv file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+
+-----
+
+.. class:: warningmark
+
+Note that Galaxy doesn't recognize a tsv file with just one column as a tabular file, so it needs to be converted by hand
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/enrollments_by_platewells.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/enrollments_by_platewells.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,102 @@
+"""
+From a list of platewells retrieves the enrollment code of the connected individual
+"""
+import argparse
+import csv
+import sys
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def make_parser():
+    """Return the argparse parser describing the tool's command line."""
+    parser = argparse.ArgumentParser(description='From platewells 2 enrollments code')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                        help='logging level', default='INFO')
+    parser.add_argument('-H', '--host', type=str, help='omero hostname')
+    parser.add_argument('-U', '--user', type=str, help='omero user')
+    parser.add_argument('-P', '--passwd', type=str, help='omero password')
+    parser.add_argument('-S', '--study', type=str, required=True,
+                        help='a study used to retrieve individuals')
+    parser.add_argument('--ifile', type=str, required=True,
+                        help='list of platewells used to fetch data')
+    parser.add_argument('--ofile', type=str, help='output file path',
+                        required=True)
+    return parser
+
+
+def main(argv):
+    """Write status and enrollment code of every platewell listed in --ifile.
+
+    Each input row yields exactly one output row with the columns
+    'platewell', 'status' and 'enrollment':
+
+    * a well whose connected individual is enrolled in the study gets
+      'STUDY_LABEL:STUDY_CODE';
+    * a well with no connected individual, or whose individual is not
+      enrolled in the study, gets 'STUDY_LABEL:not_enrolled';
+    * a label matching no known well is logged and written with 'None' in
+      the other two columns instead of aborting the run.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('pws2enrolls', level=args.loglevel,
+                        filename=args.logfile)
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        logger.critical(ve)
+        sys.exit(ve)
+
+    kb = KB(driver='omero')(host, user, passwd)
+    study = kb.get_study(args.study)
+    enrolled_map = {e.individual.id: e for e in kb.get_enrolled(study)}
+    logger.info('Loaded {} enrolled individuals for study {}'.format(len(enrolled_map), study.label))
+    plates = kb.get_objects(kb.TiterPlate)
+    logger.info('Loaded {} plates'.format(len(plates)))
+    # Wells are indexed as PLATE_BARCODE:WELL_LABEL; wells whose plate has
+    # no barcode cannot be addressed that way and are left out.
+    pws_map = {':'.join([w.container.barcode, w.label]): w
+               for w in kb.get_objects(kb.PlateWell)
+               if w.container.barcode}
+    logger.info('Loaded {} platewells'.format(len(pws_map)))
+
+    # The "with" blocks close both files even if a row fails half way.
+    with open(args.ofile, 'w') as of:
+        # restval fills the columns a record does not provide with 'None'.
+        writer = csv.DictWriter(of, ['platewell', 'status', 'enrollment'],
+                                delimiter='\t', quotechar='"', restval='None')
+        writer.writeheader()
+        with open(args.ifile, 'r') as f:
+            reader = csv.DictReader(f, delimiter='\t')
+            logger.info('Searching individuals connected to the platewells')
+            for r in reader:
+                label = r['platewell']
+                record = {'platewell': label}
+                well = pws_map.get(label)
+                if well is None:
+                    logger.warning('unknown platewell {}'.format(label))
+                    writer.writerow(record)
+                    continue
+                record['status'] = well.status.enum_label()
+                ind = kb.dt.get_connected(well, aklass=kb.Individual,
+                                          direction=kb.dt.DIRECTION_INCOMING)
+                try:
+                    code = enrolled_map[ind[0].id].studyCode
+                except (KeyError, IndexError):
+                    # KeyError: the individual is not enrolled in the study;
+                    # IndexError: no individual is connected to the well.
+                    logger.warning('not enrolled {}'.format(label))
+                    code = 'not_enrolled'
+                record['enrollment'] = ':'.join([study.label, code])
+                writer.writerow(record)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/enrollments_by_platewells.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/enrollments_by_platewells.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,90 @@
+
+
+ From a list of platewells retrieves the enrollments code of the connected individual
+
+
+ enrollments_by_platewells.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ifile=$input
+ --ofile=${output}
+ #if str($study_label) != 'no_study'
+ --study=${study_label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+From a list of platewells, this tool retrieves the enrollment codes of the connected individuals.
+
+Input file must be in TABULAR format and like::
+
+ platewell
+ A9033P3B:C09
+ A9033P3B:G09
+ A9033P3B:G10
+ A9033P3B:H05
+ A9033WRT:E08
+ A9033WRT:E10
+ A9033WRT:F03
+ A9033WRT:F04
+ ...
+
+Output file will be like::
+
+ platewell status enrollment
+ A9033P3B:C09 DISCARDED AUTOIMMUNITY:X3M6XP517
+ A9033P3B:G09 DISCARDED AUTOIMMUNITY:RYMRK2NLJ
+ A9033P3B:G10 DISCARDED AUTOIMMUNITY:OV13V99M9
+ A9033P3B:H05 DISCARDED AUTOIMMUNITY:OV13ZQK19
+ A9033WRT:E08 DISCARDED AUTOIMMUNITY:7GMWNX9M8
+ A9033WRT:E10 DISCARDED AUTOIMMUNITY:R3MKP0GL4
+ A9033WRT:F03 DISCARDED AUTOIMMUNITY:N1VD2Q915
+ A9033WZT:A04 CONTENTUSABLE AUTOIMMUNITY:210JRG4MW
+ ...
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/flowcell_samplesheet.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/flowcell_samplesheet.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,125 @@
+
+
+ Build the samplesheet for a given FlowCell
+
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=$__user_email__
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${outfile}
+ flowcell_samplesheet
+ #if $namespace.ignore_namespace
+ --flowcell=${flowcell}
+ --ignore_namespace
+ #else
+ #if $namespace.namespace_value
+ --flowcell="${namespace.namespace_value}|${flowcell}"
+ #else
+ --flowcell="${flowcell}"
+ #end if
+ #end if
+ #if $remove_namespaces
+ --remove_namespaces
+ #end if
+ #if $add_sample_label
+ --sample_label
+ #end if
+ --separator=${csv_separator}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Export data related to a FlowCell identified by the Flowcell ID field
+into a csv file like
+
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator
+
+ foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe
+
+ foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe
+
+ foofc1,2,v021441,hg19,TAGCTT,,WG,john doe
+
+ ...
+
+If the checkbox "Add sample labels" is enabled, the output file will
+have a new column at the end of each row with the Label of the sample
+like
+
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleLabel
+
+ foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe,foosmp1
+
+ foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe,foosmp2
+
+ foofc1,2,v021441,hg19,TAGCTT,,WG,john doe,foosmp3
+
+ ...
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_kinship_input.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_kinship_input.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,129 @@
+
+
+ Build input files for kinship MR application
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ #if $enable_debug
+ --loglevel=DEBUG
+ #else
+ --loglevel=$__app__.config.vl_loglevel
+ #end if
+ --logfile=${logfile}
+ --ofile=${output_gen}
+ extract_gt
+ #if str($study) != 'no_study'
+ --study=${study}
+ #end if
+ --out_samples_list=${output_sl}
+ --marker_set=${mset}
+ #if $transpose_output
+ --transpose_output
+ #end if
+ --compress_output
+ --compression_level=${compression_level}
+ #if $ignore_duplicated
+ --ignore_duplicated
+ #end if
+ #if str($data_collection) != 'no_collection'
+ --data_collection=${data_collection}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_parents_from_sibling.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_parents_from_sibling.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,94 @@
+'''
+From a file like this
+individual sibling
+V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445
+V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B
+....
+
+retrieve each individual's parents using sibling information and build a file like
+
+individual father mother
+V08E18411BC66F4987BCA43EFC6F636224 V027DE334753424F07B81A70053EF5B873 V035222CAEE0474AFEBB9A161D4B64914E
+V0FAE2B10F690041509739A3F4B314DC8F V0E966B53BDCC942C09D6B6D96DE98F4F4 V0F7B6926C6FBE4F0BB38BBC6CFB13A825
+....
+
+'''
+
+import sys, csv, argparse, logging, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+    """Return the argparse parser describing the tool's command line."""
+    parser = argparse.ArgumentParser(
+        description='retrieve parents information using sibling')
+    add = parser.add_argument
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+        help='logging level (default=INFO)')
+    add('-H', '--host', type=str, help='omero hostname')
+    add('-U', '--user', type=str, help='omero user')
+    add('-P', '--passwd', type=str, help='omero passwd')
+    add('--in_file', type=str, required=True,
+        help='input file with individual-sibling couples')
+    add('--out_file', type=str, required=True,
+        help='output file with parents information')
+    return parser
+
+
+def _vid_or_none(parent):
+    """Return the id of *parent*, or the string 'None' when it is unset."""
+    return parent.id if parent else 'None'
+
+
+def main(argv):
+    """Run the tool on the argument list *argv* (program name excluded)."""
+    args = make_parser().parse_args(argv)
+    logger = get_logger('get_parents_from_sibling', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as err:
+        logger.critical(err)
+        sys.exit(err)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    logger.info('Retrieving individuals')
+    individuals = kb.get_objects(kb.Individual)
+    logger.info('Retrieved %d individuals' % len(individuals))
+    by_vid = dict((ind.id, ind) for ind in individuals)
+
+    records = []
+    with open(args.in_file) as infile:
+        for row in csv.DictReader(infile, delimiter='\t'):
+            # A sibling id missing from the lookup table only skips the row.
+            try:
+                sibling = by_vid[row['sibling']]
+                rec = {'individual': row['individual'],
+                       'father': _vid_or_none(sibling.father),
+                       'mother': _vid_or_none(sibling.mother)}
+                logger.info('Individual %s, father: %s - mother: %s' %
+                            (row['individual'], rec['father'], rec['mother']))
+                records.append(rec)
+            except KeyError:
+                logger.error('Unable to find individual %s' % row['sibling'])
+
+    logger.info('Retrieved parents for %d individuals' % len(records))
+
+    with open(args.out_file, 'w') as outfile:
+        writer = csv.DictWriter(outfile, ['individual', 'father', 'mother'],
+                                delimiter='\t')
+        writer.writeheader()
+        writer.writerows(records)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_parents_from_sibling.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_parents_from_sibling.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,66 @@
+
+
+ Retrieve an individual's parents using sibling information
+
+
+ get_parents_from_sibling.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will retrieve an individual's parents using sibling information from a file like this::
+
+ individual sibling
+ V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+ V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445
+ V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B
+
+and build a tsv file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+ V0FAE2B10F690041509739A3F4B314DC8F V07282522B89FC4F7CA08094537A13C0D1 V09D459311D1254095AE9F00B45E5A101E
+ V0382EF862AA4B475697C95D3777043239 V04CD9561F753F4853838E2E96819AAAC0 V0382EF862AA4B475697C95D3777043239
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_studies_details.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_studies_details.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,67 @@
+'''
+Dump label, description and number of enrolled individuals of every study
+known to the biobank into a headerless TAB-separated file.
+'''
+
+import argparse, csv, sys
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+    """Return the argparse parser describing the tool's command line."""
+    parser = argparse.ArgumentParser(description='retrieve studies details')
+    add = parser.add_argument
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+        help='logging level (default=INFO)')
+    add('-H', '--host', type=str, help='omero hostname')
+    add('-U', '--user', type=str, help='omero user')
+    add('-P', '--passwd', type=str, help='omero passwd')
+    add('--out_file', type=str, required=True,
+        help='output file with studies details')
+    return parser
+
+
+def _study_record(kb, study):
+    """Return the output row (a dict) describing *study*."""
+    enrolled = kb.get_enrolled(study)
+    return {'label': study.label,
+            'description': study.description,
+            'enrolled': len(enrolled)}
+
+
+def main(argv):
+    """Run the tool on the argument list *argv* (program name excluded)."""
+    args = make_parser().parse_args(argv)
+    logger = get_logger('get_studies', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as err:
+        logger.critical(err)
+        sys.exit(err)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    logger.info('Retrieving studies')
+    studies = kb.get_objects(kb.Study)
+    logger.info('Retrieved {} studies'.format(len(studies)))
+    records = [_study_record(kb, study) for study in studies]
+    records.sort(key=lambda rec: rec['label'])
+
+    with open(args.out_file, 'w') as outfile:
+        writer = csv.DictWriter(outfile, ['label', 'description', 'enrolled'],
+                                delimiter='\t')
+        # NOTE(review): no header row is written (writeheader() is left
+        # disabled) - confirm what the consumers expect before enabling it.
+        writer.writerows(records)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_studies_details.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_studies_details.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,45 @@
+
+
+ Retrieve studies details
+
+
+ get_studies_details.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+It will produce a list of the studies known by the biobank server
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_titerplates_details.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_titerplates_details.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,63 @@
+'''
+Dump label, barcode and status of the titerplates known to the biobank
+into a TAB-separated file with a header row.
+'''
+
+import argparse, csv, sys
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+    """Return the argparse parser describing the tool's command line."""
+    parser = argparse.ArgumentParser(description='retrieve titerplates details')
+    add = parser.add_argument
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+        help='logging level (default=INFO)')
+    add('-H', '--host', type=str, help='omero hostname')
+    add('-U', '--user', type=str, help='omero user')
+    add('-P', '--passwd', type=str, help='omero passwd')
+    add('--out_file', type=str, required=True,
+        help='output file with titerplates details')
+    return parser
+
+
+def main(argv):
+    """Run the tool on the argument list *argv* (program name excluded)."""
+    args = make_parser().parse_args(argv)
+    logger = get_logger('get_titerplates', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as err:
+        logger.critical(err)
+        sys.exit(err)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    logger.info('Retrieving titerplates')
+    titps = kb.get_objects(kb.TiterPlate)
+    logger.info('Retrieved {} titerplates'.format(len(titps)))
+    # Only objects whose type is exactly kb.TiterPlate are reported
+    # (presumably to leave out subclasses - TODO confirm).
+    records = [{'label': tp.label,
+                'barcode': tp.barcode,
+                'status': tp.status.enum_label()}
+               for tp in titps if type(tp) == kb.TiterPlate]
+    records.sort(key=lambda rec: rec['label'])
+
+    with open(args.out_file, 'w') as outfile:
+        writer = csv.DictWriter(outfile, ['label', 'barcode', 'status'],
+                                delimiter='\t')
+        writer.writeheader()
+        writer.writerows(records)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/get_titerplates_details.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_titerplates_details.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,45 @@
+
+
+ Retrieve titerplates details
+
+
+ get_titerplates_details.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --out_file=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+It will produce a list of the titerplates known by the biobank server
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/global_stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/global_stats.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,72 @@
+
+
+ Provide global statistics for a given study.
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ global_stats
+ #if str($study) != 'all_known_studies'
+ --study=${study}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It will output a tsv file with the following columns::
+
+ study diagnosis technology gender counts
+ BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 MALE 1
+ BSTUDY icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1
+ BSTUDY local:at0.3 AffymetrixCelGENOMEWIDESNP_6 MALE 2
+ BSTUDY icd10-cm:G35;icd10-cm:E10;icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 MALE 1
+ BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/gstudio_datasheet.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/gstudio_datasheet.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,70 @@
+
+
+ Build a Genome Studio datasheet for the given plate
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=$__user_email__
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${outfile}
+ gstudio_datasheet
+ #if str($plate) != 'no_plate'
+ --plate=${plate}
+ --manifest=${manifest}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Build a Genome Studio datasheet for the given plate
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/kb_query.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/kb_query.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,9 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.kb_query.main import main as kb_query
+# Forward the command line arguments unchanged to the kb_query entry point.
+kb_query(sys.argv[1:])
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/lookup_index.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/lookup_index.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+# Forward the command line arguments unchanged to the snp_manager entry point.
+main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/lookup_index.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/lookup_index.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,29 @@
+
+ lookup dbSNP index
+
+ lookup_index.py --logfile ${log_file} lookup_index -i ${input_file}
+ -o ${output_file}
+ --index-file "${ filter( lambda x: str( x[0] ) == str( $indices ), $__app__.tool_data_tables[ 'dbsnp_indexes' ].get_fields() )[0][-1] }"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool reads a Galaxy genome segment extractor output in interval
+format and performs a lookup in the selected dbSNP index to get the
+true rs label. It outputs a new marker definitions file with the true
+rs labels and masks.
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/map_to_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_to_collection.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,105 @@
+
+
+ Map a list of objects (vessels or data samples) to the specified
+ collection.
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${outfile}
+ map_to_collection
+ --ifile=${infile}
+ #if str($field_label) != ''
+ --field_label=${field_label}
+ #end if
+ #if str($collection_selector.collection_type) != 'no_coll_selected'
+ --collection_type=${collection_selector.collection_type}
+ #if str($collection_selector.collection_type) == 'DataCollection'
+ #if str($collection_selector.dcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.dcoll_label}
+ #end if
+ #elif str($collection_selector.collection_type) == 'VesselsCollection'
+ #if str($collection_selector.vcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.vcoll_label}
+ #end if
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/map_vid.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_vid.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,51 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+"""
+Galaxy front end for the kb_query 'map_vid' sub-command.
+
+Usage: map_vid.py SELECTED_COLUMN NEW_COLUMN_NAME INPUT_FILE [kb_query args]
+
+SELECTED_COLUMN is the 1-based index of a column of INPUT_FILE; its name
+is read from the file's first line and handed to kb_query, together with
+NEW_COLUMN_NAME, as '--column=NAME,NEW_COLUMN_NAME'.
+"""
+
+import sys
+from bl.vl.app.kb_query.main import main as kb_query
+
+
+def main(argv):
+    """Turn the column index into a column name and run kb_query.
+
+    Exits with an error message when fewer than three arguments are given
+    or when the column index does not match a column of the input file.
+    """
+    if len(argv) < 3:
+        sys.exit('usage: map_vid.py SELECTED_COLUMN NEW_COLUMN_NAME '
+                 'INPUT_FILE [kb_query args]')
+    selected_column, new_column_name, input_file = argv[:3]
+    selected_column = int(selected_column) - 1
+    new_column_name = new_column_name.strip()
+
+    # Backport to 2.6: no "with" statement here; try/finally still
+    # guarantees that the file gets closed.
+    fi = open(input_file)
+    try:
+        header = fi.readline().strip()
+    finally:
+        fi.close()
+
+    column_names = header.split('\t')
+    # Without this check an index of 0 (or below) would silently wrap around
+    # and pick a column counted from the end of the header.
+    if not 0 <= selected_column < len(column_names):
+        sys.exit('column %d does not exist: the input file has %d column(s)'
+                 % (selected_column + 1, len(column_names)))
+    column_name = column_names[selected_column]
+
+    argv = argv[3:] + ['--column=%s,%s' % (column_name, new_column_name)]
+    kb_query(argv)
+
+
+main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/map_vid.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_vid.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,148 @@
+
+
+ Map labels of objects known to Omero/VL to their VID
+
+
+ map_vid.py
+ ${selected_column}
+ ${new_column_name}
+ ${input1}
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --operator=galaxy
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ map_vid
+ --ifile=${input1}
+ --source-type=${source_type.source_type}
+ #if $source_type.source_type == 'Individual'
+ #if str($source_type.study) != 'use_provided'
+ --study=${source_type.study}
+ #end if
+ #end if
+ #if $strict_mapping
+ --strict-mapping
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+The tool resolves VIDs for the given column and renames the column
+itself with a new label. Usually the simple item label is enough to
+map an item to its VID, but in some cases a special syntax is needed:
+
+* for Individual items, if no default study is provided, the pattern
+ to be used is **STUDY:STUDY_LABEL**. If a default study is provided,
+ the column must contain only the STUDY_LABEL
+
+* for PlateWell items the pattern is **PLATE_LABEL:WELL_LABEL**
+
+* for DataCollectionItem items the pattern is
+ **DATA_COLLECTION_LABEL:ITEM_LABEL**
+
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/markers_to_fastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/markers_to_fastq.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+# Forward the command line arguments unchanged to the snp_manager entry point.
+main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/markers_to_fastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/markers_to_fastq.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,34 @@
+
+ converter
+
+ markers_to_fastq.py --logfile ${log_file} markers_to_fastq
+ -i ${input_file} -o ${output_file}
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool converts VL marker definitions to fastq data.
+
+VL marker definitions files have the following format (spaces are tabs)::
+
+ label rs_label mask
+ SNP_A-1780419 rs6576700 GGATACATTTTATTGC[A/G]CTTGCAGAGTATTTTT
+ SNP_A-1780418 rs17054099 GGATACATTACCCAAA[C/T]GGTCACAGGTCAAAGG
+ SNP_A-1780415 rs7730126 GGATACATCCCCCCCA[A/G]AAAATGAGAATAAAGC
+ ...
+
+Where "label" is a unique identifier, "rs_label" is the dbSNP label
+and "mask" is the SNP's mask in the
+LEFT_FLANK[ALLELE_A/ALLELE_B/...]RIGHT_FLANK format. One fastq record
+is generated for each allele in the mask. The string "None" in the
+rs_label column means there is no rs label for the marker.
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/plate_dsamples_details.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/plate_dsamples_details.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,136 @@
+
+
+ Retrieve wells and connected data samples related to a known plate
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${output}
+ plate_data_samples
+ #if str($plate) != 'select_one'
+ --plate=${plate}
+ #end if
+ #if $fetch_all
+ --fetch_all
+ #end if
+ #if str($vcoll_label) != 'no_collection'
+ --vessels_collection=${vcoll_label}
+ #end if
+ #if $vessel_types
+ --ignore_types=${vessel_types}
+ #end if
+ #if str($study_label) != 'no_study'
+ --map_study=${study_label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Using one of the selectable plate barcodes, the tool will generate a
+report file for the plate like::
+
+ PLATE_barcode PLATE_label WELL_label WELL_status DATA_SAMPLE_label
+ XXYYZZKK test_plate A01 CONTENTUSABLE a01_test_sample
+ XXYYZZKK test_plate A02 CONTENTUSABLE X
+ XXYYZZKK test_plate A03 UNKNOWN OR EMPTY X
+ XXYYZZKK test_plate A04 CONTENTUSABLE a04_test_sample
+ XXYYZZKK test_plate A05 DISCARDED X
+ ...
+
+For each plate, all wells will be generated in the output file, even
+the ones not actually recorded into the system, these wells will be
+marked with an 'UNKNOWN OR EMPTY' status.
+
+For each well, the tool performs a query in order to find if at least
+one data sample is directly connected to the well itself; if at least
+one is found, the label of the data sample will be placed in the
+DATA_SAMPLE_label column, if no data sample is connected to the well a
+'X' will be placed.
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/query.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,95 @@
+
+
+ Provides a simplified environment to perform complex queries to
+ BIOBANK.
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --operator=galaxy
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ query
+ --group=$study
+ --code-file=$code_file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${str($query_code).replace('__sq__', "'").replace('__cr____cn__', '\n')}
+
+
+
+
+
+
+
+
+The following is an example of a query that will dump family relations
+within the group::
+
+ writeheader('study', 'id', 'gender', 'father', 'mother')
+ for i in Individuals(group):
+ writerow(group.id, enum_label(i.gender),
+ i.father.id if i.father else 'None',
+ i.mother.id if i.mother else 'None')
+
+
+The next example will prepare a file that could be used to define a
+data collection and then as the input for a genotyping run::
+
+ writeheader('dc_id', 'gender', 'data_sample',
+ 'path', 'mimetype', 'size', 'sha1')
+ for i in Individuals(group):
+ for d in DataSamples(i, 'AffymetrixCel'):
+ for o in DataObjects(d):
+ writerow(group.id, enum_label(i.gender), d.id,
+ o.path, o.mimetype, o.size, o.sha1)
+
+In the examples above, ``group`` (actually a study) corresponds to
+the group whose label is assigned by the ``--group`` flag.
+
+**Note** This is clearly an extremely dangerous tool.
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/select_sub_group.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/select_sub_group.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,149 @@
+
+
+ Selects groups of individuals.
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --operator=galaxy
+ --ofile=${output1}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ selector
+ #if str($study) != 'use_all'
+ --study=$study
+ #end if
+ --group-label=$group_label
+ --total-number=$total_number
+ --male-fraction=$male_fraction
+ --reference-disease=$reference_diagnosis
+ --control-fraction=$control_fraction
+ #if str($required_datasample) != 'unselect'
+ --required-datasample=$required_datasample
+ #end if
+ #if int($seed) != 0
+ --seed=$seed
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+It will select a group of individuals from a specific group (from all
+available individuals, if no group is selected). The selection is
+controlled by the following parameters:
+
+ * total number of individuals selected
+ * male fraction
+ * reference disease
+ * control fraction
+ * presence of specific datasets
+
+The results will be presented as a file that can be used to generate a
+new group (actually a study). The file will have the following columns::
+
+ study label individual
+ XXX 0001 V20940239409
+ XXX 0002 V20940239509
+ XXX 0003 V20940239609
+ XXX 0004 V20940239709
+ ...
+
+ where study is the name of the new study
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/snp_manager.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/snp_manager.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main as snp_manager
+
+snp_manager(sys.argv[1:])  # call bl.vl.app.snp_manager's main() with the command-line arguments, script name excluded
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/snp_manager.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/snp_manager.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,24 @@
+
+ get true rs label and mask from dbSNP
+
+ snp_manager.py
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ FIXME
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/tools/vessels_by_individual.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/vessels_by_individual.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,74 @@
+
+
+ Retrieve all vessels related to individuals passed with the input
+ file. Vessel type and a Vessel Collection can be used as filters.
+
+
+ kb_query.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --operator=galaxy
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${outfile}
+ vessels_by_individual
+ --ifile=${infile}
+ #if str($collection_label) != 'no_collection'
+ --vessels_collection=${collection_label}
+ #end if
+ #if str($vessel_type) != 'no_type'
+ --vessel_type=${vessel_type}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/change_source_item.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/change_source_item.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,258 @@
+# The tool changes the source of an object inside the system.
+# Expected input file format is
+#
+# target new_source
+# V1415515 V1241441
+# V1351124 V1511141
+# .....
+#
+# Where target is the object whose source will be changed with the
+# new_source object. New source type will be specified using the
+# command line option.
+
+import csv, argparse, sys, os, json, time
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+import omero
+import omero.model
+
+
+def make_parser():  # Build and return the argparse parser for the change_source_item command line.
+ parser = argparse.ArgumentParser(description='change the source for given items')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional: main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--operator', type=str, required=True,
+ help='operator username')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='list of items with new sources')  # main() reads it as tab-separated text with a header row
+ parser.add_argument('--target_type', type=str, required=True,
+ help='type of the target objects')  # main() resolves the name with getattr(kb, ...)
+ parser.add_argument('--source_type', type=str, required=True,
+ help='type of the new source objects')  # main() resolves the name with getattr(kb, ...)
+ return parser
+
+
+def do_check(records, targets, sources,  # Keep only the records whose 'target' and 'new_source' IDs match a loaded object.
+ target_type, source_type,  # type names, used only in the warning messages
+ kb, logger):  # kb is not used in this function
+ logger.info('Starting consistency checks')
+ src_map = dict([(s.id, s) for s in sources])  # index the loaded objects by their .id
+ trg_map = dict([(t.id, t) for t in targets])
+ good_records = []
+ targets = {}  # rebinds the parameter: from here on an id -> object map of the referenced targets only
+ sources = {}  # same for the sources
+ for i, r in enumerate(records):
+ if r['target'] not in trg_map:
+ logger.warning('No %s with ID %s, rejecting record %d' % (target_type,
+ r['target'], i))
+ continue
+ if r['new_source'] not in src_map:
+ logger.warning('No %s with ID %s, rejecting record %d' % (source_type,
+ r['new_source'], i))
+ continue
+ targets[r['target']] = trg_map[r['target']]
+ sources[r['new_source']] = src_map[r['new_source']]
+ good_records.append(r)
+ logger.info('Done with consistency checks')
+ return good_records, targets, sources  # (accepted records, {id: target}, {id: source})
+
+
+def update_data(records, targets, sources, operator, act_conf,  # Re-point each record's target at its new source, one batch at a time.
+ kb, logger, batch_size = 500):  # targets/sources are the id -> object maps returned by do_check()
+ def get_chunk(batch_size, records):  # generator yielding consecutive slices of at most batch_size records
+ offset = 0
+ while len(records[offset:]) > 0:
+ yield records[offset:offset+batch_size]
+ offset += batch_size
+ dev = get_device(kb, logger)  # one Device shared by every action created below
+ for i, recs in enumerate(get_chunk(batch_size, records)):
+ logger.info('Updating batch %d' % i)
+ batch_to_save = []
+ edges_to_delete = []  # (old source, target) pairs passed to kb.dt.destroy_edge after the save
+ for r in recs:
+ target = targets[r['target']]
+ # Build the ActionOnAction backup object
+ if not target.lastUpdate:  # chain the backup onto the latest update, or onto the original action if there is none
+ last_action = target.action
+ else:
+ last_action = target.lastUpdate
+ old_action = target.action
+ asconf = {'backup' : {'action' : old_action.id}}  # the backup setup records the id of the action being replaced
+ aslabel = 'updater.update_source_item-%f' % time.time()
+ backup = build_action(operator, old_action.context,
+ dev, last_action, aslabel,
+ asconf, kb, logger)
+ target.lastUpdate = backup
+ # Build the Action in order to attach the new source to
+ # the target object
+ new_source = sources[r['new_source']]
+ if new_source.is_mapped:  # NOTE(review): presumably detaches the loaded object before it is used as an action target - confirm against the kb API
+ new_source.unload()
+ asconf = act_conf
+ aslabel = 'updater.update_source_item-%f' % time.time()
+ new_act = build_action(operator, old_action.context,
+ dev, new_source, aslabel,
+ asconf, kb, logger)
+ target.action = new_act
+ if old_action.OME_TABLE == 'Action':
+ # no old source, just save the new action
+ batch_to_save.append(target)
+ else:
+ # check if the old target and the new one are different
+ if new_source != old_action.target:  # when they are equal the target is neither saved nor has its edge removed
+ batch_to_save.append(target)
+ edges_to_delete.append((old_action.target, target))
+ if len(batch_to_save) > 0:
+ kb.save_array(batch_to_save)
+ else:
+ logger.info('No record need to be updated')
+ for vert in edges_to_delete:
+ kb.dt.destroy_edge(*vert)
+
+
+def build_action(operator, context, device, target,
+ action_setup_label, action_setup_conf,
+ kb, logger):
+ if action_setup_label:
+ asetup = get_action_setup(action_setup_label, action_setup_conf,
+ kb, logger)
+ else:
+ asetup = None
+ aconf = {
+ 'device' : device,
+ 'actionCategory' : kb.ActionCategory.IMPORT,
+ 'operator' : 'operator',
+ 'context' : context,
+ 'target' : target,
+ }
+ if asetup:
+ aconf['setup'] = asetup
+ action = kb.factory.create(retrieve_action_type(target, kb), aconf)
+ return action
+
+
+def retrieve_action_type(target, kb):  # Return the kb Action class that takes `target` as its target; ValueError if none matches.
+ tklass = target.ome_obj.__class__.__name__  # default: the concrete class name of the wrapped OMERO object
+ for i, k in enumerate(target.ome_obj.__class__.__mro__):
+ if k is omero.model.IObject:
+ tklass = target.ome_obj.__class__.__mro__[i-1].__name__  # use the class listed just before IObject in the MRO
+ if tklass == 'Vessel':
+ return kb.ActionOnVessel
+ elif tklass == 'Individual':
+ return kb.ActionOnIndividual
+ elif tklass == 'DataSample':
+ return kb.ActionOnDataSample
+ elif tklass == 'DataCollectionItem':
+ return kb.ActionOnDataCollectionItem
+ elif tklass == 'Action':
+ return kb.ActionOnAction
+ # elif tklass == 'VLCollection':
+ # return kb.ActionOnCollection
+ else:
+ raise ValueError('No Action related to %s klass' % tklass)
+
+
+def get_action_setup(label, conf, kb, logger):  # Create an ActionSetup with the given label; logger is not used here.
+ asetup_conf = {
+ 'label' : label,
+ 'conf' : json.dumps(conf),  # the configuration is stored as a JSON string
+ }
+ asetup = kb.factory.create(kb.ActionSetup, asetup_conf)  # created only; no save() is called in this function
+ return asetup
+
+
+def get_device(kb, logger):  # Return the Device labelled 'updater-0.1.update_source_item', creating and saving it if missing.
+ dev_model = 'UPDATE'
+ dev_maker = 'CRS4'
+ dev_release = '0.1'
+ dev_label = 'updater-%s.update_source_item' % dev_release
+ device = kb.get_device(dev_label)
+ if not device:  # a falsy lookup result is treated as "no such device"
+ logger.debug('No device with label %s, creating one' % dev_label)
+ conf = {
+ 'maker' : dev_maker,
+ 'model' : dev_model,
+ 'release' : dev_release,
+ 'label' : dev_label,
+ }
+ device = kb.factory.create(kb.Device, conf).save()  # unlike the actions built in this file, the device is saved immediately
+ return device
+
+
+def find_action_setup_conf(args):  # Collect the parsed command-line values into a dict, leaving the password out.
+ action_setup_conf = {}
+ for x in dir(args):
+ if not (x.startswith('_') or x.startswith('func')):  # skip private/dunder names and any attribute whose name starts with 'func'
+ action_setup_conf[x] = getattr(args, x)
+ if 'passwd' in action_setup_conf:
+ action_setup_conf.pop('passwd') # Storing passwords into an
+ # Omero obj is not a great idea...
+ return action_setup_conf
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('change_source_item', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.info('Loading data from input file')
+ with open(args.in_file) as f:
+ reader = csv.DictReader(f, delimiter='\t')
+ records = list(reader)
+ logger.info('Loaded %d records' % len(records))
+
+ logger.info('Loading %s type objects' % args.target_type)
+ targets = kb.get_objects(getattr(kb, args.target_type))
+ logger.info('Loaded %d objects' % len(targets))
+ if len(targets) == 0:
+ msg = 'No targets loaded from the system, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ logger.info('Loading %s type objects' % args.source_type)
+ sources = kb.get_objects(getattr(kb, args.source_type))
+ logger.info('Loaded %d objects' % len(sources))
+ if len(sources) == 0:
+ msg = 'No sources loaded from the system, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ logger.info('Loading Action type objects')
+ acts = kb.get_objects(kb.Action)
+ logger.info('Loaded %d objects' % len(acts))
+
+ records, targets, sources = do_check(records, targets, sources,
+ args.target_type, args.source_type,
+ kb, logger)
+ if len(records) == 0:
+ msg = 'No records passed consistency checks, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ aconf = find_action_setup_conf(args)
+
+ update_data(records, targets, sources, args.operator,
+ aconf, kb, logger)
+
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/change_source_item.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/change_source_item.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,57 @@
+
+
+ Change source items for given objects
+
+
+ change_source_item.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --operator=$__user_email__
+ --in_file=${infile}
+ --target_type=${target_type}
+ --source_type=${source_type}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/discard_from_collection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/discard_from_collection.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,91 @@
+import csv, argparse, sys, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+COLLECTION_TYPES = {'VesselsCollection' : 'VesselsCollectionItem',
+ 'DataCollection' : 'DataCollectionItem'}
+
+def make_parser():  # Build and return the argparse parser for the discard_from_collection command line.
+ parser = argparse.ArgumentParser(description='remove elements from a Vessels or Data Collection')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional: main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-I', '--ifile', type=str, required=True,
+ help='list of collection items that will be removed')  # main() reads the 'collection_item' column of this tab-separated file
+ parser.add_argument('--collection_type', type=str, required=True,
+ choices=COLLECTION_TYPES.keys(),  # only the collection types declared in COLLECTION_TYPES are accepted
+ help='type of the collection')
+ parser.add_argument('--collection_label', type=str, required=True,
+ help='label of the collection')
+
+ return parser
+
+def load_collection(coll_type, coll_label, kb):  # Return the first collection of type coll_type labelled coll_label, or None.
+ query = 'SELECT coll FROM %s coll WHERE coll.label = :coll_label' % coll_type  # the type name is interpolated into the query text
+ coll = kb.find_all_by_query(query, {'coll_label' : coll_label})  # the label is passed as a named query parameter
+ return coll[0] if len(coll) > 0 else None
+
+def load_collection_items(collection, coll_type, kb):  # Return an {item id: item} map of the items of `collection`.
+ if COLLECTION_TYPES[coll_type] == 'VesselsCollectionItem':  # an unknown coll_type raises KeyError on this lookup
+ citems = kb.get_vessels_collection_items(collection)
+ elif COLLECTION_TYPES[coll_type] == 'DataCollectionItem':
+ citems = kb.get_data_collection_items(collection)
+ else:
+ raise ValueError('Unknown data collection type %s' % COLLECTION_TYPES[coll_type])
+ ci_map = {}
+ for ci in citems:
+ ci_map[ci.id] = ci
+ return ci_map
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('discard_from_collection', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.info('Loading collection %s from %s' % (args.collection_label,
+ args.collection_type))
+ coll = load_collection(args.collection_type, args.collection_label, kb)
+ if not coll:
+ msg = 'No %s found with label %s' % (args.collection_type,
+ args.collection_label)
+ logger.error(msg)
+ sys.exit(msg)
+ logger.info('Loading items from collection')
+ coll_items = load_collection_items(coll, args.collection_type, kb)
+ logger.info('Fetched %d elements' % len(coll_items))
+
+ with open(args.ifile) as infile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ to_be_deleted = [row['collection_item'] for row in reader]
+ logger.info('Found %d items to be deleted' % len(to_be_deleted))
+
+ for tbd in to_be_deleted:
+ try:
+ kb.delete(coll_items[tbd])
+ logger.info('%s with ID %s deleted' % (COLLECTION_TYPES[args.collection_type],
+ tbd))
+ except KeyError, ke:
+ logger.warning('No %s related to ID %s' % (COLLECTION_TYPES[args.collection_type],
+ ke))
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/discard_from_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/discard_from_collection.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,116 @@
+
+
+ Discard input elements from the selected collection
+
+
+ discard_from_collection.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ifile=${ifile}
+ #if str($collection_selector.collection_type) != 'no_coll_selected'
+ --collection_type=${collection_selector.collection_type}
+ #if str($collection_selector.collection_type) == 'DataCollection'
+ #if str($collection_selector.dcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.dcoll_label}
+ #end if
+ #elif str($collection_selector.collection_type) == 'VesselsCollection'
+ #if str($collection_selector.vcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.vcoll_label}
+ #end if
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool discards one or more items from a DataCollection or a
+VesselsCollection.
+
+The expected input file must be like
+
++---------------+
+|collection_item|
++---------------+
+|V013AFF22311 |
++---------------+
+|V0ABB3451516 |
++---------------+
+|V012441AAEEC |
++---------------+
+
+Input file rows must be VIDs obtained using the **map_vid** tool.
+
+Collection must be selected using the specific selection lists that
+show only the ones imported into the system.
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/merge_individuals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/merge_individuals.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,223 @@
+#=======================================
+# This tool moves all information related to an individual (source) to
+# another (target). The information moved is:
+# * children (Individual objects)
+# * ActionOnIndividual
+# * Enrollments
+# * EHR records
+#
+# The tool expects as input a TSV file like this
+# source target
+# V0468D2D96999548BF9FC6AD24C055E038 V060BAA01C662240D181BB98A51885C498
+# V029CC0A614E2D42D0837602B15193EB58 V01B8122A7C75A452E9F80381CEA988557
+# V0B20C93E8A88D43EFB87A7E6911292A05 V0BED85E8E76A54AA7AB0AFB09F95798A8
+# ...
+#
+# NOTE WELL:
+# * Parents of the "source" individual WILL NOT BE ASSIGNED
+# to the "target" individual
+# * For the Enrollment objects, if
+# "target" individual already has a code in the same study as "source"
+# individual, the script will try to move the Enrollment to the
+# "duplicated" study (this will be fixed when a proper ALIASES
+# management is introduced)
+# =======================================
+
+import sys, argparse, csv, time, json, os
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.kb import KBError
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():  # Build and return the argparse parser for the merge_individuals command line.
+ parser = argparse.ArgumentParser(description='merge informations related to an individual ("source") to another one ("target")')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional: main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-O', '--operator', type=str, help='operator',
+ required=True)
+ parser.add_argument('--in_file', type=str, required = True,
+ help='input TSV file')  # main() reads its 'source' and 'target' columns
+ return parser
+
+
+def update_object(obj, backup_values, operator, kb, logger):
+ logger.debug('Building ActionOnAction for object %s::%s' %
+ (obj.get_ome_table(),
+ obj.id)
+ )
+ act_setup = build_action_setup('merge-individuals-%f' % time.time(),
+ backup_values, kb)
+ aoa_conf = {
+ 'setup': act_setup,
+ 'actionCategory' : kb.ActionCategory.UPDATE,
+ 'operator': operator,
+ 'target': obj.lastUpdate if obj.lastUpdate else obj.action,
+ 'context': obj.action.context
+ }
+ logger.debug('Updating object with new ActionOnAction')
+ obj.lastUpdate = kb.factory.create(kb.ActionOnAction, aoa_conf)
+
+
+def build_action_setup(label, backup, kb, logger):  # Create an ActionSetup whose conf is the JSON text {"backup": backup}.
+ logger.debug('Creating a new ActionSetup with label %s and backup %r' % (label, backup))
+ conf = {
+ 'label': label,
+ 'conf': json.dumps({'backup' : backup})  # the old values are stored as a JSON string
+ }
+ asetup = kb.factory.create(kb.ActionSetup, conf)  # created only; no save() is called in this function
+ return asetup
+
+
+def update_children(source_ind, target_ind, operator, kb, logger):
+ if source_ind.gender.enum_label() == kb.Gender.MALE.enum_label():
+ parent_type = 'father'
+ elif source_ind.gender.enum_label() == kb.Gender.FEMALE.enum_label():
+ parent_type = 'mother'
+ else:
+ raise ValueError('%s is not a valid gender value' % (source_ind.gender.enum_label()))
+ query = '''
+ SELECT ind FROM Individual ind
+ JOIN ind.{0} AS {0}
+ WHERE {0}.vid = :parent_vid
+ '''.format(parent_type)
+ children = kb.find_all_by_query(query, {'parent_vid' : source_ind.id})
+ logger.info('Retrieved %d children for source individual' % len(children))
+ for child in children:
+ backup = {}
+ logger.debug('Changing %s for individual %s' % (parent_type,
+ child.id))
+ backup[parent_type] = getattr(child, parent_type).id
+ setattr(child, parent_type, target_ind)
+ update_object(child, backup, operator, kb)
+ kb.save_array(children)
+
+
+def update_action_on_ind(source_ind, target_ind, operator, kb, logger):  # Move the ActionOnIndividual objects targeting source_ind, and its outgoing edges, to target_ind; operator is unused.
+ query = '''SELECT act FROM ActionOnIndividual act
+ JOIN act.target AS ind
+ WHERE ind.vid = :ind_vid
+ '''
+ src_acts = kb.find_all_by_query(query, {'ind_vid' : source_ind.id})
+ logger.info('Retrieved %d actions for source individual' % len(src_acts))
+ connected = kb.dt.get_connected(source_ind, direction = kb.dt.DIRECTION_OUTGOING,
+ query_depth = 1)  # objects reached from source_ind at depth 1 in the outgoing direction
+ if source_ind in connected:  # the individual itself is not one of its own connections
+ connected.remove(source_ind)
+ for sa in src_acts:
+ logger.debug('Changing target for action %s' % sa.id)
+ sa.target = target_ind
+ logger.debug('Action %s target updated' % sa.id)
+ kb.save_array(src_acts)
+ for conn in connected:  # replace each source_ind -> conn edge with a target_ind -> conn edge
+ kb.dt.destroy_edge(source_ind, conn)
+ kb.dt.create_edge(conn.action, target_ind, conn)
+
+
+def update_enrollments(source_ind, target_ind, operator, kb, logger):
+ query = '''SELECT en FROM Enrollment en
+ JOIN en.individual AS ind
+ WHERE ind.vid = :ind_vid
+ '''
+ enrolls = kb.find_all_by_query(query, {'ind_vid' : source_ind.id})
+ logger.info('Retrieved %d enrollments for source individual' % len(enrolls))
+ for sren in enrolls:
+ try:
+ sren.individual = target_ind
+ logger.debug('Changing individual for enrollment %s in study %s' % (sren.studyCode,
+ sren.study.label))
+ kb.save(sren)
+ logger.info('Changed individual for enrollment %s (study code %s -- study %s)' % (sren.id,
+ sren.studyCode,
+ sren.study.label))
+ except KBError, kbe:
+ logger.warning('Unable to update enrollment %s (study code %s -- study %s)' % (sren.id,
+ sren.studyCode,
+ sren.study.label))
+ move_to_duplicated(sren, operator, kb, logger)
+
+
+def update_ehr_records(source_ind, target_ind, kb):  # Re-assign the EHR table rows of source_ind to target_ind.
+ kb.update_table_rows(kb.eadpt.EAV_EHR_TABLE, '(i_vid == "%s")' % source_ind.id,  # selects the rows whose i_vid is the source id
+ {'i_vid' : target_ind.id})  # new value written to i_vid
+
+
+# This method should be considered a temporary hack that will be
+# used until proper ALIAS management is introduced into the
+# system
+def move_to_duplicated(enrollment, operator, kb, logger):
+ old_st = enrollment.study
+ dupl_st = kb.get_study('%s_DUPLICATI' % old_st.label)
+ if not dupl_st:
+ logger.warning('No "duplicated" study ({0}_DUPLICATI) found for study {0}'.format(old_st.label))
+ return
+ enrollment.study = dupl_st
+ try:
+ kb.save(enrollment)
+ logger.info('Enrollmnet %s moved from study %s to study %s' % (enrollment.studyCode,
+ old_st.label, dupl_st.label))
+ except:
+ logger.error('An error occurred while moving enrollment %s from study %s to %s' % (enrollment.studyCode,
+ old_st.label,
+ dupl_st.label))
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('merge_individuals', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.debug('Retrieving Individuals')
+ individuals = kb.get_objects(kb.Individual)
+ logger.debug('Retrieved %d Individuals' % len(individuals))
+ ind_lookup = {}
+ for i in individuals:
+ ind_lookup[i.id] = i
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ for row in reader:
+ try:
+ source = ind_lookup[row['source']]
+ logger.info('Selected as source individual with ID %s' % source.id)
+ target = ind_lookup[row['target']]
+ logger.info('Selected as destination individual with ID %s' % target.id)
+ except KeyError, ke:
+ logger.warning('Unable to retrieve individual with ID %s, skipping row' % ke)
+ continue
+
+ logger.info('Updating children connected to source individual')
+ update_children(source, target, args.operator, kb, logger)
+ logger.info('Children update complete')
+
+ logger.info('Updating ActionOnIndividual related to source individual')
+ update_action_on_ind(source, target, args.operator, kb, logger)
+ logger.info('ActionOnIndividual update completed')
+
+ logger.info('Updating enrollments related to source individual')
+ update_enrollments(source, target, args.operator, kb, logger)
+ logger.info('Enrollments update completed')
+
+ logger.info('Updating EHR records related to source individual')
+ update_ehr_records(source, target, kb)
+ logger.info('EHR records update completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/merge_individuals.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/merge_individuals.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,69 @@
+
+
+ Merge individuals' data
+
+
+ merge_individuals.py
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --in_file=${input}
+ --operator=$__user_email__
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool moves all information related to an individual (source) to
+another (target). The information moved is:
+
+ * children (Individual objects)
+ * ActionOnIndividual
+ * Enrollments
+ * EHR records
+
+The tool expects as input a TSV file like this::
+
+ source target
+ V0468D2D96999548BF9FC6AD24C055E038 V060BAA01C662240D181BB98A51885C498
+ V029CC0A614E2D42D0837602B15193EB58 V01B8122A7C75A452E9F80381CEA988557
+ V0B20C93E8A88D43EFB87A7E6911292A05 V0BED85E8E76A54AA7AB0AFB09F95798A8
+ ...
+
+NOTE WELL:
+ * Parents of the "source" individual WILL NOT BE ASSIGNED
+ to the "target" individual
+ * For the Enrollment objects, if
+ "target" individual already has a code in the same study as "source"
+ individual, the script will try to move the Enrollment to the
+ "duplicated" study
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/update_parents.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_parents.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,110 @@
+import sys, csv, argparse, time, json
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():  # Build and return the argparse parser for the update_parents command line.
+ parser = argparse.ArgumentParser(description='update parents')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')  # host/user/passwd are optional: main() falls back to the vlu.ome_*() helpers
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-O', '--operator', type=str, help='operator',
+ required=True)
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file with individual, father and mother')  # main() reads it as tab-separated text with a header row
+ return parser
+
+
+def update_parents(individual, father, mother, operator, kb, logger):  # Set the individual's parents; return it if anything changed, else None.
+ backup = {}  # old parent ids, filled only for the fields that actually change
+ logger.info('Updating parents for individual %s', individual.id)
+ if individual.father != father:
+ backup['father'] = individual.father.id if individual.father else None
+ logger.info('Setting father to %s (old value %s)' % (father.id if father else None,
+ backup['father']))
+ individual.father = father
+ if individual.mother != mother:
+ backup['mother'] = individual.mother.id if individual.mother else None
+ logger.info('Setting mother to %s (old value %s)' % (mother.id if mother else None,
+ backup['mother']))
+ individual.mother = mother
+ if len(backup.items()) > 0:  # at least one parent changed
+ update_object(individual, backup, operator, kb, logger)  # records the old values in a new ActionOnAction
+ return individual  # not saved here; main() saves the returned individuals in one batch
+ else:
+ logger.info('No update needed for individual %s' % individual.id)
+ return None
+
+
+def update_object(obj, backup_values, operator, kb, logger):  # Set obj.lastUpdate to a new ActionOnAction that records backup_values.
+ logger.debug('Building ActionOnAction for object %s' % obj.id)
+ act_setup = build_action_setup('update-parents-%f' % time.time(),  # the label embeds the current timestamp
+ backup_values, kb, logger)
+ aoa_conf = {
+ 'setup': act_setup,
+ 'actionCategory': kb.ActionCategory.UPDATE,
+ 'operator': operator,
+ 'target': obj.lastUpdate if obj.lastUpdate else obj.action,  # chain onto the latest update, else onto the original action
+ 'context': obj.action.context
+ }
+ logger.debug('Updating object with new ActionOnAction')
+ obj.lastUpdate = kb.factory.create(kb.ActionOnAction, aoa_conf)  # obj itself is not saved here
+
+
+def build_action_setup(label, backup, kb, logger):  # Create an ActionSetup whose conf is the JSON text {"backup": backup}.
+ logger.debug('Creating a new ActionSetup with label %s and backup %r' % (label,
+ backup))
+ conf = {
+ 'label': label,
+ 'conf': json.dumps({'backup': backup})  # the old values are stored as a JSON string
+ }
+ asetup = kb.factory.create(kb.ActionSetup, conf)  # created only; no save() is called in this function
+ return asetup
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('update_parents', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Retrieved %d individuals' % len(inds))
+ inds_lookup = {}
+ for i in inds:
+ inds_lookup[i.id] = i
+
+ with open(args.in_file) as in_file:
+ to_be_updated = []
+ reader = csv.DictReader(in_file, delimiter='\t')
+ for row in reader:
+ ind = inds_lookup[row['individual']]
+ father = inds_lookup[row['father']] if row['father'] != 'None' else None
+ mother = inds_lookup[row['mother']] if row['mother'] != 'None' else None
+ ind = update_parents(ind, father, mother, args.operator, kb, logger)
+ if ind:
+ to_be_updated.append(ind)
+
+ logger.info('%d individuals are going to be updated' % len(to_be_updated))
+ kb.save_array(to_be_updated)
+ logger.info('Update complete')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/update_parents_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_parents_data.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,54 @@
+
+
+ Update parental info of individuals
+
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #update_parents.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --operator=$__user_email__
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Updates the parental information of individuals using data from a file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/update_titerplates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_titerplates.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,126 @@
+'''
+From a file like this
+label barcode container_status rows columns
+A_TITERPLATE XXYYZZ111 INSTOCK 8 12
+B_TITERPLATE XXYYZZ112 INSTOCK 8 12
+C_TITERPLATE XXYYZZ113 READY 8 12
+
+updates barcode, status and geography of given titerplates
+'''
+import sys, csv, argparse
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+from bl.vl.kb.drivers.omero.objects_collections import ContainerStatus
+
+
+def make_parser():
+    """Build the command line parser of the titerplates updater.
+
+    Besides the logging and OMERO connection options, the parser
+    requires the operator and the input file, and accepts an
+    ``--overwrite`` flag that is off unless given.
+    """
+    p = argparse.ArgumentParser(description='update titerplates')
+    p.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    p.add_argument('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+                   help='logging level (default=INFO)')
+    # The three OMERO connection options share the same shape.
+    for short_flag, long_flag, description in (
+            ('-H', '--host', 'omero hostname'),
+            ('-U', '--user', 'omero user'),
+            ('-P', '--passwd', 'omero password')):
+        p.add_argument(short_flag, long_flag, type=str, help=description)
+    p.add_argument('-O', '--operator', type=str, required=True,
+                   help='operator')
+    p.add_argument('-i', '--in_file', type=str, required=True,
+                   help='input file with information to update')
+    p.add_argument('--overwrite', dest='overwrite', action='store_true',
+                   help='enable overwrite of data. Default is disabled ')
+    p.set_defaults(overwrite=False)
+    return p
+
+
+def update_titerplates(titerplate, barcode, container_status, rows, columns,
+                       overwrite, logger):
+    """Apply the given values to ``titerplate`` and report whether it changed.
+
+    A value of ``None`` means "leave this field alone". With ``overwrite``
+    true every non-``None`` value is written; otherwise a value is written
+    only when the matching attribute of the plate is currently ``None``.
+    ``container_status`` is the name of a ``ContainerStatus`` member and
+    is resolved with ``getattr`` only when the status is really written.
+
+    Returns ``titerplate`` if at least one attribute was written,
+    otherwise logs that the update was denied and returns ``None``.
+    """
+    logger.info("Updating {}".format(titerplate.label))
+    candidates = (
+        ('barcode', barcode),
+        ('status', container_status),
+        ('rows', rows),
+        ('columns', columns),
+    )
+    changed = False
+    for attr, value in candidates:
+        if value is None:
+            continue
+        if not overwrite and getattr(titerplate, attr) is not None:
+            # Existing data is preserved unless overwrite was requested.
+            continue
+        if attr == 'status':
+            value = getattr(ContainerStatus, value)
+        setattr(titerplate, attr, value)
+        changed = True
+
+    if changed:
+        return titerplate
+    logger.info('Update denied for titerplate {}'.format(titerplate.label))
+    return None
+
+
+def main(argv):
+    """Update the titerplates listed in a TSV file.
+
+    ``argv`` is the command line without the program name. The input
+    file must have a ``label`` column; ``barcode``, ``container_status``,
+    ``rows`` and ``columns`` are optional and only the columns that are
+    present are passed on to ``update_titerplates``. Rows whose label
+    matches no TiterPlate are skipped with a warning. All the plates
+    that were changed are saved in one batch.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('update_titerplates', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        logger.critical(ve)
+        sys.exit(ve)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    to_be_updated = []
+    with open(args.in_file) as in_file:
+        reader = csv.DictReader(in_file, delimiter='\t')
+        # fieldnames is None for an empty file: treat it as "no columns"
+        # so that the missing-label error below is reported.
+        if 'label' not in (reader.fieldnames or []):
+            msg = 'Missing required field "label"'
+            logger.critical(msg)
+            sys.exit(msg)
+        for row in reader:
+            tp_label = row['label']
+            tp = kb.get_by_label(kb.TiterPlate, tp_label)
+            if tp is None:
+                logger.warning('No titerplate with label {}, '
+                               'skipping'.format(tp_label))
+                continue
+            # row.get() yields None for the columns missing from the file,
+            # which update_titerplates reads as "do not touch".
+            tp_up = update_titerplates(tp, row.get('barcode'),
+                                       row.get('container_status'),
+                                       row.get('rows'), row.get('columns'),
+                                       args.overwrite, logger)
+            if tp_up:
+                to_be_updated.append(tp_up)
+
+    logger.info('%d titerplates are going to be updated' % len(to_be_updated))
+    kb.save_array(to_be_updated)
+    logger.info('Update complete')
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/updater/update_titerplates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_titerplates.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,60 @@
+
+
+ Update titerplates infodata
+
+
+ #if $__app__.config.vl_import_enabled_users.split(',').count($__user_email__)==1 #update_titerplates.py
+ #else #unauthorized_access.py
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+ --in_file=${input1}
+ --operator=$__user_email__
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Updates titerplate data using information from a file like this::
+
+ label barcode container_status rows columns
+ A_TP 55555555 DISCARDED 8 12
+ B_TP 66666666 DISCARDED 8 12
+
+
+The first column (label) is mandatory; include the other columns only for the fields that you want to update.
+
+For example, to update only the barcode, the file needs just the label and barcode columns.
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,89 @@
+"""
+This tool produces the files used as input by 'prepare_seq_dsample_inputs'.
+
+It takes an Illumina samplesheet as input and produces as output:
+ * the same samplesheet with a new column 'SampleProject'
+ * a configuration file
+The configuration file is a YAML file with the following structure:
+
+ config_parameters:
+ study_label: study_label
+
+where study_label is mandatory
+"""
+
+import csv, sys, argparse, logging, yaml
+
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+    """Build the command line parser of the tool.
+
+    The input samplesheet and the study label are required; the two
+    output paths default to files in the current directory.
+    """
+    cli = argparse.ArgumentParser(
+        description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs')
+    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                     default='INFO', help='logging level')
+    cli.add_argument('--in-file', '-i', type=str, required=True,
+                     help='input file')
+    cli.add_argument('--samplesheet-output-file', type=str,
+                     default='./samplesheet.tsv',
+                     help='output file containing flowcell samplesheet')
+    cli.add_argument('--config-output-file', type=str,
+                     default='./config_parameters.yaml',
+                     help='output file containing config definitions')
+    cli.add_argument('--study', type=str, required=True,
+                     help='study label parameter for YAML file')
+    return cli
+
+
+def read_samplesheet(in_file):
+    """Read a tab-separated file with a header line.
+
+    Returns a ``(records, fieldnames)`` tuple: ``records`` is the list
+    of rows as produced by ``csv.DictReader`` and ``fieldnames`` is the
+    header of the file (``None`` when the file is empty).
+    """
+    # Python 2 needs the 'U' flag for universal newlines; on Python 3 it
+    # is the default and the flag itself was removed in 3.11.
+    mode = 'rU' if sys.version_info[0] < 3 else 'r'
+    with open(in_file, mode) as f:
+        reader = csv.DictReader(f, delimiter='\t')
+        recs = list(reader)
+        fieldnames = reader.fieldnames
+    return recs, fieldnames
+
+
+def write_samplesheet_extended(recs, fields, out_file, sample_project_label):
+    """Write ``recs`` as a TSV file with an extra ``SampleProject`` column.
+
+    ``fields`` is the header of the input records; the ``SampleProject``
+    column is appended to it unless already present and is set to
+    ``sample_project_label`` on every row. Neither ``fields`` nor the
+    records are modified.
+    """
+    # Work on a copy: the caller's header list must not grow as a side
+    # effect, and a header that already has the column must not get it twice.
+    out_fields = list(fields or [])
+    if 'SampleProject' not in out_fields:
+        out_fields.append('SampleProject')
+    # The csv module wants a binary file on Python 2 and a text file with
+    # untranslated newlines on Python 3.
+    if sys.version_info[0] < 3:
+        f = open(out_file, 'wb')
+    else:
+        f = open(out_file, 'w', newline='')
+    with f:
+        writer = csv.DictWriter(f, out_fields, delimiter='\t')
+        writer.writeheader()
+        for row in recs:
+            writer.writerow(dict(row, SampleProject=sample_project_label))
+
+
+def write_yaml_config_file(out_file, study_label):
+    """Dump the configuration for ``study_label`` to ``out_file`` as YAML.
+
+    The document has a single ``config_parameters`` mapping whose
+    ``study_label`` entry is the given label.
+    """
+    parameters = {'study_label': study_label}
+    with open(out_file, 'w') as yaml_file:
+        yaml.dump({'config_parameters': parameters}, yaml_file)
+
+
+def main(argv):
+    """Read the samplesheet, then write the extended copy and the YAML file.
+
+    ``argv`` is the command line without the program name. The study
+    label given with ``--study`` is used both as the value of the new
+    samplesheet column and as the ``study_label`` of the configuration.
+    """
+    args = make_parser().parse_args(argv)
+
+    logging_conf = dict(format=LOG_FORMAT, datefmt=LOG_DATEFMT,
+                        level=getattr(logging, args.loglevel))
+    if args.logfile:
+        logging_conf['filename'] = args.logfile
+    logging.basicConfig(**logging_conf)
+    logger = logging.getLogger('before_prepare_seq_dsample_inputs')
+
+    logger.info('Loading data from file %s' % args.in_file)
+    records, header = read_samplesheet(args.in_file)
+    logger.debug('Retrieved %d records', len(records))
+
+    logger.info('Writing file %s' % args.samplesheet_output_file)
+    write_samplesheet_extended(records, header,
+                               args.samplesheet_output_file, args.study)
+
+    logger.info('Writing file %s' % args.config_output_file)
+    write_yaml_config_file(args.config_output_file, args.study)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/before_prepare_seq_dsample_inputs.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,49 @@
+
+
+ prepare inputs for VLUTIL.prepare_seq_dsample_inputs
+
+
+
+ before_prepare_seq_dsample_inputs.py
+ --in-file=${infile}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --config-output-file=${config_ofile}
+ --samplesheet-output-file=${samplesheet_ofile}
+ --study=${study}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool produces the files used as input by 'prepare_seq_dsample_inputs'.
+
+It takes an Illumina samplesheet as input and produces as output:
+ * the same samplesheet with a new column 'SampleProject'
+ * a configuration file
+The configuration file is a YAML file with the following structure:
+
+ config_parameters:
+ study_label: study_label
+
+where study_label is mandatory
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/build_enrollments_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/build_enrollments_import.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,187 @@
+"""
+Prepare a TSV file, ready to be imported, with a study code for each
+individual not yet enrolled in the specified study.
+
+Optionally, a second study can be specified: the individuals enrolled in
+it are ignored.
+
+The report file contains the enrollment codes in the other studies.
+
+Codes are short hashes of sequential numbers, generated with Hashids
+(hashids.org) using the study label as salt.
+
+ex:
+source study label
+V03CB1DB357B274B17B139EA56A2FFA19E AUTOIMMUNITY ORVL5KMK5
+V0BA695C2E326F4C13AD7F6052BB20539B AUTOIMMUNITY 9R0M2E12N
+V067C445E35DA04ECCA21FA3E2DF3BBCF6 AUTOIMMUNITY QGZLQJ1RV
+...
+
+"""
+
+import argparse
+import csv
+import string
+import sys
+
+from hashids import Hashids
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import LOG_LEVELS, get_logger
+import bl.vl.utils.ome_utils as vlu
+
+
+def make_parser():
+    """Build the command line parser of the enrollments builder.
+
+    The study label and the output file are required; the report file
+    defaults to ``report.tsv``.
+    """
+    cli = argparse.ArgumentParser(
+        description='Retrieve all individuals not enrolled in the specified project')
+    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                     default='INFO', help='logger level')
+    cli.add_argument('--study', type=str, required=True, help='Study label')
+    cli.add_argument('--study_to_be_ignored', type=str,
+                     help='Study label to be ignored')
+    # OMERO connection options.
+    for flag, description in (('--host', 'Omero hostname'),
+                              ('--user', 'Omero user'),
+                              ('--passwd', 'Omero password')):
+        cli.add_argument(flag, type=str, help=description)
+    cli.add_argument('--ofile', type=str, required=True,
+                     help='output file path')
+    cli.add_argument('--reportfile', type=str, default='report.tsv',
+                     help='report file')
+    return cli
+
+
+def init_hashids(study):
+    """Return a Hashids encoder salted with the ``study`` label.
+
+    Codes are at least 9 characters long and use upper case ASCII
+    letters and digits only.
+    """
+    code_alphabet = string.ascii_uppercase + string.digits
+    return Hashids(salt=study, min_length=9, alphabet=code_alphabet)
+
+
+def write_csv_to_be_enrolled(logger, hashids, path, inds_map,
+                             highest_id=0):
+    """Write the TSV with the enrollments to be imported.
+
+    Every record of ``inds_map`` gets a ``label`` produced by
+    ``hashids.encrypt`` from a progressive number and is written to
+    ``path`` under the ``source``, ``study`` and ``label`` columns.
+    Numbering continues from ``highest_id``, so the first new code
+    encodes ``highest_id + 1``. The records in ``inds_map`` are updated
+    in place with their new label.
+    """
+    logger.debug('Writing CSV file %s' % path)
+    with open(path, 'w') as out:
+        tsv = csv.DictWriter(out, ['source', 'study', 'label'],
+                             delimiter='\t', quotechar='"', restval='None')
+        tsv.writeheader()
+        for study_id, record in enumerate(inds_map.values(), highest_id + 1):
+            record['label'] = hashids.encrypt(study_id)
+            tsv.writerow(record)
+
+
+def write_csv_enrollment_codes(logger, filename, csv_header, enrolls_map):
+    """Write the report of the enrollment codes as a TSV file.
+
+    Each value of ``enrolls_map`` is written as one row under the
+    ``csv_header`` columns; the columns missing from a row are filled
+    with the string ``None``.
+    """
+    logger.debug('Writing CSV file %s' % filename)
+    with open(filename, 'w') as report:
+        tsv = csv.DictWriter(report, csv_header, delimiter='\t',
+                             quotechar='"', restval='None')
+        tsv.writeheader()
+        for record in enrolls_map.values():
+            tsv.writerow(record)
+
+
+def get_enrollments_codes(logger, kb, inds_map):
+    """Collect the codes that the individuals in ``inds_map`` have elsewhere.
+
+    All the studies are loaded from the knowledge base and, for each
+    study with at least one enrollment, the study label becomes a
+    column of the report. Returns ``(csv_header, enrolls_map)``:
+    ``enrolls_map`` is keyed by the individual's ``omero_id`` and each
+    entry holds ``individual_uuid`` plus one study code per study label.
+    """
+    all_studies = kb.get_objects(kb.Study)
+    logger.info('Retrieved %d studies from database' % len(all_studies))
+
+    header = ['individual_uuid']
+    codes_by_individual = {}
+    for study in all_studies:
+        logger.info('Retrieving enrollments for study %s' % study.label)
+        enrollments = kb.get_enrolled(study)
+        logger.info('%s enrollments retrieved' % len(enrollments))
+        if len(enrollments) == 0:
+            logger.debug('No enrollments found, skip study %s' % study.label)
+            continue
+        logger.debug('Building lookup dictionary....')
+        header.append(study.label)  # one report column per study
+        for enrollment in enrollments:
+            individual = enrollment.individual
+            if individual.id not in inds_map:
+                continue
+            entry = codes_by_individual.setdefault(individual.omero_id, {})
+            entry['individual_uuid'] = individual.id
+            entry[study.label] = enrollment.studyCode
+
+    return header, codes_by_individual
+
+
+def main(argv):
+    """Build the enrollments import file and the related report.
+
+    ``argv`` is the command line without the program name. All the
+    individuals are loaded; those already enrolled in ``--study`` and,
+    when given and found, in ``--study_to_be_ignored`` are discarded.
+    The remaining ones are written to ``--ofile`` with new study codes
+    numbered after the highest number decoded from the existing codes
+    of the study. The codes of those individuals in the other studies
+    go to ``--reportfile``.
+
+    The process exits with an error message when the connection
+    parameters cannot be resolved or the study is not in the database.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('inds_not_enrolled', level=args.loglevel,
+                        filename=args.logfile)
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        logger.critical(ve)
+        sys.exit(ve)
+
+    out_file_path = args.ofile
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    inds = kb.get_objects(kb.Individual)
+    logger.info('Retrieved {} individuals'.format(len(inds)))
+
+    inds_map = {}
+    for i in inds:
+        inds_map[i.id] = {'source': i.id, 'study': args.study}
+
+    study = kb.get_by_label(kb.Study, args.study)
+    if study:
+        logger.info('{} present in the database'.format(study.label))
+    else:
+        # Exit with a message (non-zero status): a bare sys.exit() would
+        # report success to the caller.
+        msg = '{} not present in the database'.format(args.study)
+        logger.critical(msg)
+        sys.exit(msg)
+
+    hashids = init_hashids(study.label)
+    enrolls = kb.get_enrolled(study)
+    logger.info("{} enrollments founded in {}".format(len(enrolls),
+                                                      study.label))
+    highest_id = 0
+    for e in enrolls:
+        inds_map.pop(e.individual.id, None)
+        # The decoder returns a sequence of numbers, empty when the code
+        # cannot be decoded with this salt/alphabet: compare the number
+        # itself and ignore the codes that do not decode.
+        decoded = hashids.decrypt(e.studyCode)
+        if decoded and decoded[0] > highest_id:
+            highest_id = decoded[0]
+
+    if args.study_to_be_ignored:
+        ignored_study = kb.get_by_label(kb.Study, args.study_to_be_ignored)
+        if ignored_study:
+            enr = kb.get_enrolled(ignored_study)
+            logger.info('Retrieved {} enrollments from {}'.format(
+                len(enr), args.study_to_be_ignored))
+            for e in enr:
+                inds_map.pop(e.individual.id, None)
+        else:
+            logger.warning('{} not present in the database, nothing to '
+                           'ignore'.format(args.study_to_be_ignored))
+
+    logger.info('{} individuals to be enrolled'.format(len(inds_map)))
+
+    write_csv_to_be_enrolled(logger, hashids, out_file_path, inds_map, highest_id)
+
+    csv_header, enrolls_map = get_enrollments_codes(logger, kb, inds_map)
+    write_csv_enrollment_codes(logger, args.reportfile, csv_header, enrolls_map)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/check_tsv_header.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/check_tsv_header.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,61 @@
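+"""
+Check that the header of a TSV file only contains the columns expected
+for titer plates (study, label, barcode, rows, columns).
+"""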
+import csv, sys, argparse, logging
+
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+    """Build the command line parser of the header checker.
+
+    Only the input file is required; logging options are optional.
+    """
+    # The description used to be a copy of the one of another tool.
+    parser = argparse.ArgumentParser(description='check the header of a TSV file')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                        help='logging level', default='INFO')
+    parser.add_argument('--in-file', '-i', type=str, required=True,
+                        help='input file')
+    return parser
+
+def read_samplesheet(in_file):
+    """Read a tab-separated file with a header line.
+
+    Returns a ``(records, fieldnames)`` tuple: ``records`` is the list
+    of rows as produced by ``csv.DictReader`` and ``fieldnames`` is the
+    header of the file (``None`` when the file is empty).
+    """
+    # Python 2 needs the 'U' flag for universal newlines; on Python 3 it
+    # is the default and the flag itself was removed in 3.11.
+    mode = 'rU' if sys.version_info[0] < 3 else 'r'
+    with open(in_file, mode) as f:
+        reader = csv.DictReader(f, delimiter='\t')
+        recs = list(reader)
+        fieldnames = reader.fieldnames
+    return recs, fieldnames
+
+def is_header_different(titer_plate_header, header_to_be_checked):
+    """Tell whether the checked header has columns that are not expected.
+
+    Returns ``True`` when ``header_to_be_checked`` contains at least one
+    name missing from ``titer_plate_header``, ``False`` otherwise.
+    Expected columns that are absent from the checked header do not
+    count as a difference.
+    """
+    unexpected = set(header_to_be_checked) - set(titer_plate_header)
+    return bool(unexpected)
+
+def main(argv):
+    """Check the header of the input file against the titer plate columns.
+
+    ``argv`` is the command line without the program name. When the
+    header contains unexpected columns a message is written to stderr
+    and the process exits with status 2.
+    """
+    args = make_parser().parse_args(argv)
+
+    logging_conf = dict(format=LOG_FORMAT, datefmt=LOG_DATEFMT,
+                        level=getattr(logging, args.loglevel))
+    if args.logfile:
+        logging_conf['filename'] = args.logfile
+    logging.basicConfig(**logging_conf)
+    logger = logging.getLogger('check tsv header')
+
+    logger.info('Loading data from file %s' % args.in_file)
+    records, header = read_samplesheet(args.in_file)
+    logger.debug('Retrieved %d records', len(records))
+
+    expected = ['study', 'label', 'barcode', 'rows', 'columns']
+
+    if not is_header_different(expected, header):
+        logger.info("Header check passed")
+        return
+    logger.info("Tsv file header has to have this elements: {}".format(expected))
+    # hack to support old galaxy
+    sys.stderr.write("Headers not compliant to: {}".format(expected))
+    sys.exit(2)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/check_tsv_header.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/check_tsv_header.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,27 @@
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/delete_flowcell_results.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/delete_flowcell_results.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,97 @@
+import argparse, sys
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import get_logger, LOG_LEVELS
+import bl.vl.utils.ome_utils as vlu
+
+
+def make_parser():
+    """Build the command line parser of the flowcell results remover.
+
+    The flowcell and sample labels are required; ``--dry-run`` turns
+    the run into a simulation.
+    """
+    cli = argparse.ArgumentParser(
+        description='remove datasamples connected to a specific sample of a flowcell')
+    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                     default='INFO', help='logging level (default=INFO)')
+    # OMERO connection options.
+    for short_flag, long_flag, description in (
+            ('-H', '--host', 'omero server hostname'),
+            ('-U', '--user', 'omero user'),
+            ('-P', '--passwd', 'omero password')):
+        cli.add_argument(short_flag, long_flag, type=str, help=description)
+    cli.add_argument('--flowcell-label', type=str, required=True,
+                     help='flowcell label')
+    cli.add_argument('--sample-label', type=str, required=True,
+                     help='sample label')
+    cli.add_argument('--dry-run', action='store_true',
+                     help="run a simulation, don't delete anything")
+    return cli
+
+
+def get_flowcell_samples_map(flowcell, kb, logger):
+    """Group the SeqDataSamples connected to a flowcell by sample label.
+
+    ``flowcell`` is the label of the flowcell. Data samples without a
+    sample are grouped under the ``NO_SAMPLE`` key. When no flowcell
+    has the given label the process exits with status 0.
+    """
+    fc = kb.get_by_label(kb.FlowCell, flowcell)
+    if not fc:
+        logger.info('No flowcell with label %s', flowcell)
+        sys.exit(0)
+    logger.info('Loading data for flowcell %s', flowcell)
+    grouped = {}
+    for dsample in kb.dt.get_connected(fc, aklass=kb.SeqDataSample):
+        key = dsample.sample.label if dsample.sample else 'NO_SAMPLE'
+        grouped.setdefault(key, []).append(dsample)
+    return grouped
+
+
+def print_report(dsamples_map, sample_label, kb, logger):
+    """Log the data samples of ``sample_label`` and their data objects.
+
+    When ``dsamples_map`` has no data samples for the label the process
+    exits with status 0.
+    """
+    selected = dsamples_map.get(sample_label)
+    if not selected:
+        logger.info('No sample with label %s is related to the flowcell', sample_label)
+        sys.exit(0)
+    for dsample in selected:
+        data_objects = kb.get_data_objects(dsample)
+        logger.info('## data sample: %s', dsample.label)
+        for data_object in data_objects:
+            logger.info('### data object: %s --- mimetype: %s',
+                        data_object.path, data_object.mimetype)
+
+
+def delete(dsamples_map, sample_label, kb, logger):
+    """Delete the data samples of ``sample_label`` with their data objects.
+
+    Data samples whose label starts with ``stage1`` are left untouched.
+    For the others the data objects are deleted first, then the data
+    sample, then its action; a failure while deleting the action is
+    logged and otherwise ignored.
+    """
+    for ds in dsamples_map[sample_label]:
+        # this is a hack specific for the automator workflow
+        if ds.label.startswith('stage1'):
+            continue
+        ds_label = ds.label
+        logger.info('Deleting data for %s', ds_label)
+        for d in kb.get_data_objects(ds):
+            kb.delete(d)
+        a = ds.action
+        kb.delete(ds)
+        try:
+            kb.delete(a)
+        except Exception as e:
+            # Best effort: keep going, but leave a trace of the reason
+            # and let KeyboardInterrupt/SystemExit through.
+            logger.debug('Action of %s not deleted: %s', ds_label, e)
+
+
+def main(argv):
+    """Report and, unless ``--dry-run`` is given, delete flowcell results.
+
+    ``argv`` is the command line without the program name. The data
+    samples produced from the flowcell for the given sample are always
+    reported in the log; they are deleted only when the run is not a
+    simulation.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('delete_flowcell_results', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        logger.critical(ve)
+        sys.exit(ve)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    dsamples_map = get_flowcell_samples_map(args.flowcell_label, kb, logger)
+    print_report(dsamples_map, args.sample_label, kb, logger)
+    if args.dry_run:
+        logger.debug('SIMULATION, exit now')
+    else:
+        # kb and logger are two distinct arguments (this used to read
+        # "kb. logger", i.e. the attribute kb.logger).
+        delete(dsamples_map, args.sample_label, kb, logger)
+    logger.info('Job completed')
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/delete_flowcell_results.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/delete_flowcell_results.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,35 @@
+
+
+
+ Delete results produced from a flowcell related to a specific sample
+
+
+
+ delete_flowcell_results.py
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --flowcell-label=${flowcell_label}
+ --sample-label=${sample_label}
+ #if $simulate
+ --dry-run
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+Delete data samples and related data objects produced from the given flowcell and related to the given sample.
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/drop_flowcell_related_items.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/drop_flowcell_related_items.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,124 @@
+import argparse, sys
+from collections import Counter
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+from bl.vl.kb.drivers.omero.sequencing import SeqDataSample, SequencerOutput
+
+
+def make_parser():
+    """Build the command line parser of the flowcell items remover.
+
+    The flowcell label is required; ``--datasets-only`` restricts the
+    deletion to data samples and data objects.
+    """
+    cli = argparse.ArgumentParser(
+        description='delete all items related to the given flowcell')
+    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                     default='INFO', help='logging level (default=INFO)')
+    # OMERO connection options.
+    for short_flag, long_flag, description in (
+            ('-H', '--host', 'omero hostname'),
+            ('-U', '--user', 'omero user'),
+            ('-P', '--passwd', 'omero password')):
+        cli.add_argument(short_flag, long_flag, type=str, help=description)
+    cli.add_argument('--flowcell-label', type=str, required=True,
+                     help='flowcell label')
+    cli.add_argument('--datasets-only', action='store_true',
+                     help='delete only data samples and data objects related to the given flowcell')
+    return cli
+
+
+def get_sources(objs):
+    """Return the set of ``action.target`` values of the given objects.
+
+    Objects for which the lookup raises ``AttributeError`` (for
+    instance because the action has no ``target``) contribute nothing.
+    """
+    found = set()
+    for item in objs:
+        try:
+            source = item.action.target
+        except AttributeError:
+            # no source for this item
+            continue
+        found.add(source)
+    return found
+
+
+def delete_objects(objs, kb, logger, max_retries = 3):
+    """Delete every object in ``objs``, retrying the ones that fail.
+
+    ``objs`` is consumed from the front. For SeqDataSample and
+    SequencerOutput objects the related data objects are deleted first.
+    After an object is deleted its action is deleted too, on a best
+    effort basis. An object whose deletion fails is put back at the end
+    of the list until it has been put back ``max_retries`` times, then
+    it is skipped.
+    """
+    retry_ct = Counter()
+    while len(objs) > 0:
+        o = objs.pop(0)
+        if type(o) in [SeqDataSample, SequencerOutput]:
+            logger.info('Loading DataObjects for %s:%s' % (o.__class__.__name__,
+                                                           o.label))
+            dobjs = kb.get_data_objects(o)
+            logger.info('%d DataObjects loaded' % len(dobjs))
+            for d in dobjs:
+                logger.info('Deleting %s:%s' % (d.__class__.__name__,
+                                                d.path))
+                kb.delete(d)
+        try:
+            logger.info('Deleting %s:%s' % (o.__class__.__name__,
+                                            o.id))
+            act = o.action
+            kb.delete(o)
+            try:
+                logger.info('Deleting source action %s:%s' % (act.__class__.__name__,
+                                                              act.id))
+                kb.delete(act)
+            except Exception as act_error:
+                logger.info('Can\'t delete action')
+                logger.debug('Reason: %s', act_error)
+        except Exception as error:
+            # Exception (not a bare except) keeps the retry logic while
+            # letting KeyboardInterrupt/SystemExit stop the run.
+            logger.info('Can\'t delete, putting back into objects list')
+            logger.debug('Reason: %s', error)
+            retry_key = '%s:%s' % (type(o), o.id)
+            if retry_ct[retry_key] < max_retries:
+                objs.append(o)
+                retry_ct[retry_key] += 1
+            else:
+                logger.info('Reached maximum retry limit for the object, skipping')
+
+def main(argv):
+    """Delete the items related to a flowcell.
+
+    ``argv`` is the command line without the program name. The lanes,
+    lane slots, source tubes, sequencer outputs and sequencing data
+    samples related to the flowcell are loaded first. With
+    ``--datasets-only`` only the data samples are deleted; otherwise
+    the groups are deleted in this order: data samples, sequencer
+    outputs, lane slots, lanes, the flowcell itself and the tubes.
+    When no flowcell has the given label the process exits with
+    status 0.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('drop_flowcell_related_items', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        logger.critical(ve)
+        sys.exit(ve)
+
+    kb = KB(driver='omero')(host, user, passwd)
+
+    logger.info('Retrieving flowcell with label %s' % args.flowcell_label)
+    query = 'SELECT fc FROM FlowCell fc WHERE fc.label = :fc_label'
+    results = kb.find_all_by_query(query, {'fc_label' : args.flowcell_label})
+    if len(results) == 0:
+        logger.info('No FlowCell with label %s in the database' % args.flowcell_label)
+        sys.exit(0)
+    fc = results[0]
+    logger.info('Loaded FlowCell with ID %s' % fc.id)
+    logger.info('Loading related Lanes')
+    lanes = list(kb.get_lanes_by_flowcell(fc))
+    logger.info('%d Lanes loaded' % len(lanes))
+    logger.info('Loading related LaneSlots')
+    lane_slots = []
+    for l in lanes:
+        lane_slots.extend(kb.get_laneslots_by_lane(l))
+    logger.info('%d LaneSlots loaded' % len(lane_slots))
+    logger.info('Loading related Tubes')
+    # Sources are collected before anything is deleted: first the
+    # sources of the lane slots, then the sources of those.
+    sub_samples = get_sources(lane_slots)
+    samples = get_sources(sub_samples)
+    logger.info('%d Tubes loaded' % (len(sub_samples) + len(samples)))
+    logger.info('Loading related SequencerOutputs')
+    seq_out = kb.dt.get_connected(fc, aklass = kb.SequencerOutput, direction = kb.dt.DIRECTION_OUTGOING)
+    logger.info('%d SequencerOutputs loaded' % len(seq_out))
+    logger.info('Loading related SeqDataSamples')
+    seq_dsamples = kb.dt.get_connected(fc, aklass = kb.SeqDataSample, direction = kb.dt.DIRECTION_OUTGOING)
+    logger.info('%d SeqDataSamples loaded' % len(seq_dsamples))
+
+    if args.datasets_only:
+        delete_items = [seq_dsamples]
+    else:
+        delete_items = [seq_dsamples, seq_out, lane_slots, lanes,
+                        [fc], list(sub_samples), list(samples)]
+    for items in delete_items:
+        delete_objects(items, kb, logger)
+
+    logger.info('Job completed')
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/format_vessels_by_individual_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/format_vessels_by_individual_output.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,91 @@
+# This tool formats the output files of kb_query vessels_by_individual
+# into a tabular format with all the data related to an individual grouped
+# in a single row. The tool needs as input a mapping file like
+#
+# individual label
+# V12311 A_STUDY:A_CODE
+# V135115 A_STUDY:B_CODE
+#
+# in order to use a known label and not VIDs for each row
+
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+    """Build the command line parser of the formatter.
+
+    The input, mapping and output files are all required.
+    """
+    cli = argparse.ArgumentParser(
+        description='format kb_query vessels_by_individual output file to tabular format')
+    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                     default='INFO', help='logging level')
+    # The three file options are all mandatory strings.
+    for flag, description in (
+            ('--in_file', 'input file (obtained using kb_query vessels by individual tool)'),
+            ('--map_file', 'mapping file'),
+            ('--out_file', 'output file')):
+        cli.add_argument(flag, type=str, required=True, help=description)
+    return cli
+
+def get_mapping(records, grouper_field, grouped_field):
+    """Group the ``grouped_field`` values by their ``grouper_field`` value.
+
+    Returns a dict that maps each distinct value of ``grouper_field``
+    to the list of the ``grouped_field`` values of the records sharing
+    it, in input order.
+    """
+    groups = {}
+    for record in records:
+        key = record[grouper_field]
+        if key not in groups:
+            groups[key] = []
+        groups[key].append(record[grouped_field])
+    return groups
+
+def get_labels_mapping(reader, logger):
+    """Map each individual to the list of its labels.
+
+    ``reader`` yields records with the ``individual`` and ``label``
+    fields; the number of records and of individuals is logged.
+    """
+    records = list(reader)
+    labels_by_individual = get_mapping(records, 'individual', 'label')
+    logger.info('%d labels grouped for %d individuals' %
+                (len(records), len(labels_by_individual)))
+    return labels_by_individual
+
+def get_vessels_mapping(reader, logger):
+    """Map each individual to the list of its vessel labels.
+
+    ``reader`` yields records with the ``individual`` and
+    ``vessel_label`` fields; the number of records and of individuals
+    is logged.
+    """
+    records = list(reader)
+    vessels_by_individual = get_mapping(records, 'individual', 'vessel_label')
+    logger.info('%d vessels grouped for %d individuals' %
+                (len(records), len(vessels_by_individual)))
+    return vessels_by_individual
+
+def build_record(label, vessels):
+    """Build the output row of one individual.
+
+    ``label`` is the list of the labels of the individual: they are
+    joined with ``--`` into ``individual_label``. Each vessel is stored
+    under ``vessel_<n>``, numbered from 1 following the list order.
+    """
+    record = {'individual_label': '--'.join(label)}
+    # enumerate gives every vessel its own column even when a label is
+    # repeated; vessels.index() sent duplicates to the same column.
+    for position, vessel in enumerate(vessels, 1):
+        record['vessel_%d' % position] = vessel
+    return record
+
+def main(argv):
+    """Write one row per individual with all of its vessels.
+
+    ``argv`` is the command line without the program name. The mapping
+    file provides the labels of the individuals, the input file their
+    vessels; the output has an ``individual_label`` column followed by
+    as many ``vessel_<n>`` columns as the largest group of vessels.
+    The process exits with an error message, before the output file is
+    created, if an individual with vessels has no label in the mapping
+    file.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    log_level = getattr(logging, args.loglevel)
+    kwargs = {'format' : LOG_FORMAT,
+              'datefmt' : LOG_DATEFMT,
+              'level' : log_level}
+    if args.logfile:
+        kwargs['filename'] = args.logfile
+    logging.basicConfig(**kwargs)
+    logger = logging.getLogger()
+
+    with open(args.map_file) as mf:
+        reader = csv.DictReader(mf, delimiter='\t')
+        labels_map = get_labels_mapping(reader, logger)
+
+    with open(args.in_file) as inf:
+        reader = csv.DictReader(inf, delimiter='\t')
+        vessels_map = get_vessels_mapping(reader, logger)
+
+    unmapped = sorted(ind for ind in vessels_map if ind not in labels_map)
+    if unmapped:
+        msg = 'No label in mapping file for: %s' % ', '.join(unmapped)
+        logger.critical(msg)
+        sys.exit(msg)
+
+    # "or [0]": an input file without records gives no vessel columns
+    # instead of making max() fail on an empty sequence.
+    max_vessels_count = max([len(v) for v in vessels_map.values()] or [0])
+    csv_fields = ['individual_label']
+    csv_fields.extend('vessel_%d' % (x + 1) for x in range(max_vessels_count))
+
+    with open(args.out_file, 'w') as ofile:
+        writer = csv.DictWriter(ofile, csv_fields, delimiter='\t')
+        writer.writeheader()
+        for ind, vessels in vessels_map.items():
+            writer.writerow(build_record(labels_map[ind], vessels))
+
+    logger.info('Job completed')
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/format_vessels_by_individual_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/format_vessels_by_individual_output.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,29 @@
+
+
+ Format the output from VLU.vessels_by_individual into a tabular
+ format
+
+
+ format_vessels_by_individual_output.py
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --in_file=${in_file}
+ --map_file=${map_file}
+ --out_file=${out_file}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/from_well_to_illumina_measures.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/from_well_to_illumina_measures.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,95 @@
+import sys, argparse, csv
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import get_logger, LOG_LEVELS
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.graph.drivers.neo4j import Neo4JDriver
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='map wells label to illumina bead chip measures')
+ parser.add_argument('--logfile', type=str, help='log file (deafult=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='OMERO server hostname')
+ parser.add_argument('-U', '--user', type=str, help='OMERO user')
+ parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
+ parser.add_argument('--in_file', type=str, help='input file',
+ required=True)
+ parser.add_argument('--out_file', type=str, help='output file',
+ required=True)
+ parser.add_argument('--well_column', type=str, help='label of the column that will be mapped',
+ default='source')
+ return parser
+
+
+def get_wells_map(plate, kb, logger):
+ logger.info('Mapping wells for plate %s', plate.label)
+ wells_map = {}
+ for w in kb.get_wells_by_plate(plate):
+ wells_map[w.label] = w
+ logger.info('Mapped %d wells', len(wells_map))
+ return wells_map
+
+
+def get_plates_map(plates_list, kb, logger):
+ logger.info('Loading TiterPlates map')
+ plates_map = {}
+ for pl in kb.get_objects(kb.TiterPlate):
+ if isinstance(pl, kb.TiterPlate) and pl.barcode in plates_list:
+ plates_map[pl.barcode] = get_wells_map(pl, kb, logger)
+ logger.info('Mapped %d plates', len(plates_map))
+ return plates_map
+
+
+def get_connected_illumina_measures(well, kb, logger):
+ logger.debug('Loading connected IlluminaBeadChipMeasures for well %s:%s', well.label,
+ well.container.label)
+ return kb.dt.get_connected(well, aklass = kb.IlluminaBeadChipMeasures,
+ direction = Neo4JDriver.DIRECTION_OUTGOING)
+
+
+def wells_to_illumina(in_file, out_file, column_label, kb, logger):
+ with open(in_file) as ifile, open(out_file, 'w') as ofile:
+ reader = csv.DictReader(ifile, delimiter='\t')
+ in_records = [r for r in reader]
+ plates_barcodes = set([x[column_label].split(':')[0] for x in in_records])
+ plates_map = get_plates_map(plates_barcodes, kb, logger)
+ writer = csv.DictWriter(ofile, reader.fieldnames, delimiter='\t')
+ writer.writeheader()
+ logger.info('Mapping wells to illumina bead chip measures')
+ for rec in in_records:
+ barcode, well = rec[column_label].split(':')
+ measures = get_connected_illumina_measures(plates_map[barcode][well], kb,
+ logger)
+ if len(measures) != 1:
+ logger.warning('Found %d measures for well %s:%s, skipping line', len(measures),
+ barcode, well)
+ continue
+ rec[column_label] = measures[0].label
+ writer.writerow(rec)
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('from_well_to_illumina_measures', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ wells_to_illumina(args.in_file, args.out_file, args.well_column,
+ kb, logger)
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/from_well_to_illumina_measures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/from_well_to_illumina_measures.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,33 @@
+
+
+
+ Map well labels to illumina bead chip measures
+
+
+
+ from_well_to_illumina_measures.py
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --in_file=${infile}
+ --out_file=${out_file}
+ --well_column=${well_column}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Map a TSV file's column with PlateWell labels in format PLATE_BARCODE:WELL_LABEL to the label of
+the connected IlluminaBeadChipMeasures
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/gdoize_ms.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/gdoize_ms.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,80 @@
+
+
+ Build missing GDOs for the selected markers set
+
+
+ py_protobuff_cpp
+
+
+ gdoize_ms
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ #if str($mset_label) != 'select_one'
+ --markers-set-label=$mset_label
+ #end if
+ #if str($study) != 'select_one'
+ --study-label=$study
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=$advanced_configuration.vl_host
+ --user=$advanced_configuration.vl_user
+ --passwd=$advanced_configuration.vl_passwd
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Iterate over all genotype data samples corresponding to the given
+marker set; create a GDO table row for each genotype data sample that
+does not already have one.
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_enrollments_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_enrollments_import.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,97 @@
+"""
+Split a file like::
+
+source enrollment
+V044DE795E7F9F42FEB9855288CF577A77 ASTUDY:2141
+V06C59B915C0FD47DABE6AE02C731780AF BSTUDY:390
+
+into a new TSV file like::
+
+source study label
+V044DE795E7F9F42FEB9855288CF577A77 ASTUDY 2141
+V06C59B915C0FD47DABE6AE02C731780AF BSTUDY 390
+
+"""
+
+import sys, argparse, csv
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+ parser = argparse.ArgumentParser('Prepare input files for enrollments import workflow')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--out-enrollments', type=str, required=True,
+ help='input file with Enrollments definitions')
+ return parser
+
+
+def get_enrollments_definitions(records, logger):
+ logger.info('Creating enrollment definitions')
+ enr_defs = []
+ for rec in records:
+ try:
+ edef = {}
+ edef['source'] = rec['source']
+ try:
+ edef['study'], edef['label'] = rec['enrollment'].split(':')
+ except ValueError:
+ logger.error('Skipped record %r, wrong label format for %s', rec, rec['enrollment'])
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ enr_defs.append(edef)
+ logger.info('Retrieved %d enrollment definitions', len(enr_defs))
+ return enr_defs
+
+
+def get_parents_definitions(records, logger):
+ logger.info('Creating parents definitions')
+ parents_defs = []
+ for rec in records:
+ try:
+ pdef = dict()
+ pdef['individual'] = rec['individual']
+ if rec['father'] != 'None' or rec['mother'] != 'None':
+ pdef['father'] = rec['father']
+ pdef['mother'] = rec['mother']
+ parents_defs.append(pdef)
+ else:
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ logger.info('Retrieved %d parents definitions', len(parents_defs))
+ return parents_defs
+
+
+def main(argv):
+ parser = get_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_enrollments_import', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Start processing file %s', args.in_file)
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = [row for row in reader]
+ logger.info('Loaded %d records', len(records))
+
+ enrollment_defs = get_enrollments_definitions(records, logger)
+ with open(args.out_enrollments, 'w') as enr_out:
+ enr_writer = csv.DictWriter(enr_out,
+ ['source', 'study', 'label'],
+ delimiter='\t')
+ enr_writer.writeheader()
+ enr_writer.writerows(enrollment_defs)
+
+ logger.info('Job completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_enrollments_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_enrollments_import.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,44 @@
+
+
+
+ Prepare input files for enrollments import workflow
+
+
+
+ prepare_enrollments_import.py
+ --logfile=${log_file}
+ --loglevel=$__app__.config.vl_loglevel
+ --in-file=${in_file}
+ --out-enrollments=${enrs_out}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ source enrollment
+ V044DE795E7F9F42FEB9855288CF577A77 ASTUDY:2141
+ V06C59B915C0FD47DABE6AE02C731780AF BSTUDY:390
+
+into a new TSV file like::
+
+ source study label
+ V044DE795E7F9F42FEB9855288CF577A77 ASTUDY 2141
+ V06C59B915C0FD47DABE6AE02C731780AF BSTUDY 390
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_illumina_import_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_illumina_import_inputs.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,155 @@
+from bl.core.io.illumina import GenomeStudioSampleSheetReader as gsr
+from bl.vl.utils import LOG_LEVELS, get_logger
+import csv, argparse, sys, re
+
+
+def make_parser():
+ parser = argparse.ArgumentParser('Split GenomeStudio samplesheet in TSV files to import data within OMERO')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--input-file', type=str, required=True,
+ help='GenomeStudio samplesheet')
+ parser.add_argument('--arrays-out-file', type=str,
+ help='output file containing IlluminaArrayOfArrays definitions',
+ default='./array_of_arrays.tsv')
+ parser.add_argument('--bead-chip-out-file', type=str,
+ help='output file containing IlluminaBeadChipArray definitions',
+ default='./bead_chip.tsv')
+ parser.add_argument('--array-measure-out-file', type=str,
+ help='output file containing IlluminaBeadChipMeasure definitions',
+ default='./array_measure.tsv')
+ parser.add_argument('--array-measures-out-file', type=str,
+ help='output file containing IlluminaBeadChipMeasures definitions',
+ default='./array_measures.tsv')
+ parser.add_argument('--study', type=str, required=True,
+ help='Study label that will be used in the import procedure')
+ return parser
+
+
+def get_assay_type_enum(manifest_file):
+ return manifest_file.strip().replace('.bpm', '').replace('-', '_').replace(' ', '_').upper()
+
+
+def prepare_array_of_arrays_input(barcode, study, elements):
+ ICHIPCORDS_PATTERN = re.compile(r'^r(\d{2})c(\d{2})$', re.IGNORECASE)
+ rows = []
+ cols = []
+ for x in elements:
+ m = re.match(ICHIPCORDS_PATTERN, x['array_label'])
+ rows.append(int(m.groups()[0]))
+ cols.append(int(m.groups()[1]))
+ return {
+ 'barcode': barcode,
+ 'rows': max(rows),
+ 'columns': max(cols),
+ 'label': barcode,
+ 'study': study,
+ }
+
+
+def barcodes_to_labels(elements, wells_map, strict_mapping, logger):
+ from copy import deepcopy
+
+ mapped_elements = []
+ for e in elements:
+ if e['source'] in wells_map:
+ new_el = deepcopy(e)
+ new_el['source'] = wells_map[e['source']]
+ mapped_elements.append(new_el)
+ else:
+ logger.warning('Unable to map well %s' % e['source'])
+
+ if strict_mapping and len(mapped_elements) < len(elements):
+ msg = 'Mapped %d records of %d' %(len(elements), len(mapped_elements))
+ logger.critical(msg)
+ sys.exit(msg)
+ return mapped_elements
+
+
+def prepare_bead_chip_array_input(array_barcode, assay_type, study, elements):
+ return [{
+ 'illumina_array': array_barcode,
+ 'label': x['array_label'],
+ 'source': x['source'],
+ 'bead_chip_assay_type': assay_type,
+ 'study': study,
+ } for x in elements]
+
+
+def prepare_bead_chip_measure_input(array_barcode, study, elements,
+ device='generic_illumina_scanner',
+ status='USABLE'):
+ records = []
+ for channel in ['Grn', 'Red']:
+ records.extend(
+ [
+ {
+ 'label': '%s_%s_%s' % (array_barcode, x['array_label'], channel),
+ 'source': '%s:%s' % (array_barcode, x['array_label']),
+ 'scanner': device,
+ 'status': status,
+ 'study': study,
+ } for x in elements
+ ]
+ )
+ return records
+
+
+def prepare_bead_chip_array_measures_input(array_barcode, study, elements):
+ return [{
+ 'study': study,
+ 'label': '%s_%s' % (array_barcode, x['array_label']),
+ 'red_channel': '%s_%s_Red' % (array_barcode, x['array_label']),
+ 'green_channel': '%s_%s_Grn' %(array_barcode, x['array_label']),
+ 'source': '%s:%s' % (array_barcode, x['array_label']),
+ } for x in elements]
+
+
+def main(argv):
+ """
+ Entry point: read the samplesheet given with --input-file and write the
+ four TSV files holding arrays, bead chips, single channel measures and
+ measures definitions for the study given with --study.
+ """
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_illumina_import_inputs', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Processing file %s', args.input_file)
+ with open(args.input_file) as in_file:
+ reader = gsr(in_file)
+ # the assay type is derived from the 'A' entry of the samplesheet header
+ assay_type = get_assay_type_enum(reader.header['A'])
+ # group samplesheet rows by array barcode (SentrixBarcode_A), keeping
+ # for each row the sample id and its position on the array
+ arrays_map = {}
+ for r in reader:
+ arrays_map.setdefault(r['SentrixBarcode_A'], []).append({'source': r['Sample_ID'],
+ 'array_label': r['SentrixPosition_A']})
+ # note: measures_file receives the records that combine the two channels,
+ # measure_file the single channel ones
+ with open(args.arrays_out_file, 'w') as array_file,\
+ open(args.bead_chip_out_file, 'w') as chip_file,\
+ open(args.array_measures_out_file, 'w') as measures_file,\
+ open(args.array_measure_out_file, 'w') as measure_file:
+ arrays_writer = csv.DictWriter(array_file,
+ ['study', 'label', 'barcode', 'rows', 'columns'],
+ delimiter='\t')
+ arrays_writer.writeheader()
+ chip_writer = csv.DictWriter(chip_file,
+ ['study', 'illumina_array', 'label', 'source',
+ 'bead_chip_assay_type'],
+ delimiter='\t')
+ chip_writer.writeheader()
+ measure_writer = csv.DictWriter(measure_file,
+ ['study', 'label', 'source', 'scanner', 'status'],
+ delimiter='\t')
+ measure_writer.writeheader()
+ measures_writer = csv.DictWriter(measures_file,
+ ['study', 'label', 'red_channel', 'green_channel',
+ 'source'],
+ delimiter='\t')
+ measures_writer.writeheader()
+ # k is the array barcode, v the list of elements placed on that array
+ for k, v in arrays_map.iteritems():
+ arrays_writer.writerow(prepare_array_of_arrays_input(k, args.study, v))
+ chip_writer.writerows(prepare_bead_chip_array_input(k, assay_type, args.study, v))
+ measure_writer.writerows(prepare_bead_chip_measure_input(k, args.study, v))
+ measures_writer.writerows(prepare_bead_chip_array_measures_input(k, args.study, v))
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_illumina_import_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_illumina_import_inputs.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,57 @@
+
+
+
+ Prepare inputs file used to import illumina data
+
+
+
+ prepare_illumina_import_inputs.py
+ --input-file=${infile}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ #if str($study) != 'select_study'
+ --study=${study}
+ #end if
+ --arrays-out-file=${arrays_out_file}
+ --bead-chip-out-file=${bead_chip_out_file}
+ --array-measure-out-file=${measure_out_file}
+ --array-measures-out-file=${measures_out_file}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Prepare file to import IlluminaArrayOfArrays and IlluminaBeadChip objects reading data from a
+GenomeStudio samplesheet
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_individuals_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_individuals_import.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,105 @@
+"""
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+
+into two separated TSV files, the first one will be used to import new individuals and enrollments,
+the second one will be used to update father and mother information for the individuals in the first
+file.
+"""
+
+import sys, argparse, csv
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+ parser = argparse.ArgumentParser('Prepare input files for individuals import workflow')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--out-individuals', type=str, required=True,
+ help='input file with Individuals definitions')
+ parser.add_argument('--out-parents', type=str, required=True,
+ help='input file with parents definitions')
+ return parser
+
+
+def get_individual_definitions(records, logger):
+ logger.info('Creating individual definitions')
+ ind_defs = []
+ for rec in records:
+ try:
+ idef = {'father': 'None', 'mother': 'None'}
+ idef['gender'] = rec['gender']
+ try:
+ idef['study'], idef['label'] = rec['individual'].split(':')
+ except ValueError:
+ logger.error('Skipped record %r, wrong label format for %s', rec, rec['individual'])
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ ind_defs.append(idef)
+ logger.info('Retrieved %d individual definitions', len(ind_defs))
+ return ind_defs
+
+
+def get_parents_definitions(records, logger):
+ logger.info('Creating parents definitions')
+ parents_defs = []
+ for rec in records:
+ try:
+ pdef = dict()
+ pdef['individual'] = rec['individual']
+ if rec['father'] != 'None' or rec['mother'] != 'None':
+ pdef['father'] = rec['father']
+ pdef['mother'] = rec['mother']
+ parents_defs.append(pdef)
+ else:
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ logger.info('Retrieved %d parents definitions', len(parents_defs))
+ return parents_defs
+
+
+def main(argv):
+ parser = get_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_individuals_import', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Start processing file %s', args.in_file)
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = [row for row in reader]
+ logger.info('Loaded %d records', len(records))
+
+ individual_defs = get_individual_definitions(records, logger)
+ with open(args.out_individuals, 'w') as inds_out:
+ inds_writer = csv.DictWriter(inds_out,
+ ['study', 'label', 'gender', 'father', 'mother'],
+ delimiter='\t')
+ inds_writer.writeheader()
+ inds_writer.writerows(individual_defs)
+
+ parents_defs = get_parents_definitions(records, logger)
+ with open(args.out_parents, 'w') as parents_out:
+ parents_writer = csv.DictWriter(parents_out, ['individual', 'father', 'mother'],
+ delimiter='\t')
+ parents_writer.writeheader()
+ parents_writer.writerows(parents_defs)
+
+ logger.info('Job completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_individuals_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_individuals_import.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,43 @@
+
+
+
+ Prepare input files for individuals import\parents update workflow
+
+
+
+ prepare_individuals_import.py
+ --logfile=${log_file}
+ --loglevel=$__app__.config.vl_loglevel
+ --in-file=${in_file}
+ --out-individuals=${inds_out}
+ --out-parents=${parents_out}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+
+into two separated TSV files, the first one will be used to import new individuals and enrollments,
+the second one will be used to update father and mother information for the individuals in the first
+file.
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,283 @@
+"""
+This tool produces files that can be used as input to import
+* samples
+* flowcells
+* lanes
+* laneslots
+within OMERO.biobank using import applications.
+If the optional 'study-output-file' parameter is given as input, the
+script will produce the input file for a new study definition.
+If the optional 'tubes-subsamples-output-file' is given, the script
+will generate another file with tubes definitions where each tube is
+produced applying a specific laboratory protocol to an existing
+tube. Existing tubes are the ones in tubes-out-file, new tubes' labels
+are created using the pattern tube_label::protocol
+The config_parameters field must point to a YAML configuration file
+with the following structure:
+
+ config_parameters:
+ study_label: study_label
+ namespace: namespace
+
+where study_label is mandatory
+"""
+
+import csv, sys, argparse, logging, yaml
+# Needed to import flowcell data
+from bioblend.galaxy import GalaxyInstance
+import nglimsclient, os
+
+# Settings handed to logging.basicConfig() by main(): record layout and
+# timestamp format. LOG_LEVELS lists the values accepted by --loglevel.
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='split sequencing samplesheet')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', '-i', type=str, required=True,
+ help='input file')
+ parser.add_argument('--tubes-out-file', type=str,
+ help='output file containing tube definitions',
+ default='./tubes_def.tsv')
+ parser.add_argument('--flowcells-out-file', type=str,
+ help='output file containing flowcell definitions',
+ default='./flowcells_def.tsv')
+ parser.add_argument('--lanes-out-file', type=str,
+ help='output file containing lane definitions',
+ default='./lanes_def.tsv')
+ parser.add_argument('--laneslots-out-file', type=str,
+ help='output file containing laneslot definitions',
+ default='./laneslots_def.tsv')
+ parser.add_argument('--config-parameters', type=str, required=True,
+ help='a YAML configuration file containing study label and labels namespace, '
+ 'namespace is optional')
+ parser.add_argument('--study-output-file', type=str,
+ help='output file containing study definition')
+ parser.add_argument('--tubes-subsamples-output-file', type=str,
+ help='output file containing tubes subsamples (samples produced applying a '
+ 'laboratory protocol to existing samples)')
+ return parser
+
+
+def get_samplesheet_translator(samplesheet_type='default'):
+ translator = {'default': {'flowcell_id': 'FCID',
+ 'tube_id': 'SampleID',
+ 'lane_id': 'Lane',
+ 'sample_tag': 'Index',
+ 'protocol': 'Recipe',
+ 'operator': 'Operator',
+ 'sample_project': 'SampleProject'}
+ }
+ return translator[samplesheet_type]
+
+def add_namespace(namespace, label, separator='|'):
+ return separator.join([namespace, label])
+
+def write_tubes_file(records, study_label, translator, ofile,
+ namespace = None, logger = None):
+ ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
+ 'vessel_status', 'source', 'source_type']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ tubes_def = set([r[translator['tube_id'].strip()] for r in records])
+ for x in tubes_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x if not namespace else add_namespace(namespace, x),
+ 'vessel_type' : 'Tube',
+ 'vessel_content' : 'DNA',
+ 'vessel_status' : 'UNKNOWN',
+ 'source' : 'None',
+ 'source_type' : 'NO_SOURCE'})
+
+
+def write_subsamples_file(records, study_label, translator, ofile,
+ namespace = None, logger = None):
+ ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
+ 'vessel_status', 'source', 'source_type', 'options']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ subsamples_def = set([('%s::%s' % (r[translator['tube_id']].strip(), r[translator['protocol']].strip()),
+ r[translator['tube_id']].strip(),
+ r[translator['protocol']].strip()) for r in records])
+ for x in subsamples_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x[0] if not namespace else add_namespace(namespace, x[0]),
+ 'vessel_type' : 'Tube',
+ 'vessel_content' : 'DNA',
+ 'vessel_status' : 'UNKNOWN',
+ 'source' : x[1] if not namespace else add_namespace(namespace, x[1]),
+ 'source_type' : 'Tube',
+ 'options' : 'protocol=%s' % x[2]})
+
+
+def write_flowcells_file(records, study_label, translator, ofile,
+ namespace = None, logger=None):
+ ofile_fields = ['study', 'label', 'barcode', 'container_status',
+ 'number_of_slots']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ flowcells_def = set([(r[translator['flowcell_id']].strip()) for r in records])
+ for x in flowcells_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x if not namespace else add_namespace(namespace, x),
+ 'barcode' : x if not namespace else add_namespace(namespace, x),
+ 'container_status' : 'INSTOCK',
+ 'number_of_slots' : '8'})
+
+
+def write_lanes_file(records, study_label, translator, ofile,
+ namespace = None, logger=None):
+ ofile_fields = ['study', 'flow_cell', 'slot', 'container_status']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ lanes_def = set([(r[translator['flowcell_id']].strip(),
+ r[translator['lane_id']].strip())
+ for r in records])
+ for x in lanes_def:
+ writer.writerow({'study' : study_label,
+ 'flow_cell' : x[0] if not namespace else add_namespace(namespace, x[0]),
+ 'slot' : x[1],
+ 'container_status' : 'INSTOCK'})
+
+
+def write_laneslots_file(records, study_label, translator, ofile,
+ subsamples_enabled=False,
+ namespace = None, logger=None):
+ logger.debug ('subsamples_ensabled: %r' % subsamples_enabled)
+ ofile_fields = ['study', 'lane', 'tag', 'content', 'source',
+ 'source_type', 'options']
+ # Get NGLIMS host and key
+ try:
+ galaxy_host = os.environ['NGLIMS_GALAXY_HOST']
+ api_key = os.environ['NGLIMS_GALAXY_API_KEY']
+ except KeyError as ke:
+ msg = 'No environment variables %s set to configure access to the Galaxy server' % ke
+ sys.exit(msg)
+ # Get flowcell label (assuming label is the same for all records)
+ fc_id = records[0][translator['flowcell_id']].strip()
+ # Get flowcell details from nglims
+ gi = nglimsclient.setup(GalaxyInstance(galaxy_host, api_key))
+ if gi.nglims.exists_flowcell_id(fc_id):
+ fc_data = gi.nglims.flowcell_complete_details(fc_id)
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ laneslots_def = set()
+ for r in records:
+ fc_id = r[translator['flowcell_id']].strip() if not namespace else \
+ add_namespace(namespace, r[translator['flowcell_id']]).strip()
+ if subsamples_enabled:
+ source_tube_id = '%s::%s' % (r[translator['tube_id']].strip(),
+ r[translator['protocol']].strip())
+ else:
+ source_tube_id = r[translator['tube_id']].strip()
+ # Identify adapter
+ adapter = [i['adapter'] for i in fc_data['details'] if i['name']==r[translator['tube_id']].strip() and i['lane']==int(r[translator['lane_id']].strip())]
+ laneslots_def.add(('%s:%s' % (fc_id, r[translator['lane_id']].strip()),
+ r[translator['sample_tag']].strip(),
+ source_tube_id,
+ r[translator['protocol']].strip(),
+ r[translator['operator']].strip(),
+ r[translator['sample_project']].strip(),
+ adapter[0]))
+ for x in laneslots_def:
+ writer.writerow({'study' : study_label,
+ 'lane' : x[0],
+ 'tag' : x[1],
+ 'content' : 'DNA',
+ 'source' : x[2] if not namespace else \
+ add_namespace(namespace, x[2]),
+ 'source_type' : 'Tube',
+ 'options' : 'protocol=%s,operator=%s,sample_project=%s,adapter=%s' %
+ (x[3], x[4], x[5], x[6])})
+
+
+def write_study_file(study_label, records, translator, ofile, logger=None):
+ ofile_fields = ['label', 'description']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t', )
+ writer.writeheader()
+ writer.writerow({'label': study_label})
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+ logger = logging.getLogger('prepare_seq_dsample_inputs')
+
+ with open(args.in_file, 'rU') as f:
+ logger.info('Loading data from file %s' % args.in_file)
+ reader = csv.DictReader(f, delimiter='\t')
+ recs = [r for r in reader]
+ translator = get_samplesheet_translator()
+
+ with open(args.config_parameters) as cfgf:
+ conf = yaml.load(cfgf)
+ if not conf.has_key('config_parameters'):
+ raise RuntimeError('Bad configuration file')
+ else:
+ try:
+ study_label = conf['config_parameters']['study_label']
+ except KeyError:
+ raise RuntimeError('No study_label provided')
+ if conf['config_parameters'].has_key('namespace'):
+ namespace = conf['config_parameters']['namespace']
+ else:
+ namespace = None
+
+ if args.study_output_file:
+ logger.info('Writing Study definition file %s' % args.study_output_file)
+ write_study_file(study_label, recs, translator, args.study_output_file, logger)
+ logger.info('Done writing file %s' % args.study_output_file)
+
+ logger.info('Writing Tube definitions file %s' % args.tubes_out_file)
+ write_tubes_file(recs, study_label, translator,
+ args.tubes_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.tubes_out_file)
+
+ if args.tubes_subsamples_output_file:
+ logger.info('Writing Tubes\' subsamples definitions file %s' \
+ % args.tubes_subsamples_output_file)
+ write_subsamples_file(recs, study_label, translator,
+ args.tubes_subsamples_output_file,
+ namespace, logger)
+ logger.info('Done writing file %s' % args.tubes_subsamples_output_file)
+
+ logger.info('Writing FlowCell definitions file %s' % args.flowcells_out_file)
+ write_flowcells_file(recs, study_label, translator,
+ args.flowcells_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.flowcells_out_file)
+
+ logger.info('Writing Lane definitions file %s' % args.lanes_out_file)
+ write_lanes_file(recs, study_label, translator,
+ args.lanes_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.lanes_out_file)
+
+ logger.info('Writing LaneSlot definitions file %s' % args.laneslots_out_file)
+ write_laneslots_file(recs, study_label, translator,
+ args.laneslots_out_file,
+ 'tubes_subsamples_output_file' in args, # Check if subsamples have been created
+ namespace,
+ logger)
+ logger.info('Done writing file %s' % args.laneslots_out_file)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,76 @@
+
+
+ Extract OMERO.biobank objects from a sequencing samplesheet
+
+
+
+ prepare_seq_dsample_inputs.py
+ --in-file=${infile}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --tubes-out-file=${tubes_ofile}
+ --flowcells-out-file=${flowcells_ofile}
+ --lanes-out-file=${lanes_ofile}
+ --laneslots-out-file=${laneslots_ofile}
+ --config-parameters=${config_params}
+ --study-output-file=${study_ofile}
+ --tubes-subsamples-output-file=${subsamples_ofile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool produces files that can be used as input to import
+ * samples
+ * flowcells
+ * lanes
+ * laneslots
+
+within OMERO.biobank using import applications.
+
+If the optional 'study-output-file' parameter is given as input, the
+script will produce the input file for a new study definition.
+
+If the optional 'tubes-subsamples-output-file' is given, the script
+will generate another file with tubes definitions where each tube is
+produced applying a specific laboratory protocol to an existing
+tube. Existing tubes are the ones in tubes-out-file, new tubes' labels
+are created using the pattern **tube_label::protocol**
+The config_parameters field must point to a YAML configuration file
+with the following structure:
+
+ config_parameters:
+ study_label: study_label
+
+ namespace: namespace
+
+where study_label is mandatory
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_seq_out_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_out_inputs.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,99 @@
+"""
+This tool produces files that can be used as input to import
+ * SequencerOutput data samples
+ * SequencerOutput data objects
+within OMERO.biobank using import applications.
+
+Input file must be like
+
+ run_directory path
+ 130418_SN194_0303_BC1NYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/130418_SN194_0303_BC1NYHACXX/raw
+ 160418_SN194_0304_BCAZYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/160418_SN194_0304_BCAZYHACXX/raw
+ ....
+"""
+
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'  # pipe-separated: timestamp, level name padded to 8 chars, message
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'  # timestamp layout, passed to logging.basicConfig as datefmt
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']  # values accepted by --loglevel
+
+
+def make_parser():
+    """Build the command-line parser of the sequencer-output preparation tool."""
+    cli = argparse.ArgumentParser(description='build sequencer output import files')
+    add = cli.add_argument  # shorthand used for every option below
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--loglevel', type=str, choices=LOG_LEVELS, help='logging level', default='INFO')
+    add('--in-file', '-i', type=str, required=True, help='input file')
+    add('--dsamples-out-file', type=str, default='./seq_out_dsamples.tsv',
+        help='output file containing data samples definitions')
+    add('--dobjects-out-file', type=str, default='./seq_out_dobjects.tsv',
+        help='output file containing data objects definitions')
+    add('--study', '-s', type=str, required=True)
+    return cli
+
+
+def write_dsamples_file(records, out_file, study_label):
+    """Write a TSV with one SequencerOutput data sample per run directory.
+
+    Each item of records is a run directory name made of four '_'-separated
+    tokens: the second is written as the device and the fourth, minus its
+    first character, as the FlowCell source. Any other number of tokens
+    raises ValueError from the unpacking below.
+    """
+
+    columns = ['study', 'label', 'source', 'source_type', 'seq_dsample_type',
+               'status', 'device']
+    with open(out_file, 'w') as sink:
+        tsv = csv.DictWriter(sink, columns, delimiter='\t')
+        tsv.writeheader()
+        for run_dir in records:
+            _, device, _, prefixed_flowcell = run_dir.split('_')
+            # values are listed in the same order as `columns`
+            cells = (study_label, run_dir, prefixed_flowcell[1:], 'FlowCell',
+                     'SequencerOutput', 'USABLE', device)
+            tsv.writerow(dict(zip(columns, cells)))
+
+
+def write_dobjects_file(records, out_file, study_label):
+    """Write a TSV with one illumina-run-folder data object per input record.
+
+    Reads 'path' and 'run_directory' from each record; size and sha1 are fixed placeholders.
+    """
+    columns = ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1']
+    with open(out_file, 'w') as sink:
+        tsv = csv.DictWriter(sink, columns, delimiter='\t')
+        tsv.writeheader()
+        for rec in records:
+            tsv.writerow(dict(zip(columns, (study_label, rec['path'], rec['run_directory'],
+                                            'x-vl/illumina-run-folder', '-1', 'N.A.'))))
+
+
+def main(argv):
+    """Read the run_directory/path TSV and write both import files.
+
+    argv is the argument list without the program name. Data samples are
+    written once per distinct run directory, sorted by name.
+    """
+    args = make_parser().parse_args(argv)
+    log_config = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT,
+                  'level': getattr(logging, args.loglevel)}
+    if args.logfile:
+        log_config['filename'] = args.logfile
+    logging.basicConfig(**log_config)
+    logger = logging.getLogger('prepare_seq_out_inputs')  # this script's own name
+    with open(args.in_file) as f:
+        logger.info('Loading data from file %s', args.in_file)
+        recs = list(csv.DictReader(f, delimiter='\t'))
+
+    logger.info('Writing DataSample data to file %s', args.dsamples_out_file)
+    # sorted() makes the row order reproducible; iterating a bare set is not
+    run_dirs = sorted(set(r['run_directory'] for r in recs))
+    write_dsamples_file(run_dirs, args.dsamples_out_file, args.study)
+    logger.info('Writing DataObjects data to file %s', args.dobjects_out_file)
+    write_dobjects_file(recs, args.dobjects_out_file, args.study)
+
+
+if __name__ == '__main__':  # true only when the file is executed as a script, not when imported
+ main(sys.argv[1:])  # hand main() the command-line arguments without the program name
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/prepare_seq_out_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_out_inputs.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,46 @@
+
+
+ Extract OMERO.biobank objects that can be used to import SequencerOutput data
+
+
+
+ prepare_seq_out_inputs.py
+ --in-file=${infile}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --dsamples-out-file=${dsamples_ofile}
+ --dobjects-out-file=${dobjects_ofile}
+ --study=${study}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool produces files that can be used as input to import
+ * SequencerOutput data samples
+ * SequencerOutput data objects
+
+within OMERO.biobank using import applications.
+
+Input file must be like
+
+ run_directory path
+ 130418_SN194_0303_BC1NYHACXX file:///SHARE/USERFS/els7/130418_SN194_0303_BC1NYHACXX/raw
+ 160418_SN194_0304_BCAZYHACXX file:///SHARE/USERFS/els7/160418_SN194_0304_BCAZYHACXX/raw
+ ....
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/split_by_study.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/split_by_study.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,90 @@
+"""
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+ BSTUDY:12515 FEMALE BSTUDY:3512 BSTUDY:124
+
+into multiple files based on the STUDY value of the label stored in the "individual" column.
+Each label in the "individual" column must have a STUDY:ENROLLMENT_CODE format, otherwise the line
+will be skipped.
+"""
+
+import sys, argparse, csv, os
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+    """Build the command-line parser of the split-by-study tool.
+
+    The summary is passed as description= (given positionally it would become prog).
+    """
+    parser = argparse.ArgumentParser(description='Split a file containing pedigree informations in multiple files using the study as split criteria')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, help='logging level', default='INFO')
+    parser.add_argument('--in-file', type=str, required=True, help='input TSV file')
+    parser.add_argument('--record-id', type=int, help='Output ID record given by Galaxy')
+    parser.add_argument('--out-path', type=str, help='Output directory', default='.')
+    return parser
+
+
+def split_element(element, logger):
+    """Split a STUDY:ENROLLMENT_CODE label; log an error and return (None, None) unless it has exactly one ':'."""
+    pieces = element.split(':')
+    if len(pieces) == 2:
+        return pieces[0], pieces[1]
+    logger.error('Label %s is not a label with format STUDY:ENROLLMENT_CODE', element)
+    return None, None
+
+
+def map_by_study(records, logger):  # returns {study: [records]}, grouped on the 'individual' label
+    records_map = {}
+    for rec in records:
+        study, code = split_element(rec['individual'], logger)
+        if study and code:  # both halves of STUDY:ENROLLMENT_CODE must be non-empty
+            records_map.setdefault(study, []).append(rec)
+        else:  # malformed label, or an empty half such as ':123' (would otherwise land under study '')
+            logger.debug('Skipping record %r', rec)
+    logger.info('Records splitted between %d studies', len(records_map))
+    return records_map
+
+
+def dump_records(study_label, records, header, output_path, logger, galaxy_record_id=None):
+    """Write the records of one study to a TSV file inside output_path.
+
+    With a truthy galaxy_record_id the file is named
+    primary_<id>_<study>_visible_tabular, where '_' in the study becomes '-';
+    otherwise it is named <study>_individuals.tsv.
+    """
+    base = ('primary_%d_%s_visible_tabular' % (galaxy_record_id, study_label.replace('_', '-'))
+            if galaxy_record_id else '%s_individuals.tsv' % study_label)
+    fname = os.path.join(output_path, base)
+    with open(fname, 'w') as sink:
+        logger.info('Dumping %d records to file %s', len(records), fname)
+        tsv = csv.DictWriter(sink, header, delimiter='\t')
+        tsv.writeheader()
+        tsv.writerows(records)
+
+
+def main(argv):
+    """Split the input TSV into one output file per study found in the 'individual' column."""
+    args = get_parser().parse_args(argv)
+
+    logger = get_logger('split_by_study', level=args.loglevel, filename=args.logfile)
+
+    logger.info('Start processing file %s', args.in_file)
+    with open(args.in_file) as in_file:
+        rows = list(csv.DictReader(in_file, delimiter='\t'))
+
+    by_study = map_by_study(rows, logger)
+    # Every output file gets this fixed header, whatever the input header was,
+    # in order to prevent problems when running the workflow later
+    header = ['individual', 'gender', 'father', 'mother']
+    for study, study_rows in by_study.iteritems():
+        dump_records(study, study_rows, header, args.out_path, logger, args.record_id)
+    logger.info('Job completed')
+
+
+if __name__ == '__main__':  # true only when the file is executed as a script, not when imported
+ main(sys.argv[1:])  # hand main() the command-line arguments without the program name
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/split_by_study.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/split_by_study.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,41 @@
+
+
+
+ Split a file containing pedigree information into multiple files, using the study as the split criterion
+
+
+
+ split_by_study.py
+ --in-file=${in_file}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --record-id=$log_file.id
+ --out-path=$__new_file_path__
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+ BSTUDY:12515 FEMALE BSTUDY:3512 BSTUDY:124
+
+into multiple files based on the STUDY value of the label stored in the "individual" column.
+Each label in the "individual" column must have a STUDY:ENROLLMENT_CODE format, otherwise the line
+will be skipped.
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/wells_barcode_to_label.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/wells_barcode_to_label.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,112 @@
+"""
+Map PlateWell labels written as PLATE_BARCODE:WELL_LABEL to labels written as
+PLATE_LABEL:WELL_LABEL which is the PlateWell label format required by the map_vid
+application.
+The inputs are a TSV file and the label of the column of this file containing the
+PlateWell labels that are going to be mapped.
+"""
+
+import csv, argparse, sys, copy
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import LOG_LEVELS, get_logger
+import bl.vl.utils.ome_utils as vlu
+
+
+def get_wells_map(kb, plate_barcodes, logger):
+    """Return {'PLATE_BARCODE:WELL_LABEL': 'PLATE_LABEL:WELL_LABEL'} for the wells of the given plates."""
+    logger.info('Start building PlateWells map')
+    found = kb.get_by_field(kb.TiterPlate, 'barcode', plate_barcodes)
+    logger.debug('Plates %r --- Results: %r', plate_barcodes, found)
+    wells_map = {}
+    for _, plate in found.iteritems():
+        if plate.OME_TABLE != 'TiterPlate':
+            logger.debug('Object is a %r, skipping it', plate.OME_TABLE)
+        elif not plate.barcode:
+            logger.debug('TiterPlate %s has no barcode', plate.label)
+        else:
+            for well in kb.get_wells_by_plate(plate):
+                logger.debug('Mapping well %s of plate %s', well.label, well.container.label)
+                by_barcode = '%s:%s' % (well.container.barcode, well.label)
+                wells_map[by_barcode] = '%s:%s' % (well.container.label, well.label)
+    logger.info('Mapped %d PlateWells', len(wells_map))
+    return wells_map
+
+
+def get_plates_list(records, plates_column, logger):
+    """Return the distinct plate barcodes (text before the first ':') found in plates_column."""
+    logger.info('Retrieving TiterPlate barcodes from %d records', len(records))
+    # a set removes duplicates; the order of the returned list is unspecified
+    barcodes = set(rec[plates_column].split(':')[0] for rec in records)
+    logger.info('Found %d TiterPlate objects', len(barcodes))
+    return list(barcodes)
+
+
+def make_parser():
+    """Build the command-line parser of the barcode-to-label mapping tool."""
+    # the summary must be passed as description=; given positionally it would become prog
+    parser = argparse.ArgumentParser(description='Map barcodes in PlateWell labels to TiterPlate labels')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                        help='logging level', default='INFO')
+    parser.add_argument('-H', '--host', type=str, help='OMERO host')
+    parser.add_argument('-U', '--user', type=str, help='OMERO user')
+    parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
+    parser.add_argument('--in-file', type=str, required=True, help='input TSV file')
+    parser.add_argument('--column-label', type=str, required=True,
+                        help='the label of the columun containing the values that will be mapped')
+    parser.add_argument('--out-file', type=str, required=True, help='output TSV file')
+    parser.add_argument('--strict-mapping', action='store_true',
+                        help='if output records are less than the input ones, raise an error')
+    return parser
+
+
+def main(argv):  # CLI entry point: rewrite one column of a TSV from plate-barcode to plate-label well names
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('wells_barcode_to_label', level=args.loglevel,
+ filename=args.logfile)
+ try:  # connection values not given on the command line come from the vlu.ome_* helpers
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:  # Python 2 form; NOTE(review): presumably raised by the vlu helpers when no value is configured - confirm
+ logger.critical(ve)
+ sys.exit(ve)
+
+ logger.info('Starting job')
+
+ kb = KB(driver='omero')(host, user, passwd)
+ # wells_map = get_wells_map(kb, logger)
+
+ with open(args.in_file) as in_file, open(args.out_file, 'w') as out_file:  # NOTE: out_file is created/truncated even if the column check below fails
+ reader = csv.DictReader(in_file, delimiter='\t')
+ if args.column_label not in reader.fieldnames:  # the column to map must exist in the input header
+ msg = 'No column %s in file %s' % (args.column_label, args.in_file)
+ logger.critical(msg)
+ raise RuntimeError(msg)
+ records = [row for row in reader]  # all rows are loaded so plate barcodes can be collected before mapping
+ plates = get_plates_list(records, args.column_label, logger)
+ wells_map = get_wells_map(kb, plates, logger)
+ logger.info('Mapping %d records', len(records))
+ writer = csv.DictWriter(out_file, reader.fieldnames, delimiter='\t')  # output keeps the input header
+ writer.writeheader()
+ mapped_records = []
+ for rec in records:
+ mapped = copy.deepcopy(rec)  # work on a copy so the input row is left untouched
+ logger.debug('Mapping value %s', mapped[args.column_label])
+ if mapped[args.column_label] in wells_map:  # only labels present in wells_map are rewritten
+ mapped[args.column_label] = wells_map[mapped[args.column_label]]
+ mapped_records.append(mapped)  # NOTE(review): indentation is lost in this patch text; the strict check below only makes sense if this append is inside the `if` above - confirm
+ if args.strict_mapping and len(mapped_records) < len(records):  # strict mode: abort when fewer rows were mapped than read
+ msg = 'Mapped %d record of %d' % (len(mapped_records), len(records))
+ logger.critical(msg)
+ sys.exit(msg)  # a string argument is printed to stderr and the exit status is 1
+ logger.info('%d records mapped', len(mapped_records))
+ writer.writerows(mapped_records)
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':  # true only when the file is executed as a script, not when imported
+ main(sys.argv[1:])  # hand main() the command-line arguments without the program name
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank/utils/wells_barcode_to_label.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/wells_barcode_to_label.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,75 @@
+
+
+
+ Map plate barcodes in well labels to plate labels
+
+
+
+ wells_barcode_to_label.py
+ --in-file=${in_file}
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${log_file}
+ --column-label=${column_label}
+ #if $strict_mapping
+ --strict-mapping
+ #end if
+ --out-file=${out_file}
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --host=${advanced_configuration.vl_host}
+ --user=${advanced_configuration.vl_user}
+ --passwd=${advanced_configuration.vl_passwd}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Map PlateWell labels written as PLATE_BARCODE:WELL_LABEL to labels written as
+PLATE_LABEL:WELL_LABEL which is the PlateWell label format required by the map_vid
+application.
+The inputs are a TSV file and the label of the column of this file containing the
+PlateWell labels that are going to be mapped.
+
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/biobank_tool_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank_tool_conf.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/history_tools/history_to_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/history_tools/history_to_file.py Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,76 @@
+#-------------------------
+# Export Galaxy history data to a JSON compatible text file
+#
+# Galaxy host can be retrieved using the GALAXY_HOST environment
+# variable
+# Galaxy API key can be retrieved using the GALAXY_API_KEY
+# environment variable
+#-------------------------
+
+from blend.galaxy import GalaxyInstance
+
+import argparse, sys, logging, os, json
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'  # pipe-separated: timestamp, level name padded to 8 chars, message
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'  # timestamp layout, passed to logging.basicConfig as datefmt
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']  # values accepted by --loglevel
+
+def make_parser():
+    """Build the command-line parser of the Galaxy history export tool."""
+    cli = argparse.ArgumentParser(description='Export Galaxy history data')
+    add = cli.add_argument  # shorthand used for every option below
+    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
+        help='logging level (default: INFO)')
+    add('--logfile', type=str, help='log file (default=stderr)')
+    add('--galaxy_host', type=str, help='galaxy host (with port)')
+    add('--api_key', type=str, help='Galaxy API key')
+    add('--history_id', type=str, required=True,
+        help='ID fo the history that will be exported')
+    add('--no_peek', action='store_true', help='remove preview from datasets details')
+    add('--ofile', type=str, required=True, help='output file')
+    return cli
+
+def check_history_id(galaxy_instance, history_id):
+    """Return True if history_id equals str(id) of a history listed by galaxy_instance."""
+    # history names are not read: str() on a non-ASCII unicode name raises under Python 2
+    return any(str(h['id']) == history_id for h in galaxy_instance.histories.get_histories())
+
+def main(argv):
+    """Export the details of one Galaxy history to a JSON text file.
+
+    Host and API key come from the command line or, failing that, from the
+    GALAXY_HOST and GALAXY_API_KEY environment variables. The process exits
+    with an error message when neither is set or the history ID is unknown.
+    """
+    args = make_parser().parse_args(argv)
+    log_config = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT,
+                  'level': getattr(logging, args.loglevel)}
+    if args.logfile:
+        log_config['filename'] = args.logfile
+    logging.basicConfig(**log_config)
+    logger = logging.getLogger('history_to_file')
+
+    def abort(msg):
+        logger.critical(msg)
+        sys.exit(msg)
+
+    try:
+        galaxy_host = args.galaxy_host or os.environ['GALAXY_HOST']
+        api_key = args.api_key or os.environ['GALAXY_API_KEY']
+    except KeyError as missing:
+        abort('No argument passed and no global variable %s found' % missing)
+
+    logger.info('opening connection to %s' % galaxy_host)
+    gi = GalaxyInstance(galaxy_host, key=api_key)
+    if not check_history_id(gi, args.history_id):
+        abort('Unable to find history with ID %s' % args.history_id)
+
+    logger.info('getting data for history %s' % args.history_id)
+    h_data = gi.histories.get_history_details(args.history_id, args.no_peek)
+    logger.info('saving to file %s' % args.ofile)
+    with open(args.ofile, 'w') as out_file:
+        out_file.write(json.dumps(h_data))
+    logger.info('Job completed')
+
+if __name__ == '__main__':  # true only when the file is executed as a script, not when imported
+ main(sys.argv[1:])  # hand main() the command-line arguments without the program name
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/history_tools/history_to_file.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/history_tools/history_to_file.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,55 @@
+
+ export history data to a text file
+
+ history_to_file.py
+ --logfile=${logfile}
+ --history_id=${history_id}
+ --ofile=${out_file}
+ #if $hide_peek
+ --no_peek
+ #end if
+ #if $advanced_configuration.configuration_level == 'advanced'
+ --galaxy_host=$advanced_configuration.galaxy_host
+ --api_key=$advanced_configuration.api_key
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Export galaxy history's data to a text file (in JSON format).
+History ID has to be given as input parameter.
+Previews can be removed from datasets details by enabling the "hide
+previews" checkbox.
+
+Galaxy host and API key can be specified enabling the
+"advanced_configuration" option, by default these values will be
+retrieved from GALAXY_HOST and GALAXY_API_KEY environment variables.
+
+
\ No newline at end of file
diff -r 47bf0086e082 -r 43be74e62bfe galaxy-tools/history_tools_tool_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/history_tools_tool_conf.xml Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,7 @@
+
+
+
+
+