# HG changeset patch
# User ric
# Date 1475057010 14400
# Node ID ba6cf6ede02726592b01df60847043873e9d7d70
Uploaded
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/.gitignore Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,3 @@
+.idea
+*.pyc
+*~
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/exporter/export_titer_plates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/exporter/export_titer_plates.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,67 @@
+import logging, csv, argparse, sys, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.kb.drivers.omero.utils as vlu
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+CSV_FIELDS = ['label', 'barcode', 'rows', 'columns', 'plate_status']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='dump all TiterPlate objects to a TSV file')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--ofile', type=str, help='output file',
+ required=True)
+ return parser
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+ logger = logging.getLogger()
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.info('Loading TiterPlate objects')
+ plates = kb.get_objects(kb.TiterPlate)
+ logger.info('Loaded %d objects' % len(plates))
+
+ with open(args.ofile, 'w') as ofile:
+ writer = csv.DictWriter(ofile, CSV_FIELDS, delimiter='\t')
+ writer.writeheader()
+ for i, pl in enumerate(plates, 1):
+ logger.debug('Dumping plate %d/%d' % (i, len(plates)))
+ writer.writerow({'label' : pl.label,
+ 'barcode' : pl.barcode,
+ 'rows' : pl.rows,
+ 'columns' : pl.columns,
+ 'plate_status' : pl.status.enum_label()})
+ logger.info('Job done')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/biosample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/biosample.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,195 @@
+
+ import BioSample definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $move_to_common_space
+ --move-to-common-space
+ #end if
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ biosample
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type ${source_type}
+ #end if
+ #if str($vessel_type_selector.vessel_type) != 'use_provided'
+ --vessel-type ${vessel_type_selector.vessel_type}
+ #end if
+ #if str($vessel_content) != 'use_provided'
+ --vessel-content=${vessel_content}
+ #end if
+ #if str($vessel_status) != 'use_provided'
+ --vessel-status=${vessel_status}
+ #end if
+ #if str($vessel_type_selector.vessel_type) == 'IlluminaBeadChipArray'
+ #if str($vessel_type_selector.assay_type) != 'use_provided'
+ --bead-chip-assay-type=${vessel_type_selector.assay_type}
+ #end if
+ #end if
+
+A biosample record will have, at least, the following fields::
+
+ label source
+ I001-bs-2 V932814892
+ I002-bs-2 V932814892
+ I003-bs-2 None
+
+where label is the label of the biosample container. If a 'None' value
+is given in the source column, the biosample will be imported as a
+new, unlinked object into the biobank. Another example, this time
+involving DNA samples::
+
+ label source used_volume current_volume activation_date
+ I001-dna V932814899 0.3 0.2 17/03/2007
+ I002-dna V932814900 0.22 0.2 21/01/2004
+
+A special case is when records refer to biosamples contained in plate
+wells. In this case, an additional column must be present with the VID
+of the corresponding TiterPlate object. For instance::
+
+ plate label source
+ V39030 A01 V932814892
+ V39031 A02 V932814893
+ V39032 A03 V932814894
+
+where the label column is now the label of the well position.
+
+If row and column (optional) are provided, the program will use them;
+if they are not provided, it will infer them from label (e.g., J01 ->
+row=10, column=1). Missing labels will be generated as::
+
+ '%s%03d' % (chr(row+ord('A')-1), column)
+
+A badly formed label will result in the rejection of the record; the
+same will happen if label, row and column are inconsistent. The well
+will be filled with current_volume material, produced by removing
+used_volume material from the biological material contained in the
+vessel identified by source. row and column are 1-based, as shown in
+the sketch below.
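+
+As a clarifying sketch (not the importer's actual code), the mapping
+between well labels and 1-based coordinates described above can be
+written as::
+
+ def label_to_row_column(label):
+     # e.g. 'J01' -> (10, 1)
+     return ord(label[0]) - ord('A') + 1, int(label[1:])
+
+ def row_column_to_label(row, column):
+     # inverse mapping, used to generate missing labels
+     return '%s%03d' % (chr(row + ord('A') - 1), column)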
+
+If the sample is an IlluminaBeadChipArray, the plate column used in the
+PlateWell case becomes an illumina_array column and a new column, named
+bead_chip_assay_type, is required::
+
+ illumina_array label source bead_chip_assay_type
+ V1351235 R01C01 V412441 HUMANEXOME_12V1_B
+ V1351235 R01C02 V351151 HUMANEXOME_12V1_B
+ V1351235 R02C01 V345115 HUMANEXOME_12V1_B
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/birth_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/birth_data.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,57 @@
+
+ import birth data within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ birth_data
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study individual timestamp birth_date birth_place
+ ASTUDY V1234 1310057541608 12/03/1978 006171
+ ASTUDY V14112 1310057541608 25/04/1983 006149
+ ASTUDY V1241 1310057541608 12/03/2001 006172
+ .....
+
+where birth_place is a valid ISTAT code for an Italian city or a
+foreign country, and birth_date must be in dd/mm/YYYY format.
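+
+As a minimal sketch (not the importer's actual validation code), a
+birth_date value can be checked with the standard library::
+
+ import datetime
+
+ def parse_birth_date(s):
+     # raises ValueError unless s is in dd/mm/YYYY format
+     return datetime.datetime.strptime(s, '%d/%m/%Y').date()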
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/data_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_collection.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,120 @@
+
+ import DataCollection definitions within
+ OMERO.biobank
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ #if $omero_configuration.level == 'advanced'
+ --host=$omero_configuration.vl_host
+ --user=$omero_configuration.vl_user
+ --passwd=$omero_configuration.vl_passwd
+ #else
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ #end if
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_collection
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($data_sample_type) != 'use_provided'
+ --data-sample-type=${data_sample_type}
+ #end if
+ #if str($label)
+ --label=${label}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study label data_sample
+ BSTUDY dc-01 V0390290
+ BSTUDY dc-01 V0390291
+ BSTUDY dc-02 V0390292
+ BSTUDY dc-02 V390293
+ ...
+
+This will create new DataCollection(s), whose labels are defined by the
+label column, and link to each of them, via DataCollectionItem objects,
+the DataSample object(s) identified by data_sample (a VID).
+
+Records that point to an unknown DataSample will abort the data
+collection loading. Previously seen collections will be noisily
+ignored. It is not legal to use the importer to add items to a
+previously known collection.
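+
+As an illustrative sketch (a plain pass over the input file, not the
+importer's internal code), the grouping of data_sample VIDs by
+collection label amounts to::
+
+ import csv
+
+ def group_by_label(ifile):
+     groups = {}
+     with open(ifile) as f:
+         for row in csv.DictReader(f, delimiter='\t'):
+             groups.setdefault(row['label'], []).append(row['data_sample'])
+     return groups  # e.g. {'dc-01': ['V0390290', 'V0390291'], ...}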
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/data_object.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_object.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,82 @@
+
+ import DataObject definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_object
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($mimetype) != 'use_provided'
+ --mimetype=${mimetype}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study path data_sample mimetype size sha1
+
+ TEST01 file:/share/fs/v039303.cel V2902 x-vl/affymetrix-cel 39090 E909090
+ ....
+
+Records that point to an unknown data sample will be noisily
+ignored. The same will happen to records that have the same path as a
+previously seen data_object.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/data_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/data_sample.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,165 @@
+
+ import DataSample definitions within OMERO.biobank
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ #if $omero_configuration.level == 'advanced'
+ --host=$omero_configuration.vl_host
+ --user=$omero_configuration.vl_user
+ --passwd=$omero_configuration.vl_passwd
+ #else
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ #end if
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ data_sample
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type=${source_type}
+ #end if
+ #if str($device_type) != 'use_provided'
+ --device-type=${device_type}
+ #end if
+ #if str($scanner) != 'use_provided'
+ --scanner=${scanner}
+ #end if
+ #if str($data_sample_type) != 'use_provided'
+ --data-sample-type=${data_sample_type}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study label source device device_type scanner options
+ ASTUDY foo01 v03909 v9309 Chip v99020 celID=0009099090
+ ASTUDY foo02 v03909 v99022 Scanner v99022 conf1=...,conf2=...
+ ....
+
+In this example, the first line corresponds to a dataset obtained by
+using chip v9309 on scanner v99020, while the second data sample has
+been obtained with a technology that uses a scanner directly, e.g., an
+Illumina HiSeq 2000. The '''scanner''' column is there as a
+convenience to support a more detailed description of a chip-based
+acquisition.
+
+The general strategy is to decide what data objects should be
+instantiated by looking at the chip column and at its corresponding
+maker, model and release.
+
+The optional column '''scanner''', the vid of the scanner device, is
+used in cases, such as Affymetrix genotyping, where it is relevant.
+
+It is also possible to import DataSample(s) that are the results of
+processing other DataSample(s). Here is an example::
+
+ study label source device device_type options
+ ASTUDY foo01 v03909 v99021 SoftwareProgram conf1=...,conf2=...
+ ASTUDY foo02 v03909 v99021 SoftwareProgram conf1=...,conf2=...
+ ....
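+
+The options column holds a comma-separated list of key=value pairs; a
+minimal parsing sketch (the importer's own parser may differ) is::
+
+ def parse_options(s):
+     # 'celID=0009099090' -> {'celID': '0009099090'}
+     return dict(kv.split('=', 1) for kv in s.split(',')) if s else {}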
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/device.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/device.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,99 @@
+
+ import Device definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ device
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($device_type) != 'use_provided'
+ --device-type=${device_type}
+ #end if
+ #if str($maker)
+ --maker=${maker}
+ #end if
+ #if str($model)
+ --model=${model}
+ #end if
+ #if str($release)
+ --release=${release}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study device_type label barcode maker model release location
+ BSTUDY Scanner pula01 8989898 Affymetrix GeneChip Scanner 3000 7G Pula bld. 5
+ BSTUDY Chip chip001 8329482 Affymetrix Genome-Wide Human SNP Array 6.0 None
+
+All devices have a type, a label, an optional barcode, a maker, a
+model, a release and an optional physical location. In the example
+above, the first line defines a scanner physically located in the
+building 5 lab in Pula; the second line defines a chip.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/diagnosis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/diagnosis.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,53 @@
+
+ import diagnosis data within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ diagnosis
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ study individual timestamp diagnosis
+ ASTUDY V899 1310057541608 icd10-cm:E10
+ ASTUDY V899 1310057541608 icd10-cm:G35
+ ASTUDY V1806 1310057541608 exclusion-problem_diagnosis
+ ...
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/enrollment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/enrollment.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,78 @@
+
+ Create new enrollments for existing individuals within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ enrollment
+ #if str($study_label) != 'use_provided'
+ --study=$study_label
+ #end if
+
+Import of new enrollments related to existing individuals.
+An enrollment is characterized by the following fields::
+
+ source study label
+ V044DE795E7F9F42FEB9855288CF577A77 xxx id1
+ V06C59B915C0FD47DABE6AE02C731780AF xxx id2
+ V01654DCFC5BB640C0BB7EE088194E629D xxx id3
+
+where source must be the VID of an existing Individual object, study
+the label of an existing Study object, and label the enrollment code
+for the patient in the study.
+
+The enrollment sub-operation will retrieve the source individual from
+the DB, create a new enrollment related to it and output the VIDs of
+newly created enrollments. It is not possible to create two
+enrollments with the same code related to the same study, nor is it
+possible to enroll a patient twice in the same study, even with
+different codes.
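+
+A client-side sanity check for these two constraints, sketched over the
+input file (assuming the tab-separated columns shown above), could be::
+
+ import csv
+
+ def find_conflicts(ifile):
+     seen_codes, seen_inds, conflicts = set(), set(), []
+     with open(ifile) as f:
+         for row in csv.DictReader(f, delimiter='\t'):
+             code = (row['study'], row['label'])
+             individual = (row['study'], row['source'])
+             if code in seen_codes or individual in seen_inds:
+                 conflicts.append(row)
+             seen_codes.add(code)
+             seen_inds.add(individual)
+     return conflicts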
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/group.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/group.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,53 @@
+
+ Create a new group within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ group
+ #if str($group_label) != ''
+ --group=$group_label
+ #end if
+
+Will create a new group of individuals from a file with the following columns::
+
+ study label individual
+ foo I0000 V06C59B915C0FD47DABE6AE02C731780AF
+ foo I0001 V0B718B77691B145BFA8901FCCF6B37998
+ ...
+
+where the column study is optional (it can be provided via the
+group_label param). Labels should be unique within the file and the
+individual field should contain VIDs of existing (within omero/vl)
+Individual objects.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/illumina_bead_chip_measures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/illumina_bead_chip_measures.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,90 @@
+
+ import IlluminaBeadChipMeasures definitions within OMERO
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ illumina_bead_chip_measures
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source_type=${source_type}
+ #end if
+ #if str($action_category) != 'use_provided'
+ --action_category=${action_category}
+ #end if
+
+Will read a tsv file with the following columns::
+
+ study label red_channel green_channel source source_type
+ ASTUDY CHIP_01_R01C01 V1415151235513 V135135661356161 V351351351551 IlluminaBeadChipArray
+ ASTUDY CHIP_01_R01C02 V2346262462462 V112395151351623 V135113513223 IlluminaBeadChipArray
+ ASTUDY CHIP_01_R02C01 V1351362899135 V913977551235981 V100941215192 IlluminaBeadChipArray
+
+This will create new IlluminaBeadChipMeasures whose labels are defined in the
+label column.
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/importer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/importer.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.importer.main import main
+
+main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/individual.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/individual.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,71 @@
+
+ import individual definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=$input
+ --ofile=$output
+ --report_file=$report
+ --logfile=$logfile
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ individual
+ #if str($study) != 'use_provided'
+ --study $study
+ #end if
+
+Will import a stream of new individual definitions defined by the
+following columns::
+
+ label gender father mother
+ id2 male None None
+ id3 female None None
+ ....
+
+It is not possible to import the same individual twice: the related
+file rows will be noisily ignored.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/laneslot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/laneslot.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,101 @@
+
+ import LaneSlot definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ laneslot
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source_type=${source_type}
+ #end if
+ #if str($content) != 'use_provided'
+ --content=${content}
+ #end if
+
+A lane slot record will have the following fields::
+
+ lane tag content source
+ V123411 ATCACG DNA V4512415
+ V123411 CGATGT DNA V1415512
+ V412511 DNA V1909012
+ V661251 TGACCA DNA V1123111
+ V661251 CTTGTA DNA V1211141
+ ....
+
+The content column is optional if its value is passed as the script's
+input parameter; the tag column is optional too.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/launcher.sh Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,56 @@
+#!/bin/bash
+
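+# Wrapper for the biobank tools: it extracts --interpreter and --runner from
+# the argument list, validates the OMERO connection parameters, builds a
+# per-host PYTHONPATH, sources the matching biobank profile and finally runs
+# the requested script with the remaining arguments.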
+CMD=""
+PYTH_PATH="PYTHONPATH=/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/p\
+ython2.7/site-packages/:/SHARE/USERFS/els7/users/biobank/lib/"
+runner="$(dirname ${BASH_SOURCE[0]})/"
+until [ -z "$1" ]
+ do
+
+ opt_host='--host='
+ opt_user='--user='
+ opt_passwd='--passwd='
+ opt_interpreter='--interpreter='
+ opt_runner='--runner='
+ if [[ $1 == $opt_host* ]]; then
+ host=`echo $1 | cut -d '=' -f2 | cut -d '.' -f1`
+ if [ -z "$host" ] || [ "$host" = 'None' ]; then
+ echo 'ERROR. Missing omero host parameter. Please, set Omero Host in your user preferences' >&2
+ exit 1
+ fi
+ PYTH_PATH+=$host
+ HOST=`echo $1 | cut -d '=' -f2`
+ CMD+=' '$1
+ elif [[ $1 == $opt_user* ]]; then
+ user=`echo $1 | cut -d '=' -f2`
+ if [ -z "$user" ] || [ "$user" = 'None' ]; then
+ echo 'ERROR. Missing omero user parameter. Please, set Omero User in your user preferences' >&2
+ exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_passwd* ]]; then
+ passwd=`echo $1 | cut -d '=' -f2`
+ if [ -z "$passwd" ] || [ "$passwd" = 'None' ]; then
+ echo 'ERROR. Missing omero password parameter. Please, set Omero Password in your user preferences' >&2
+ exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_runner* ]]; then
+ runner+=`echo $1 | cut -d '=' -f2`
+ elif [[ $1 == $opt_interpreter* ]]; then
+ interpreter=`echo $1 | cut -d '=' -f2`
+ else
+ CMD+=' '$1
+ fi
+ shift
+done
+export "${PYTH_PATH}/:${PYTHONPATH}"
+profile="/SHARE/USERFS/els7/users/biobank/lib/${HOST}.biobank.profile"
+if [ -f "$profile" ]; then
+ source $profile
+ CMD=$interpreter' '$runner$CMD
+ $CMD
+else
+ echo "ERROR. Biobank profile file doesn't exist. Please, check Omero Host in your user preferences" > /dev/null >&2
+ exit -1
+fi
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/marker_alignment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/marker_alignment.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,113 @@
+
+ import marker alignments within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ marker_alignment
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($ref_genome)
+ --ref-genome ${ref_genome}
+ #end if
+ #if str($message)
+ --message ${message}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ marker_vid ref_genome chromosome pos strand allele copies
+ V0909090 hg18 10 82938938 True A 1
+ V0909091 hg18 1 82938999 True A 2
+ V0909092 hg18 1 82938938 True B 2
+ ...
+
+Since pos is relative to 5', if the marker has been aligned on the
+other strand, it is the responsibility of the aligner app to report
+the actual distance from 5', while, at the same time, registering that
+the SNP has actually been aligned on the other strand.
+
+The chromosome field is an integer field with values in the [1, 26]
+range, with 23-26 representing, respectively, the X chromosome, the Y
+chromosome, the pseudoautosomal regions (XY) and the mitochondrial DNA
+(MT).
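+
+A decoding sketch for the chromosome field::
+
+ def decode_chromosome(n):
+     # 1-22 are the autosomes; 23-26 encode X, Y, XY and MT
+     special = {23: 'X', 24: 'Y', 25: 'XY', 26: 'MT'}
+     return special.get(n, str(n))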
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/marker_definition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/marker_definition.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,92 @@
+
+ import Marker definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ marker_definition
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ --source ${source}
+ --context ${context}
+ --release ${release}
+ --ref-genome ${ref_genome}
+ --dbsnp-build ${dbsnp_build}
+
+Will read in a tsv file with the following columns::
+
+ label rs_label mask strand allele_a allele_b
+ SNP_A-1780419 rs6576700 [A/G] TOP A G
+ ...
+
+where label is the unique label for this marker in the (source,
+context, release) context, and rs_label is the dbSNP label for this
+SNP (it can be the string ``None`` if not defined or not known). The
+mask column contains the SNP definition. The strand column can either
+be the actual 'Illumina style' strand used to define the alleles in
+the allele columns, or the string 'None', which means that the alleles
+in the allele columns are defined with respect to the mask in the mask
+column.
+
+It will, for each row, convert the mask to the TOP strand following
+Illumina conventions and then save a record for it in VL. The saved
+tuple is (source, context, release, label, rs_label, TOP_mask). There
+are no collision controls.
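+
+As a sketch (a hypothetical helper, not part of the importer), a mask
+in the usual flank[allele_a/allele_b]flank notation can be split as::
+
+ import re
+
+ def parse_mask(mask):
+     # '...[A/G]...' -> (left flank, 'A', 'G', right flank)
+     m = re.match(r'^(.*)\[(.+)/(.+)\](.*)$', mask)
+     if m is None:
+         raise ValueError('badly formed mask: %r' % mask)
+     return m.groups()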
+
+It will output a tsv file with the following columns::
+
+ study label type vid
+ ASTUDY SNP_A-xxx Marker V000002222
+ ...
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/markers_set.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/markers_set.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,74 @@
+
+ import markers set definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ markers_set
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($label)
+ --label ${label}
+ #end if
+ #if str($maker) != 'use_provided'
+ --maker ${maker}
+ #end if
+ #if str($model) != 'use_provided'
+ --model ${model}
+ #end if
+ #if str($release)
+ --release ${release}
+ #end if
+
+Will read in a tsv file with the following columns::
+
+ marker_vid marker_indx allele_flip
+ V902909090 0 False
+ V902909091 1 False
+ V902909092 2 True
+ ...
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/samples_container.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/samples_container.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,221 @@
+
+ import samples container definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ samples_container
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($container_type_selector.container_type) != 'use_provided'
+ --container-type=${container_type_selector.container_type}
+ #if str($container_type_selector.container_type) == 'TiterPlate'
+ #if str($container_type_selector.plate_shape) != 'use_provided'
+ --plate-shape=${container_type_selector.plate_shape}
+ #end if
+ #elif str($container_type_selector.container_type) == 'FlowCell'
+ #if str($container_type_selector.flow_cell_slots) != 'use_provided'
+ --number-of-slots=${container_type_selector.flow_cell_slots}
+ #end if
+ #elif str($container_type_selector.container_type) == 'IlluminaArrayOfArrays'
+ #if str($container_type_selector.ill_shape) != 'use_provided'
+ --plate-shape=${container_type_selector.ill_shape}
+ #end if
+ #if str($container_type_selector.ill_slots) != 'use_provided'
+ --number-of-slots=${container_type_selector.ill_slots}
+ #end if
+ #if str($container_type_selector.array_type) != 'use_provided'
+ --illumina-array-type=${container_type_selector.array_type}
+ #end if
+ #if str($container_type_selector.array_class) != 'use_provided'
+ --illumina-array-class=${container_type_selector.array_class}
+ #end if
+ #if str($container_type_selector.assay_type) != 'use_provided'
+ --illumina-assay-type=${container_type_selector.assay_type}
+ #end if
+ #end if
+ #end if
+ #if str($container_status) != 'use_provided'
+ --container-status=${container_status}
+ #end if
+
+A container record will have the following fields::
+
+ label container_status creation_date
+ A_CONTAINER USABLE 13/02/2012
+ B_CONTAINER INSTOCK 12/01/2001
+ C_CONTAINER USABLE 25/04/2012
+ ....
+
+The creation_date column is optional; if it is not specified, the
+current date will be used as the object's creation date. The
+container_status column is also optional if its value is passed as an
+input parameter.
+
+
+When importing new containers, special fields can be included in the
+CSV file depending on the type of the objects that you want to
+import.
+
+For TITER PLATE objects the syntax can be the following::
+
+ label barcode container_status rows columns
+ A_TITERPLATE XXYYZZ111 INSTOCK 8 12
+ B_TITERPLATE XXYYZZ112 INSTOCK 8 12
+ C_TITERPLATE XXYYZZ113 READY 8 12
+ ....
+
+The rows and columns values are optional if they are passed as input
+parameters; the barcode column is optional.
+
+For ILLUMINA ARRAY OF ARRAYS objects the syntax can be the following::
+
+ label barcode container_status rows columns illumina_array_type illumina_array_class illumina_assay_type
+ A_ILLARRAY XXYYZZ111 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+ B_ILLARRAY XXYYZZ112 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+ C_ILLARRAY XXYYZZ113 INSTOCK 4 2 BeadChip_12x1Q Slide Infinium_HD
+
+rows, columns, illumina_array_type, illumina_array_class and
+illumina_assay_type are optional if these values are passed as input
+parameters; the barcode column is optional.
+
+For FLOW CELL objects the syntax can be the following::
+
+ label barcode container_status number_of_slots
+ A_FLOWCELL XXYYZZ221 INSTOCK 8
+ B_FLOWCELL XXYYZZ222 INSTOCK 8
+ C_FLOWCELL XXYYZZ223 INSTOCK 8
+ ....
+
+The number_of_slots column is optional if its value is passed as an
+input parameter; the barcode column is optional.
+
+For LANE objects the syntax can be the following::
+
+ flow_cell slot container_status
+ V112441441 1 INSTOCK
+ V112441441 2 INSTOCK
+ V112441441 3 INSTOCK
+ V351145519 1 INSTOCK
+ V351145519 2 INSTOCK
+ ....
+
+For Lane objects, no label column has to be provided; the importer
+will automatically compute the label for each imported object.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/sequencing_data_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/sequencing_data_sample.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,184 @@
+
+
+ Import sequencing-related DataSample definitions within OMERO.biobank
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ #if $omero_configuration.level == 'advanced'
+ --host=$omero_configuration.vl_host
+ --user=$omero_configuration.vl_user
+ --passwd=$omero_configuration.vl_passwd
+ #else
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ #end if
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ seq_data_sample
+ #if str($study) != 'use_provided'
+ --study=${study}
+ #end if
+ #if str($source_type) != 'use_provided'
+ --source-type=${source_type}
+ #end if
+ #if str($seq_dsample_type) != 'use_provided'
+ --seq-dsample-type=${seq_dsample_type}
+ #end if
+ #if str($dsample_status) != 'use_provided'
+ --status=${dsample_status}
+ #end if
+ #if str($device) != 'use_provided'
+ --device=${device}
+ #end if
+ #if str($history) != 'None'
+ --history=${history}
+ #end if
+
+Will read a tsv file with the following columns::
+
+ study label source source_type seq_dsample_type status device
+ FOOBAR seq_out_1 V012141 FlowCell SequencerOutput USABLE V123141
+ FOOBAR seq_out_2 V012141 FlowCell SequencerOutput USABLE V123141
+ FOOBAR seq_out_3 V1AD124 FlowCell SequencerOutput USABLE V123141
+ ...
+
+where:
+
+ * seq_dsample_type can assume one of the following values: SequencerOutput, RawSeqDataSample, SeqDataSample
+ * source_type can assume one of the following values: FlowCell, SequencerOutput, RawSeqDataSample
+
+study, source_type, seq_dsample_type, status and device columns can be
+overwritten by using command line options.
+
+A special case of the previous file is when seq_dsample_type is
+SeqDataSample; in this case, a mandatory sample column is required,
+which has to contain the IDs of Tube objects. The file will look like
+this::
+
+ study label source source_type seq_dsample_type status device sample
+ FOOBAR seq_dsample_1 V041241 SequencerOutput SeqDataSample USABLE VBB2351 V124AA41
+ FOOBAR seq_dsample_2 V051561 SequencerOutput SeqDataSample USABLE VBB2351 V4151AAE
+ FOOBAR seq_dsample_3 V151561 SequencerOutput SeqDataSample USABLE VBB2351 V15199CD
+ ...
+
+A file containing an export of the Galaxy history that produced the
+data being imported can be passed as an input parameter; the history
+details must be represented as a string serialized in JSON format.
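+
+For example (field names here are purely illustrative, the actual keys
+depend on the Galaxy history export)::
+
+ import json
+
+ history = {'name': 'seq run 42', 'tools': ['bwa', 'samtools']}
+ serialized = json.dumps(history)  # the string passed via --history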
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/study.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/study.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,59 @@
+
+ import study definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ study
+
+Will import a stream of new study definitions defined by the following
+tab-separated columns. A typical file will look like the following::
+
+ label description
+ BSTUDY A basically empty description of BSTUDY
+ CSTUDY A basically empty description of CSTUDY
+ ....
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/unauthorized_access.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/unauthorized_access.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,6 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+
+sys.exit("You are not authorized to use this tool")
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/importer/vessels_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/importer/vessels_collection.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,94 @@
+
+ import VesselsCollection definitions within omero/vl
+
+ launcher.sh
+ --interpreter=python
+ --runner=importer.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ifile=${input}
+ --ofile=${output}
+ --report_file=${report}
+ --logfile=${logfile}
+ #if $blocking_validation
+ --blocking-validator
+ #end if
+ vessels_collection
+ #if str($study) != 'use_provided'
+ --study ${study}
+ #end if
+ #if str($vessel_type) != 'use_provided'
+ --vessel_type=${vessel_type}
+ #end if
+ #if str($label)
+ --label=${label}
+ #end if
+
+To correctly import collections of vessels, the input file must have
+the following format::
+
+ label vessel vessel_type
+ COLLECTION-A V1234545 Tube
+ COLLECTION-A V1212434 Tube
+ COLLECTION-A V3434176 Tube
+ COLLECTION-B V2321001 Tube
+ COLLECTION-B V1210402 Tube
+ ....
+
+Column 'label' contains the names of the collections to be imported,
+while 'vessel' contains the VIDs of the tubes or plates that are part
+of the collections.
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/library/import_to_library.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/library/import_to_library.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
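+# Copy the requested OMERO.biobank data objects into the Galaxy user import
+# directory and link them into a Galaxy data library through the bioblend
+# API; paths and Galaxy endpoints are read from the YAML file given with
+# --ini_file.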
+import sys, os, argparse, logging, yaml, datetime, subprocess, stat
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='')
+
+ parser.add_argument('--ini_file', type=str, default="{0}/init_file.yaml".format(os.path.dirname(os.path.realpath(sys.argv[0]))),help='Configuration file (yaml)')
+
+ parser.add_argument('--host', type=str, required=True, help='omero host')
+ parser.add_argument('--user', type=str, required=True, help='omero user')
+ parser.add_argument('--passwd', type=str, required=True, help='omero passwd')
+
+ parser.add_argument('--galaxy_host', type=str, help='Galaxy Host (with port)')
+ parser.add_argument('--galaxy_api_key', type=str, help='Galaxy API key')
+
+ parser.add_argument('--operator', type=str, help='Galaxy user email')
+
+ parser.add_argument('--library', type=str, required=False, help='library name')
+ parser.add_argument('--folder', type=str, required=False, help='library folder')
+
+ parser.add_argument('--data_objects', type=str, required=True, help='comma-separated data object ids')
+
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, help='logging level (default: INFO)', default='INFO')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+
+ return parser
+
+def main(argv):
+ global logger
+ global ini_file
+ global kb
+ global apiGalaxy
+
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ # Initializing logger
+ logger = init_logger(args,logging)
+
+ # Reading YAML configuration file
+ ini_file = init_config(args)
+
+ # Initializing python libraries
+ init_pythonpath(args,ini_file)
+
+ # Initializing connection to omero biobank
+ kb = init_omero_biobank(args)
+
+ # Initializing connection to apiGalaxy
+ apiGalaxy = init_api_galaxy(args)
+
+ # Getting library and folder id
+ library_id,folder_id = get_library_and_folder_ids(args)
+
+ # Getting data_objects
+ data_objects = get_data_objects(args)
+
+ import_data_objects(args,data_objects,library_id,folder_id)
+
+def import_data_objects(args,data_objects,library_id,folder_id):
+ user_import_dir = get_user_import_dir(args)
+
+ logger.info("copying datasets in user import dir")
+ files = copy_in_user_import_dir(data_objects,user_import_dir)
+
+ logger.info("wait while copiyng")
+ polling(files)
+
+ logger.info("ready to import in library {0} under folder {1}".format(args.library,args.folder))
+
+ logger.info('importing in library')
+ successful = 0
+ for file_type, folder in user_import_dir.iteritems():
+ if len(os.listdir(folder)) == 0: continue
+ if 'fastq' in file_type: file_type = 'fastqsanger'
+ status = apiGalaxy.libraries.upload_file_from_server(library_id, folder, folder_id, file_type=file_type, link_data_only='link_to_files')
+ successful += len(status)
+ if successful == len(files):
+ logger.info("SUCCESS")
+ else:
+ logger.critical("ERROR WHILE IMPORTING")
+
+ raise SystemExit
+
+def copy_in_user_import_dir(data_objects,user_import_dir):
+ files = list()
+
+ for dobj in data_objects:
+ if dobj.path.startswith('irods://'):
+ irods_path = dobj.path.replace('irods://','')
+ phys_path = irods.get_object_info(irods_path)['phys_path'].strip()
+
+ elif dobj.path.startswith('file://'):
+ irods_path = None
+ phys_path = dobj.path.replace('file://','')
+ else:
+ #continue
+ irods_path = dobj.path.replace('irods://','')
+ phys_path = irods.get_object_info(irods_path)['phys_path'].strip()
+
+ data_type = dobj.mimetype.split('/')[-1].replace('+64','')
+ dest_path = get_destination_path(irods_path,phys_path,dobj.sample,user_import_dir,data_type).strip()
+ #rsync_command = "qsub -b y /usr/bin/rsync -rcLPhv {0} {1}".format(phys_path,dest_path)
+ rsync_command = "rsync -rcLPhv {0} {1}".format(phys_path,dest_path)
+ logger.info('launching copy for {0} dataset'.format(os.path.basename(dest_path)))
+ subprocess.Popen(rsync_command.split(' '))
+
+ files.append(dest_path)
+
+ return files
+
+def polling(files):
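+ # Block until every destination file has appeared: gzipped files are
+ # gunzipped as soon as they show up and are considered done once the
+ # uncompressed path exists; plain files are done as soon as they exist.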
+ all_done = False
+ founds = list()
+ while not all_done:
+ done = True
+ for dest_path in files:
+ if dest_path.endswith('.gz'):
+ unzip_path = dest_path.replace('.gz','')
+ if not os.path.exists(dest_path) and not os.path.exists(unzip_path):
+ done = False
+ elif os.path.exists(dest_path):
+
+ done = False
+ logger.info("found {0}".format(os.path.basename(dest_path)))
+ logger.info("gunzipping {0}".format(os.path.basename(dest_path)))
+ cmd = "gunzip {0}".format(dest_path)
+ g_unzip = subprocess.check_output(cmd, stderr=subprocess.STDOUT,shell=True).strip()
+ logger.info(g_unzip)
+ else:
+ if not os.path.exists(dest_path):
+ done = False
+ elif os.path.exists(dest_path) and dest_path not in founds:
+ founds.append(dest_path)
+ logger.info("found {0}".format(os.path.basename(dest_path)))
+ all_done = done
+ return True
+
+def get_user_import_dir(args):
+ user_import_dir = dict()
+ subfolder = str(datetime.datetime.now()).split('.')[0].replace(' ','_').replace(':','-')
+ user_import_dir={'fastq' : "{0}/{1}/{2}_{3}".format(ini_file['LIBRARY_IMPORT_DIR_{0}'.format(args.host.split('.')[0].upper())],args.operator,subfolder,'fastq'),
+ 'vcf' : "{0}/{1}/{2}_{3}".format(ini_file['LIBRARY_IMPORT_DIR_{0}'.format(args.host.split('.')[0].upper())],args.operator,subfolder,'vcf'),
+ 'bam' : "{0}/{1}/{2}_{3}".format(ini_file['LIBRARY_IMPORT_DIR_{0}'.format(args.host.split('.')[0].upper())],args.operator,subfolder,'bam')
+ }
+ os.umask(0)
+ for k, folder in user_import_dir.iteritems():
+ if not os.path.exists(folder):
+ os.makedirs(folder,0775)
+ return user_import_dir
+
+def get_destination_path(irods_path,phys_path,data_sample,user_import_dir,data_type):
+
+ if isinstance(data_sample, kb.SeqDataSample) or isinstance(data_sample, kb.AlignedSeqDataSample):
+ if data_sample.sample.label == 'TRAINING_tube_1' : label = 'FATHER'
+ elif data_sample.sample.label == 'TRAINING_tube_2' : label = 'PROBAND'
+ elif data_sample.sample.label == 'TRAINING_tube_3' : label = 'MOTHER'
+ else: raise ValueError('unexpected sample label: %s' % data_sample.sample.label)
+ #label = data_sample.sample.label
+ if isinstance(data_sample, kb.GenomeVariationsDataSample):
+ label = data_sample.label
+
+ filename = "{0}/{1}".format(user_import_dir[data_type],label)
+
+ if irods_path:
+ attr = get_attributes(irods_path)
+
+ if attr.has_key('read'): filename = "{0}_R{1}".format(filename,attr['read'])
+ #if attr.has_key('lanes'): filename = "{0}_L{1}".format(filename,attr['lanes'])
+ if attr.has_key('compression') and attr['compression'] == 'gzip':
+ filename = "{0}.gz".format(filename)
+ else:
+ filename = "{0}_{1}".format(filename,os.path.basename(phys_path))
+ filename = filename.replace('.fq','')
+ return filename
+
+def get_data_objects(args):
+ logger.info("getting data objects")
+ data_objects = list()
+ data_object_ids = args.data_objects.split(',')
+ for dataobj in kb.get_objects(kb.DataObject):
+ if str(dataobj.omero_id) in data_object_ids:
+ data_objects.append(dataobj)
+ logging.info("found {0}".format(len(data_objects)))
+ return data_objects
+
+def get_library_and_folder_ids(args):
+ if args.library is None:
+ logger.critical("Library is a mandatory parameter")
+ sys.exit()
+ library_name = args.library.split('?')[0].replace('.',' ')
+ logger.info("searching for library")
+ orione_library = apiGalaxy.libraries.get_libraries(name="{0}".format(library_name))
+ if len(orione_library) == 0:
+ logger.critical("sorry, library {0} doesn't exist".format(library_name))
+ sys.exit()
+ library_id = orione_library[0].get('id',None)
+
+ if '?' in args.library and args.library == args.folder:
+ folder_name = args.library.split('?')[1].replace('.',' ')
+ else:
+ return library_id,None
+ logger.info("searching for folder {0}".format(folder_name))
+
+ folder = apiGalaxy.libraries.get_folders(library_id=library_id,name=u"/{0}".format(folder_name))
+ if len(folder) == 0:
+ logger.info("not found. creating it..")
+ try:
+ folder = apiGalaxy.libraries.create_folder(library_id,folder_name)
+ except Exception:
+ logger.critical("impossible to create folder {0}".format(folder_name))
+ sys.exit()
+
+
+ folder_id = folder[0].get('id',None)
+
+ return library_id,folder_id
+
+def get_attributes(irods_path):
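+ # Parse the output of `imeta ls -ld <path>`, which lists metadata as
+ # blocks separated by '----' containing 'attribute: <key>' and
+ # 'value: <value>' lines, into a flat {key: value} dictionary.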
+ cmd = ['imeta', 'ls', '-ld', irods_path]
+ imeta = [i.splitlines() for i in irods.__irods_check_output(cmd).split('----')]
+ attributes = {}
+ for i in imeta:
+ del i[0]
+ for a in i:
+ if 'attribute' in a:
+ key = a.split(':')[1].strip()
+ if 'value' in a:
+ value = a.split(':')[1].strip()
+ attributes[key] = value
+ return attributes
+
+def init_logger(args,logging):
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {
+ 'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+
+ logger = logging.getLogger( __name__ )
+ return logger
+
+def init_config(args):
+ # Load YAML configuration file
+ logger.info('loading YAML configuration file: %s' % args.ini_file)
+ try:
+ ini_file = yaml.load(open(args.ini_file))
+ except Exception:
+ logger.critical('%s is not a valid YAML configuration file' %args.ini_file)
+ sys.exit()
+
+ return ini_file
+
+def init_pythonpath(args,ini_file):
+ logger.info('exporting pythonpath')
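+ # reverse + append + reverse == prepend: these paths must take
+ # precedence over anything already on sys.path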
+ sys.path.reverse()
+ sys.path.append('/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/python2.7/site-packages/')
+ sys.path.append('/u/galaxy/.local/lib/python2.7/site-packages/poster-0.8.1-py2.7.egg')
+ sys.path.append('/SHARE/USERFS/els7/users/sequencing/usr-cluster/lib/python2.7/site-packages/automator-0.1-py2.7.egg')
+ sys.path.append("{0}/{1}".format(ini_file['PYTHPATH'],args.host.split('.')[0]))
+ sys.path.reverse()
+
+ global KB
+ from bl.vl.kb import KnowledgeBase as KB
+
+ global irods
+ import automator.agent.irods as irods
+
+ #global bioblend
+ #import bioblend
+
+ global GalaxyInstance
+ from bioblend.galaxy import GalaxyInstance
+
+def init_omero_biobank(args):
+ logger.info('opening kb connection to {0}'.format(args.host))
+
+ try:
+ kb = KB(driver='omero')(args.host, args.user, args.passwd)
+ return kb
+ except Exception:
+ logger.critical('connection refused or failed')
+ sys.exit()
+
+def init_api_galaxy(args):
+ try:
+ galaxy_host = args.galaxy_host or ini_file['GALAXY_HOST_{0}'.format(args.host.split('.')[0].upper())]
+ api_key = args.galaxy_api_key
+ except KeyError, ke:
+ msg = 'No argument passed and no global variable %s found' % ke
+ logger.critical(msg)
+ sys.exit(msg)
+
+
+ logger.info('opening connection to %s with key %s' %(galaxy_host,api_key) )
+ apiGalaxy = GalaxyInstance(galaxy_host, key=api_key)
+ return apiGalaxy
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/library/import_to_library.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/library/import_to_library.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,56 @@
+
+ import DataSet within galaxy library/vl
+
+ irods
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=import_to_library.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --galaxy_api_key=$__user_api_key__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --data_objects=${data_objects}
+ --library=${library_folder}
+ --folder=${library_folder}
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/library/init_file.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/library/init_file.yaml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+GALAXY_HOST_BIOBANK04: 'http://test.galaxy.crs4.it/develop/'
+GALAXY_HOST_BIOBANK18: 'http://orione.crs4.it'
+PYTHPATH: '/SHARE/USERFS/els7/users/biobank/lib'
+LIBRARY_IMPORT_DIR_BIOBANK04: '/SHARE/USERFS/els7/users/galaxy/develop/user_library_import_dir'
+LIBRARY_IMPORT_DIR_BIOBANK18: ''
+LOG_FOLDER: '/SHARE/USERFS/els7/users/galaxy/tmp/logs'
+TMP_DIR: '/SHARE/USERFS/els7/users/galaxy/tmp'
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/library/launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/library/launcher.sh Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,56 @@
+#!/bin/bash
+
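+# Wrapper for the biobank tools: it extracts --interpreter and --runner from
+# the argument list, validates the OMERO connection parameters, builds a
+# per-host PYTHONPATH, sources the matching biobank profile and finally runs
+# the requested script with the remaining arguments.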
+CMD=""
+PYTH_PATH="PYTHONPATH=/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/p\
+ython2.7/site-packages/:/SHARE/USERFS/els7/users/biobank/lib/"
+runner="$(dirname ${BASH_SOURCE[0]})/"
+until [ -z "$1" ]
+ do
+
+ opt_host='--host='
+ opt_user='--user='
+ opt_passwd='--passwd='
+ opt_interpreter='--interpreter='
+ opt_runner='--runner='
+ if [[ $1 == $opt_host* ]]; then
+ host=`echo $1 | cut -d '=' -f2 | cut -d '.' -f1`
+ if [ -z "$host" ] || [ "$host" = 'None' ]; then
+ echo 'ERROR. Missing omero host parameter. Please, set Omero Host in your user preferences' >&2
+ exit 1
+ fi
+ PYTH_PATH+=$host
+ HOST=`echo $1 | cut -d '=' -f2`
+ CMD+=' '$1
+ elif [[ $1 == $opt_user* ]]; then
+ user=`echo $1 | cut -d '=' -f2`
+ if [ -z "$user" ] || [ "$user" = 'None' ]; then
+ echo 'ERROR. Missing omero user parameter. Please, set Omero User in your user preferences' >&2
+ exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_passwd* ]]; then
+ passwd=`echo $1 | cut -d '=' -f2`
+ if [ -z "$passwd" ] || [ "$passwd" = 'None' ]; then
+ echo 'ERROR. Missing omero password parameter. Please, set Omero Password in your user preferences' >&2
+ exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_runner* ]]; then
+ runner+=`echo $1 | cut -d '=' -f2`
+ elif [[ $1 == $opt_interpreter* ]]; then
+ interpreter=`echo $1 | cut -d '=' -f2`
+ else
+ CMD+=' '$1
+ fi
+ shift
+done
+export "${PYTH_PATH}/:${PYTHONPATH}"
+profile="/SHARE/USERFS/els7/users/biobank/lib/${HOST}.biobank.profile"
+if [ -f "$profile" ]; then
+ source $profile
+ CMD=$interpreter' '$runner$CMD
+ $CMD
+else
+ echo "ERROR. Biobank profile file doesn't exist. Please, check Omero Host in your user preferences" > /dev/null >&2
+ exit -1
+fi
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/all_enrollments.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/all_enrollments.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,89 @@
+import csv, os, sys, argparse
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='Retrieve all enrollments')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('--host', type=str, help='omero hostname')
+ parser.add_argument('--user', type=str, help='omero user')
+ parser.add_argument('--passwd', type=str, help='omero password')
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ # This is a temporary hack!!!
+ to_be_ignored = ['IMMUNOCHIP_DISCARDED', 'CASI_MS_CSM_TMP',
+ 'CASI_MS_CSM_CODES']
+
+ logger = get_logger('all_enrollments', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ try:
+ out_file_path = args.ofile
+ except IndexError:
+ logger.error('Mandatory field missing.')
+ parser.print_help()
+ sys.exit(2)
+
+ # Create the KnowledgeBase object
+ kb = KB(driver='omero')(host, user, passwd)
+
+ # Retrieve all studies from omero
+ studies = kb.get_objects(kb.Study)
+ studies = [s for s in studies if s.label not in to_be_ignored]
+ logger.info('Retrieved %d studies from database' % len(studies))
+
+ csv_header = ['individual_uuid']
+ enrolls_map = {}
+
+ # For each study, retrieve all enrollments
+ for s in studies:
+ logger.info('Retrieving enrollments for study %s' % s.label)
+ enrolls = kb.get_enrolled(s)
+ logger.info('%s enrollments retrieved' % len(enrolls))
+ if len(enrolls) > 0:
+ logger.debug('Building lookup dictionary....')
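+ # enrolls_map maps each individual's omero_id to a flat CSV row:
+ # 'individual_uuid', one '<study> #<n>' column per enrollment code,
+ # plus a transient 'studies' dict that is dropped before writing.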
+ for e in enrolls:
+ enrolls_map.setdefault(e.individual.omero_id, {})['individual_uuid'] = e.individual.id
+ enrolls_map[e.individual.omero_id].setdefault('studies', {})
+ enrolls_map[e.individual.omero_id]['studies'].setdefault(s.label,[])
+ enrolls_map[e.individual.omero_id]['studies'][s.label].append(e.studyCode)
+ label = "{0} #{1}".format(s.label,len(enrolls_map[e.individual.omero_id]['studies'][s.label]))
+ enrolls_map[e.individual.omero_id][label] = e.studyCode
+ if label not in csv_header:
+ csv_header.append(label) # Add study label to CSV header
+ else:
+ logger.debug('No enrollments found, skip study %s' % s.label)
+
+ # Write to CSV file
+ logger.debug('Writing CSV file %s' % out_file_path)
+ with open(out_file_path, 'w') as f:
+ writer = csv.DictWriter(f, csv_header,
+ delimiter='\t', quotechar='"',
+ restval = 'None')
+ writer.writeheader()
+ for k, v in enrolls_map.iteritems():
+ v.pop("studies",{})
+ writer.writerow(v)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/all_enrollments.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/all_enrollments.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,29 @@
+
+
+ Retrieve all enrollment codes from the Omero server
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=all_enrollments.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --ofile=${output1}
+ --logfile=${logfile}
+
+ It will output a tsv file with the following columns:
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/build_miniped.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/build_miniped.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,216 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+"""
+A rough example of basic pedigree info generation.
+"""
+
+import argparse
+import collections
+import csv
+import os
+import sys
+import yaml
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.kb.drivers.omero.ehr import EHR
+import bl.vl.individual.pedigree as ped
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+PLINK_MISSING = -9
+PLINK_UNAFFECTED = 1
+PLINK_AFFECTED = 2
+
+FIELDS = ["fam_label", "ind_label", "fat_label", "mot_label", "gender"]
+
+
+def load_config(config_file):
+    with open(config_file) as cfg:
+        # safe_load is enough here: the config contains only plain mappings
+        conf = yaml.safe_load(cfg)
+    return conf
+
+
+class Diagnosis(object):
+ def __init__(self, logger, yaml_file):
+ self.logger = logger
+ if os.path.isfile(yaml_file):
+ self.conf = load_config(yaml_file)
+ else:
+            self.logger.critical('The config file {} does not exist'.format(
+                yaml_file))
+            sys.exit(1)
+
+ def get_openehr_data(self):
+ return self.conf['openEHR']['archetype'], self.conf['openEHR']['field']
+
+ def get_diagnosis_label(self):
+        return self.get_diagnosis().keys()
+
+ def get_diagnosis(self):
+ results = collections.OrderedDict()
+ diagnosis = collections.OrderedDict(sorted(
+ self.conf['diagnosis'].items()))
+ for v in diagnosis.itervalues():
+ results[v['label']] = v['values']
+ return results
+
+    def get_unaffected_diagnosis_dictionary(self):
+        return dict((k, PLINK_UNAFFECTED)
+                    for k in self.get_diagnosis_label())
+
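+# Illustrative sketch (not executed here): with the build_miniped.yaml
+# shipped alongside this tool, a Diagnosis instance behaves like
+#
+#   dobj = Diagnosis(logger, 'build_miniped.yaml')
+#   dobj.get_openehr_data()
+#   # -> ('openEHR-EHR-EVALUATION.problem-diagnosis.v1', 'at0002.1')
+#   dobj.get_diagnosis()
+#   # -> OrderedDict([('t1d', ['icd10-cm:E10']), ('ms', ['icd10-cm:G35']),
+#   #                 ('nefro', ['icd10-cm:E23.2', 'icd10:N00-N08'])])
+#   dobj.get_unaffected_diagnosis_dictionary()
+#   # -> {'t1d': 1, 'ms': 1, 'nefro': 1}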
+
+def make_parser():
+ parser = argparse.ArgumentParser(
+ description='build the first columns of a ped file from VL')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--configfile', type=str, default=os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), 'build_miniped.yaml'),
+ help='config file (yaml) with diagnosis dictionary')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-S', '--study', type=str, required=True,
+ help="a list of comma separated studies used to "
+ "retrieve individuals that will be written to "
+ "ped file")
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)
+ parser.add_argument('--write_header', action='store_true', default=False,
+ help='Write header into the output file')
+ return parser
+
+
+def build_families(individuals, logger):
+ # Individuals with only one parent will be considered like founders
+ # for i in individuals:
+ # if ((i.mother is None) or (i.father is None)):
+ # i.mother = None
+ # i.father = None
+ logger.info("individuals: %d" % len(individuals))
+ # logger.info("individuals: with 0 or 2 parents: %d" % len(not_one_parent))
+ logger.info("analyzing pedigree")
+ founders, non_founders, dangling, couples, children = ped.analyze(
+ individuals
+ )
+ logger.info("splitting into families")
+ return ped.split_disjoint(individuals, children)
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('build_miniped', level=args.loglevel,
+ filename=args.logfile)
+
+ dobj = Diagnosis(logger, args.configfile)
+ logger.debug("l {}".format(dobj.get_diagnosis_label()))
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.debug('Loading all individuals from omero')
+ all_inds = kb.get_objects(kb.Individual) # store all inds to cache
+ logger.debug('%d individuals loaded' % len(all_inds))
+ studies = [kb.get_study(s) for s in args.study.split(',')]
+    # Remove None entries (labels with no matching study), then sort by label
+    studies = sorted(set(s for s in studies if s is not None),
+                     key=lambda s: s.label.lower())
+ if len(studies) == 0:
+ logger.error(
+ 'No matches found for labels %s, stopping program' % args.study)
+ sys.exit(2)
+ enrolled_map = {}
+ for study in studies:
+ logger.info('Loading enrolled individuals for study %s' % study.label)
+ enrolled = kb.get_enrolled(study)
+ logger.debug('%d individuals loaded' % len(enrolled))
+ for en in enrolled:
+ if en.individual.id not in enrolled_map:
+ enrolled_map[en.individual.id] = (
+ '%s:%s' % (en.study.label, en.studyCode),
+ en.individual)
+ else:
+ logger.debug('Individual %s already mapped' % en.individual.id)
+ logger.debug('Loading EHR records')
+ ehr_records = kb.get_ehr_records()
+ logger.debug('%s EHR records loaded' % len(ehr_records))
+ ehr_records_map = {}
+ for r in ehr_records:
+ ehr_records_map.setdefault(r['i_id'], []).append(r)
+ affection_map = {}
+ arch, field = dobj.get_openehr_data()
+ for ind_id, ehr_recs in ehr_records_map.iteritems():
+ affection_map[ind_id] = dobj.get_unaffected_diagnosis_dictionary()
+ ehr = EHR(ehr_recs)
+ for k, v in dobj.get_diagnosis().iteritems():
+ for d in v:
+ if ehr.matches(arch, field, d):
+ affection_map[ind_id][k] = PLINK_AFFECTED
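+    # Illustrative example: with the default YAML config, an individual
+    # whose EHR records match icd10-cm:G35 only ends up with
+    #   affection_map[ind_id] == {'t1d': 1, 'ms': 2, 'nefro': 1}
+    # where 1 = PLINK_UNAFFECTED and 2 = PLINK_AFFECTED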
+
+    immuno_inds = [i for (st_code, i) in enrolled_map.itervalues()]
+ families = build_families(immuno_inds, logger)
+ logger.info("found %d families" % len(families))
+
+ def resolve_label(i):
+ try:
+ return enrolled_map[i.id][0]
+ except KeyError:
+ return i.id
+
+ def resolve_pheno(i):
+ try:
+ immuno_affection = affection_map[i.id]
+ except KeyError:
+ return [(d, PLINK_MISSING) for d in dobj.get_diagnosis_label()]
+ return [(d, immuno_affection[d]) for d in dobj.get_diagnosis_label()]
+
+ kb.Gender.map_enums_values(kb)
+    def gender_map(x):
+        # ped convention: 1 = male, 2 = female
+        return (2 if x.enum_label() == kb.Gender.FEMALE.enum_label()
+                else 1)
+
+ for d in dobj.get_diagnosis_label():
+ FIELDS.append("_".join([d, "status"]))
+ with open(args.ofile, "w") as f:
+ writer = csv.DictWriter(f, FIELDS, delimiter="\t", lineterminator="\n")
+ if args.write_header:
+ writer.writeheader()
+ families_data = []
+ logger.info("building families data")
+ for k, fam in enumerate(families):
+ fam_label = "FAM_%d" % (k + 1)
+ for i in fam:
+ r = {"fam_label": fam_label,
+ "ind_label": resolve_label(i),
+ "fat_label": 0 if (i.father is None or i.father not in fam)
+ else resolve_label(i.father),
+ "mot_label": 0 if (i.mother is None or i.mother not in fam)
+ else resolve_label(i.mother),
+ "gender": gender_map(i.gender)}
+ for p in resolve_pheno(i):
+ r["_".join([p[0], "status"])] = p[1]
+ families_data.append(r)
+ logger.info("writing miniped")
+ writer.writerows(families_data)
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/build_miniped.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/build_miniped.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,58 @@
+
+
+ Build a reduced ped file from Omero server
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=build_miniped.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ #if $study
+ --study=${study}
+ #end if
+ --ofile=${output1}
+ --logfile=${logfile}
+ #if $write_header
+ --write_header
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It will output a tsv file with one row per individual.
+
+ The labels of the columns are:
+
+ * family
+ * individual enrollment code (STUDY:CODE)
+ * father enrollment code (STUDY:CODE)
+ * mother enrollment code (STUDY:CODE)
+ * gender
+ * T1D affection status
+ * MS affection status
+ * Nefro affection status
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/build_miniped.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/build_miniped.yaml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,18 @@
+openEHR:
+ archetype: openEHR-EHR-EVALUATION.problem-diagnosis.v1
+ field: at0002.1
+diagnosis:
+ 1:
+ label: t1d
+ values:
+ - icd10-cm:E10
+ 2:
+ label: ms
+ values:
+ - icd10-cm:G35
+ 3:
+ label: nefro
+ values:
+ - icd10-cm:E23.2
+ - icd10:N00-N08
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/check_merge_individuals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_merge_individuals.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,104 @@
+import sys, csv, argparse
+from collections import Counter
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='check data that will be passed to the merge_individuals tool')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')
+ return parser
+
+
+def get_invalid_vids(records, logger):
+ records_map = {}
+ invalid_vids = []
+
+ for rec in records:
+ for k,v in rec.iteritems():
+ records_map.setdefault(k, []).append(v)
+ # Check for duplicated sources
+    ct = Counter(records_map['source'])
+ for k, v in ct.iteritems():
+ if v > 1:
+ logger.error('ID %s appears %d times as source, this ID has been marked as invalid' % (k, v))
+ invalid_vids.append(k)
+    # Check for VIDs that appear both in 'source' and 'target' fields
+ sources = set(records_map['source'])
+ targets = set(records_map['target'])
+ commons = sources.intersection(targets)
+ for c in commons:
+ logger.error('ID %s appears both in \'source\' and \'target\' columns, this ID has been marked as invalid' % c)
+ invalid_vids.append(c)
+
+ return set(invalid_vids)
+
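+# Illustrative example: given records [{'source': 'A', 'target': 'B'},
+# {'source': 'A', 'target': 'C'}, {'source': 'C', 'target': 'D'}],
+# get_invalid_vids returns set(['A', 'C']): 'A' appears twice as a
+# source and 'C' appears both as a source and as a target.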
+
+def check_row(row, individuals, logger):
+ try:
+ source = individuals[row['source']]
+ logger.debug('%s is a valid Individual ID' % source.id)
+ target = individuals[row['target']]
+ logger.debug('%s is a valid Individual ID' % target.id)
+ return True
+ except KeyError, ke:
+ logger.error('%s is not a valid Individual ID' % ke)
+ return False
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('check_merge_individuals', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Preloading all individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Loaded %d individuals' % len(inds))
+ inds_map = {}
+ for i in inds:
+ inds_map[i.id] = i
+
+ with open(args.in_file) as infile, open(args.out_file, 'w') as outfile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ records = [row for row in reader]
+ invalid_vids = get_invalid_vids(records, logger)
+
+ writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t')
+ writer.writeheader()
+
+ for record in records:
+ if record['source'] in invalid_vids or record['target'] in invalid_vids:
+ logger.error('Skipping record %r because at least one ID was marked as invalid' % record)
+ else:
+ if check_row(record, inds_map, logger):
+ writer.writerow(record)
+ logger.debug('Record %r written in output file' % record)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/check_merge_individuals.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_merge_individuals.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,41 @@
+
+
+ Verify data that will be passed to the merge_individuals tool
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=check_merge_individuals.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --in_file=${input1}
+ --out_file=${output1}
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will check the records for the merge_individuals tool using information from a file like this::
+
+ source target
+ V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/check_update_parents_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_update_parents_data.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,113 @@
+import sys, csv, argparse
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='check data that will be passed to the update_parents tool')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')
+ return parser
+
+
+def check_row(row, individuals_map, kb, logger):
+ logger.debug('Checking record %r' % row)
+ try:
+ ind = individuals_map[row['individual']]
+ logger.info('%s is a valid Individual ID' % ind.id)
+ if row['father'] != 'None':
+ father = individuals_map[row['father']]
+ logger.info('%s is a valid Individual ID' % father.id)
+ check_gender(father, kb.Gender.MALE)
+ logger.info('Gender check passed')
+ else:
+ logger.info('None value, no check required')
+ if row['mother'] != 'None':
+ mother = individuals_map[row['mother']]
+ logger.info('%s is a valid Individual ID' % mother.id)
+ check_gender(mother, kb.Gender.FEMALE)
+ logger.info('Gender check passed')
+ else:
+ logger.info('None value, no check required')
+ return True
+ except KeyError, ke:
+ logger.error('%s is not a valid Individual ID, rejecting row' % ke)
+ return False
+ except ValueError, ve:
+ logger.error(ve)
+ return False
+
+
+def check_gender(individual, gender):
+    if individual.gender.enum_label() != gender.enum_label():
+        raise ValueError(
+            'Gender for individual %s is %s, expected %s, rejecting row'
+            % (individual.id, individual.gender.enum_label(),
+               gender.enum_label()))
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('check_update_parents_data', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Preloading all individuals from the system')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('%d individuals loaded' % len(inds))
+ inds_lookup = {}
+ for i in inds:
+ inds_lookup[i.id] = i
+
+ with open(args.in_file) as infile, open(args.out_file, 'w') as outfile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ records = list(reader)
+        logger.info('Checking for duplicates in the \'individual\' column')
+        recs_by_ind = {}
+        for rec in records:
+            recs_by_ind.setdefault(rec['individual'], []).append(rec)
+        duplicated = [k for k, v in recs_by_ind.iteritems() if len(v) > 1]
+        for dupl in duplicated:
+            logger.info('Individual %s is duplicated' % dupl)
+            for r in recs_by_ind.pop(dupl):
+                logger.info('Removing record %r' % r)
+        good_records = sum(recs_by_ind.itervalues(), [])
+        logger.info('Duplicates check completed')
+ writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t')
+ writer.writeheader()
+ logger.info('Checking records')
+ for row in good_records:
+ if check_row(row, inds_lookup, kb, logger):
+ writer.writerow(row)
+ logger.debug('Record %r written in output file' % row)
+ logger.info('Records check completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/check_update_parents_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/check_update_parents_data.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,42 @@
+
+
+ Verify data that will be passed to the update_parents tool
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=check_update_parents_data.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --in_file=${input1}
+ --out_file=${output1}
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will check parental info of individuals using information from a file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/convert_sam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/convert_sam.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+
+main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/convert_sam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/convert_sam.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,40 @@
+
+ converter
+
+ launcher.sh
+ --interpreter=python
+ --runner=convert_sam.py
+ --logfile ${log_file} convert_sam -i ${input_file}
+ -o ${output_file} --reftag ${dbkey} --output-format ${output_type}
+ ## FIXME: find a way to import the default from the relevant module
+ --flank-size 125
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool converts SAM alignment data to VL marker alignment or Galaxy
+extract genomic DNA input.
+
+Expects single-end BWA alignment data produced by the previous steps
+in the workflow (see markers_to_fastq).
+
+**NOTE:** if the marker_alignment output format is selected, the
+Database/Build property must be set in the input SAM file.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/enrollments_by_platewells.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/enrollments_by_platewells.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,77 @@
+"""
+From a list of platewells, retrieve the enrollment codes of the
+connected individuals
+"""
+import argparse
+import csv
+import sys
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='from platewells to enrollment codes')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-S', '--study', type=str, required=True,
+ help='a study used to retrieve individuals')
+ parser.add_argument('--ifile', type=str, required=True,
+ help='list of platewells used to fetch data')
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('pws2enrolls', level=args.loglevel,
+ filename=args.logfile)
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ study = kb.get_study(args.study)
+ enrolled_map = {e.individual.id:e for e in kb.get_enrolled(study)}
+ logger.info('Loaded {} enrolled individuals for study {}'.format(len(enrolled_map), study.label))
+ plates = kb.get_objects(kb.TiterPlate)
+ logger.info('Loaded {} plates'.format(len(plates)))
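+    # pws_map keys look like 'A9033P3B:C09' (plate barcode + ':' + well
+    # label), matching the 'platewell' column expected in the input file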
+ pws_map = {':'.join([w.container.barcode, w.label]):w for w in kb.get_objects(kb.PlateWell)
+ if w.container.barcode}
+ logger.info('Loaded {} platewells'.format(len(pws_map)))
+    with open(args.ofile, 'w') as of, open(args.ifile, 'r') as f:
+        writer = csv.DictWriter(of, ['platewell', 'status', 'enrollment'],
+                                delimiter='\t', quotechar='"', restval='None')
+        writer.writeheader()
+        reader = csv.DictReader(f, delimiter='\t')
+        logger.info('Searching individuals connected to the platewells')
+        for r in reader:
+            ind = kb.dt.get_connected(pws_map[r['platewell']], kb.Individual,
+                                      kb.dt.DIRECTION_INCOMING)
+            try:
+                record = {'platewell': r['platewell'],
+                          'status': pws_map[r['platewell']].status.enum_label(),
+                          'enrollment': ':'.join(
+                              [study.label, enrolled_map[ind[0].id].studyCode])}
+            except KeyError:
+                logger.warning('not enrolled {}'.format(r['platewell']))
+                record = {'platewell': r['platewell'],
+                          'status': pws_map[r['platewell']].status.enum_label(),
+                          'enrollment': ':'.join([study.label, 'not_enrolled'])}
+            writer.writerow(record)
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/enrollments_by_platewells.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/enrollments_by_platewells.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,66 @@
+
+
+ From a list of platewells, retrieve the enrollment codes of the connected individuals
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=enrollments_by_platewells.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --logfile=${logfile}
+ --ifile=$input
+ --ofile=${output}
+ #if str($study_label) != 'no_study'
+ --study=${study_label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Starting from a list of platewells, this tool retrieves the enrollment
+code of the individual connected to each well.
+
+Input file must be in TABULAR format and like::
+
+ platewell
+ A9033P3B:C09
+ A9033P3B:G09
+ A9033P3B:G10
+ A9033P3B:H05
+ A9033WRT:E08
+ A9033WRT:E10
+ A9033WRT:F03
+ A9033WRT:F04
+ ...
+
+Output file will be like::
+
+ platewell status enrollment
+ A9033P3B:C09 DISCARDED AUTOIMMUNITY:X3M6XP517
+ A9033P3B:G09 DISCARDED AUTOIMMUNITY:RYMRK2NLJ
+ A9033P3B:G10 DISCARDED AUTOIMMUNITY:OV13V99M9
+ A9033P3B:H05 DISCARDED AUTOIMMUNITY:OV13ZQK19
+ A9033WRT:E08 DISCARDED AUTOIMMUNITY:7GMWNX9M8
+ A9033WRT:E10 DISCARDED AUTOIMMUNITY:R3MKP0GL4
+ A9033WRT:F03 DISCARDED AUTOIMMUNITY:N1VD2Q915
+ A9033WZT:A04 CONTENTUSABLE AUTOIMMUNITY:210JRG4MW
+ ...
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/flowcell_samplesheet.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/flowcell_samplesheet.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,107 @@
+
+
+ Build the samplesheet for a given FlowCell
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --ofile=${outfile}
+ flowcell_samplesheet
+ #if $namespace.ignore_namespace
+ --flowcell=${flowcell}
+ --ignore_namespace
+ #else
+ --flowcell="${namespace.namespace_value}|${flowcell}"
+ #end if
+ #if $remove_namespaces
+ --remove_namespaces
+ #end if
+ #if $add_sample_label
+ --sample_label
+ #end if
+ --separator=${csv_separator}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Export data related to a FlowCell identified by the Flowcell ID field
+into a csv file like::
+
+  FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator
+  foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe
+  foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe
+  foofc1,2,v021441,hg19,TAGCTT,,WG,john doe
+  ...
+
+If the checkbox "Add sample labels" is enabled, the output file will
+have a new column at the end of each row with the label of the sample,
+like::
+
+  FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleLabel
+  foofc1,1,v012aa2,hg19,ATCACG,,WG,john doe,foosmp1
+  foofc1,2,v0441a1,hg19,GATCAG,,EXOME,john doe,foosmp2
+  foofc1,2,v021441,hg19,TAGCTT,,WG,john doe,foosmp3
+  ...
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/get_kinship_input.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_kinship_input.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,86 @@
+
+
+ Build input files for kinship MR application
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ofile=${output_gen}
+ extract_gt
+ #if str($study) != 'no_study'
+ --study=${study}
+ #end if
+ --out_samples_list=${output_sl}
+ --marker_set=${mset}
+ #if $transpose_output
+ --transpose_output
+ #end if
+ --compress_output
+ --compression_level=${compression_level}
+ #if $ignore_duplicated
+ --ignore_duplicated
+ #end if
+ #if $enable_debug
+ --loglevel=DEBUG
+ #if str($data_collection) != 'no_collection'
+ --data_collection=${data_collection}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/get_parents_from_sibling.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_parents_from_sibling.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,89 @@
+'''
+From a file like this
+individual sibling
+V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445
+V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B
+....
+
+retrieve each individual's parents using sibling information and build a file like
+
+individual father mother
+V08E18411BC66F4987BCA43EFC6F636224 V027DE334753424F07B81A70053EF5B873 V035222CAEE0474AFEBB9A161D4B64914E
+V0FAE2B10F690041509739A3F4B314DC8F V0E966B53BDCC942C09D6B6D96DE98F4F4 V0F7B6926C6FBE4F0BB38BBC6CFB13A825
+....
+
+'''
+
+import sys, csv, argparse
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='retrieve parents information using sibling')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero passwd')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file with individual-sibling couples')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file with parents information')
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('get_parents_from_sibling', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Retrieved %d individuals' % len(inds))
+ inds_lookup = {}
+ for ind in inds:
+ inds_lookup[ind.id] = ind
+
+ with open(args.in_file) as infile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ records = []
+ for row in reader:
+ try:
+ sib = inds_lookup[row['sibling']]
+ rec = {'individual' : row['individual'],
+ 'father' : sib.father.id if sib.father else 'None',
+ 'mother' : sib.mother.id if sib.mother else 'None'}
+ logger.info('Individual %s, father: %s - mother: %s' % (row['individual'],
+ rec['father'],
+ rec['mother']))
+ records.append(rec)
+ except KeyError:
+ logger.error('Unable to find individual %s' % row['sibling'])
+
+ logger.info('Retrieved parents for %d individuals' % len(records))
+
+ with open(args.out_file, 'w') as outfile:
+ writer = csv.DictWriter(outfile, ['individual', 'father', 'mother'],
+ delimiter='\t')
+ writer.writeheader()
+ writer.writerows(records)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/get_parents_from_sibling.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_parents_from_sibling.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,50 @@
+
+
+ Retrieve individuals' parents using sibling information
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=get_parents_from_sibling.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --in_file=${input1}
+ --out_file=${output1}
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+It will retrieve individuals' parents using sibling information from a file like this::
+
+ individual sibling
+ V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
+ V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445
+ V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B
+
+and build a tsv file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+ V0FAE2B10F690041509739A3F4B314DC8F V07282522B89FC4F7CA08094537A13C0D1 V09D459311D1254095AE9F00B45E5A101E
+ V0382EF862AA4B475697C95D3777043239 V04CD9561F753F4853838E2E96819AAAC0 V0382EF862AA4B475697C95D3777043239
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/get_studies_details.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_studies_details.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,59 @@
+'''
+Dump, for every study known to the biobank, its label, description and
+the number of enrolled individuals to a TSV file.
+'''
+
+import argparse, csv, sys
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='retrieve studies details')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero passwd')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file with studies details')
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('get_studies', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving studies')
+ studies = kb.get_objects(kb.Study)
+ logger.info('Retrieved {} studies'.format(len(studies)))
+ records = []
+ for s in studies:
+ enr = kb.get_enrolled(s)
+ rec = {'label': s.label,
+ 'description': s.description,
+ 'enrolled': len(enr)}
+ records.append(rec)
+
+ with open(args.out_file, 'w') as outfile:
+ writer = csv.DictWriter(outfile, ['label', 'description', 'enrolled'],
+ delimiter='\t')
+ # writer.writeheader()
+ writer.writerows(sorted(records, key=lambda key: key['label']))
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/get_studies_details.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/get_studies_details.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,29 @@
+
+
+ Retrieve studies details
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=get_studies_details.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --out_file=${output1}
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+It will produce a list of the studies known by the biobank server, one
+row per study with its label, description and number of enrolled
+individuals
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/global_stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/global_stats.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,46 @@
+
+
+ Provide global statistics for a given study.
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ofile=${output1}
+ --logfile=${logfile}
+ global_stats
+ #if str($study) != 'all_known_studies'
+ --study=${study}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It will output a tsv file with the following columns::
+
+ study diagnosis technology gender counts
+ BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 MALE 1
+ BSTUDY icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1
+ BSTUDY local:at0.3 AffymetrixCelGENOMEWIDESNP_6 MALE 2
+ BSTUDY icd10-cm:G35;icd10-cm:E10;icd10-cm:E10 AffymetrixCelGENOMEWIDESNP_6 MALE 1
+ BSTUDY icd10-cm:G35 AffymetrixCelGENOMEWIDESNP_6 FEMALE 1
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/gstudio_datasheet.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/gstudio_datasheet.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,46 @@
+
+
+ Build a Genome Studio datasheet for the given plate
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ofile=${outfile}
+ gstudio_datasheet
+ #if str($plate) != 'no_plate'
+ --plate=${plate}
+ --manifest=${manifest}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Build a Genome Studio datasheet for the given plate
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/kb_query.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/kb_query.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,9 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.kb_query.main import main as kb_query
+
+kb_query(sys.argv[1:])
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/launcher.sh Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,56 @@
+#!/bin/bash
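+# Usage sketch (as invoked by the XML tool wrappers in this repository):
+#   launcher.sh --interpreter=python --runner=<tool>.py \
+#     --host=<omero host> --user=<omero user> --passwd=<omero password> \
+#     [tool-specific arguments...]
+# --interpreter and --runner are consumed here; all remaining arguments
+# (including host/user/passwd) are forwarded to the runner script.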
+
+CMD=""
+PYTH_PATH="PYTHONPATH=/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/p\
+ython2.7/site-packages/:/SHARE/USERFS/els7/users/biobank/lib/"
+runner="$(dirname ${BASH_SOURCE[0]})/"
+until [ -z "$1" ]
+ do
+
+ opt_host='--host='
+ opt_user='--user='
+ opt_passwd='--passwd='
+ opt_interpreter='--interpreter='
+ opt_runner='--runner='
+ if [[ $1 == $opt_host* ]]; then
+ host=`echo $1 | cut -d '=' -f2 | cut -d '.' -f1`
+    if [ -z "$host" ] || [ "$host" = 'None' ]; then
+      echo 'ERROR. Missing omero host parameter. Please, set Omero Host in your user preferences' >&2
+      exit 1
+ fi
+ PYTH_PATH+=$host
+ HOST=`echo $1 | cut -d '=' -f2`
+ CMD+=' '$1
+ elif [[ $1 == $opt_user* ]]; then
+ user=`echo $1 | cut -d '=' -f2`
+    if [ -z "$user" ] || [ "$user" = 'None' ]; then
+      echo 'ERROR. Missing omero user parameter. Please, set Omero User in your user preferences' >&2
+      exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_passwd* ]]; then
+ passwd=`echo $1 | cut -d '=' -f2`
+    if [ -z "$passwd" ] || [ "$passwd" = 'None' ]; then
+      echo 'ERROR. Missing omero password parameter. Please, set Omero Password in your user preferences' >&2
+      exit 1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_runner* ]]; then
+ runner+=`echo $1 | cut -d '=' -f2`
+ elif [[ $1 == $opt_interpreter* ]]; then
+ interpreter=`echo $1 | cut -d '=' -f2`
+ else
+ CMD+=' '$1
+ fi
+ shift
+done
+# PYTH_PATH already contains the leading 'PYTHONPATH=' assignment
+export "$PYTH_PATH/:$PYTHONPATH"
+profile="/SHARE/USERFS/els7/users/biobank/lib/${HOST}.biobank.profile"
+if [ -f "$profile" ]; then
+  source "$profile"
+ CMD=$interpreter' '$runner$CMD
+ $CMD
+else
+  echo "ERROR. Biobank profile file doesn't exist. Please, check Omero Host in your user preferences" >&2
+  exit 1
+fi
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/lookup_index.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/lookup_index.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+
+main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/lookup_index.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/lookup_index.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,29 @@
+
+ lookup dbSNP index
+
+ lookup_index.py --logfile ${log_file} lookup_index -i ${input_file}
+ -o ${output_file}
+ --index-file "${ filter( lambda x: str( x[0] ) == str( $indices ), $__app__.tool_data_tables[ 'dbsnp_indexes' ].get_fields() )[0][-1] }"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool reads a Galaxy genome segment extractor output in interval
+format and performs a lookup in the selected dbSNP index to get the
+true rs label. It outputs a new marker definitions file with the true
+rs labels and masks.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/map_to_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_to_collection.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,70 @@
+
+
+ Map a list of objects (vessels or data samples) to the specified
+ collection.
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ofile=${outfile}
+ map_to_collection
+ --ifile=${infile}
+ #if str($field_label) != ''
+ --field_label=${field_label}
+ #end if
+ #if str($collection_selector.collection_type) != 'no_coll_selected'
+ --collection_type=${collection_selector.collection_type}
+ #if str($collection_selector.collection_type) == 'DataCollection'
+ #if str($collection_selector.dcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.dcoll_label}
+ #end if
+ #elif str($collection_selector.collection_type) == 'VesselsCollection'
+ #if str($collection_selector.vcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.vcoll_label}
+ #end if
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Select a Vessels Collection...
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/map_vid.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_vid.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,25 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.kb_query.main import main as kb_query
+
+def main(argv):
+ selected_column, new_column_name, input_file = argv[:3]
+ selected_column = int(selected_column) - 1
+ new_column_name = new_column_name.strip()
+
+    with open(input_file) as f:
+        l = f.readline().strip()
+
+ column_names = l.split('\t')
+ column_name = column_names[selected_column]
+
+ argv = argv[3:] + ['--column=%s,%s' % (column_name, new_column_name)]
+ kb_query(argv)
+
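+# Illustrative example (hypothetical file and labels): invoked as
+#   map_vid.py 3 source in.tsv --host=... map_vid --ifile=in.tsv ...
+# with a header line 'target<TAB>label<TAB>individual', column 3
+# ('individual') is forwarded to kb_query as --column=individual,source.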
+main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/map_vid.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/map_vid.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,145 @@
+
+
+ Map labels of objects known to OMERO.biobank to their VID
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=map_vid.py
+ ${selected_column}
+ ${new_column_name}
+ ${input1}
+ #if $omero_configuration.level == 'advanced'
+ --host=$omero_configuration.vl_host
+ --user=$omero_configuration.vl_user
+ --passwd=$omero_configuration.vl_passwd
+ #else
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ #end if
+ --operator=$__user_email__
+ --ofile=${output1}
+ --logfile=${logfile}
+ map_vid
+ --ifile=${input1}
+ --source-type=${source_type.source_type}
+ #if $source_type.source_type == 'Individual'
+ #if str($source_type.study) != 'use_provided'
+ --study=${source_type.study}
+ #end if
+ #end if
+ #if $strict_mapping
+ --strict-mapping
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+The tool resolves VIDs for the given column and renames the column
+itself with a new label. Usually the plain item label is enough to map
+an item's VID, but in some cases a special syntax is needed:
+
+* for Individual items, if no default study is provided, the pattern
+ to be used is **STUDY:STUDY_LABEL**. If a default study is provided,
+ the column must contain only the STUDY_LABEL
+
+* for PlateWell items the pattern is **PLATE_LABEL:WELL_LABEL**
+
+* for DataCollectionItem items the pattern is
+ **DATA_COLLECTION_LABEL:ITEM_LABEL**
+
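+For example (illustrative labels), with source type PlateWell a column
+containing::
+
+  MYPLATE01:A01
+
+is resolved to the VID of well A01 on plate MYPLATE01.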
+
+
+
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/markers_to_fastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/markers_to_fastq.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main
+
+main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/markers_to_fastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/markers_to_fastq.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,36 @@
+
+ converter
+
+ launcher.sh
+ --interpreter=python
+ --runner=markers_to_fastq.py --logfile ${log_file} markers_to_fastq
+ -i ${input_file} -o ${output_file}
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool converts VL marker definitions to fastq data.
+
+VL marker definitions files have the following format (spaces are tabs)::
+
+ label rs_label mask
+ SNP_A-1780419 rs6576700 GGATACATTTTATTGC[A/G]CTTGCAGAGTATTTTT
+ SNP_A-1780418 rs17054099 GGATACATTACCCAAA[C/T]GGTCACAGGTCAAAGG
+ SNP_A-1780415 rs7730126 GGATACATCCCCCCCA[A/G]AAAATGAGAATAAAGC
+ ...
+
+Where "label" is a unique identifier, "rs_label" is the dbSNP label
+and "mask" is the SNP's mask in the
+LEFT_FLANK[ALLELE_A/ALLELE_B/...]RIGHT_FLANK format. One fastq record
+is generated for each allele in the mask. The string "None" in the
+rs_label column means there is no rs label for the marker.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/plate_dsamples_details.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/plate_dsamples_details.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,86 @@
+
+
+ Retrieve wells and connected data samples related to a known plate
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ofile=${output}
+ plate_data_samples
+ #if str($plate) != 'select_one'
+ --plate=${plate}
+ #end if
+ #if $fetch_all
+ --fetch_all
+ #end if
+ #if str($vcoll_label) != 'no_collection'
+ --vessels_collection=${vcoll_label}
+ #end if
+ #if $vessel_types
+ --ignore_types=${vessel_types}
+ #end if
+ #if str($study_label) != 'no_study'
+ --map_study=${study_label}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Using one of the selectable plate barcodes, the tool will generate a
+report file for the plate like::
+
+ PLATE_barcode PLATE_label WELL_label WELL_status DATA_SAMPLE_label
+ XXYYZZKK test_plate A01 CONTENTUSABLE a01_test_sample
+ XXYYZZKK test_plate A02 CONTENTUSABLE X
+ XXYYZZKK test_plate A03 UNKNOWN OR EMPTY X
+ XXYYZZKK test_plate A04 CONTENTUSABLE a04_test_sample
+ XXYYZZKK test_plate A05 DISCARDED X
+ ...
+
+For each plate, all wells will be reported in the output file, even
+the ones not actually recorded into the system; these wells will be
+marked with an 'UNKNOWN OR EMPTY' status.
+
+For each well, the tool performs a query in order to find whether at
+least one data sample is directly connected to the well itself; if at
+least one is found, the label of the data sample is placed in the
+DATA_SAMPLE_label column, otherwise an 'X' is placed.
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/query.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,79 @@
+
+
+ Provides a simplified environment to perform complex queries to
+ BIOBANK.
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ofile=${output1}
+ --logfile=${logfile}
+ query
+ --group=$study
+ --code-file=$code_file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${str($query_code).replace('__sq__', "'").replace('__cr____cn__', '\n')}
+
+
+
+
+
+
+
+
+The following is an example of a query that will dump family relations
+within the group::
+
+ writeheader('study', 'id', 'gender', 'father', 'mother')
+ for i in Individuals(group):
+ writerow(group.id, enum_label(i.gender),
+ i.father.id if i.father else 'None',
+ i.mother.id if i.mother else 'None')
+
+
+The next example will prepare a file that could be used to define a
+data collection and then as the input for a genotyping run::
+
+ writeheader('dc_id', 'gender', 'data_sample',
+ 'path', 'mimetype', 'size', 'sha1')
+ for i in Individuals(group):
+ for d in DataSamples(i, 'AffymetrixCel'):
+ for o in DataObjects(d):
+ writerow(group.id, enum_label(i.gender), d.id,
+ o.path, o.mimetype, o.size, o.sha1)
+
+In the examples above, '''group''' (actually a study) corresponds to
+the group whose label is assigned by the '''--group''' flag.
+
+**Note** This is clearly an extremely dangerous tool.
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/select_sub_group.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/select_sub_group.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,129 @@
+
+
+ Selects groups of individuals.
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --ofile=${output1}
+ --logfile=${logfile}
+ selector
+ #if str($study) != 'use_all'
+ --study=$study
+ #end if
+ --group-label=$group_label
+ --total-number=$total_number
+ --male-fraction=$male_fraction
+ --reference-disease=$reference_diagnosis
+ --control-fraction=$control_fraction
+ #if str($required_datasample) != 'unselect'
+ --required-datasample=$required_datasample
+ #end if
+ #if int($seed) != 0
+ --seed=$seed
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+It will select a group of individuals from a specific group (or from
+all available individuals, if no group is selected). The selection is
+controlled by the following parameters:
+
+ * total number of individuals selected
+ * male fraction
+ * reference disease
+ * control fraction
+ * presence of specific datasets
+
+The results will be presented as a file that can be used to generate a
+new group (actually a study). The file will have the following columns::
+
+ study label individual
+ XXX 0001 V20940239409
+ XXX 0002 V20940239509
+ XXX 0003 V20940239609
+ XXX 0004 V20940239709
+ ...
+
+ where study is the name of the new study
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/snp_manager.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/snp_manager.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,7 @@
+# BEGIN_COPYRIGHT
+# END_COPYRIGHT
+
+import sys
+from bl.vl.app.snp_manager.main import main as snp_manager
+
+snp_manager(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/snp_manager.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/snp_manager.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,26 @@
+
+ get true rs label and mask from dbSNP
+
+ launcher.sh
+ --interpreter=python
+ --runner=snp_manager.py
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ FIXME
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/tools/vessels_by_individual.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/tools/vessels_by_individual.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,74 @@
+
+
+ Retrieve all vessels related to individuals passed with the input
+ file. Vessel type and a Vessel Collection can be used as filters.
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=kb_query.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ofile=${outfile}
+ vessels_by_individual
+ --ifile=${infile}
+ #if str($collection_label) != 'no_collection'
+ --vessels_collection=${collection_label}
+ #end if
+ #if str($vessel_type) != 'no_type'
+ --vessel_type=${vessel_type}
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/change_source_item.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/change_source_item.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,258 @@
+# The tool changes the source of an object inside the system.
+# Expected input file format is
+#
+# target new_source
+# V1415515 V1241441
+# V1351124 V1511141
+# .....
+#
+# where target is the object whose source will be replaced by the
+# new_source object. The target and new-source types are specified via
+# the --target_type and --source_type command line options.
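+#
+# Illustrative invocation (argument values are placeholders):
+#   python change_source_item.py --operator=jdoe --in_file=changes.tsv \
+#     --target_type=PlateWell --source_type=Individual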
+
+import csv, argparse, sys, json, time
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+import omero
+import omero.model
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='change the source for given items')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--operator', type=str, required=True,
+ help='operator username')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='list of items with new sources')
+ parser.add_argument('--target_type', type=str, required=True,
+ help='type of the target objects')
+ parser.add_argument('--source_type', type=str, required=True,
+ help='type of the new source objects')
+ return parser
+
+
+def do_check(records, targets, sources,
+ target_type, source_type,
+ kb, logger):
+ logger.info('Starting consistency checks')
+ src_map = dict([(s.id, s) for s in sources])
+ trg_map = dict([(t.id, t) for t in targets])
+ good_records = []
+ targets = {}
+ sources = {}
+ for i, r in enumerate(records):
+ if r['target'] not in trg_map:
+ logger.warning('No %s with ID %s, rejecting record %d' % (target_type,
+ r['target'], i))
+ continue
+ if r['new_source'] not in src_map:
+ logger.warning('No %s with ID %s, rejecting record %d' % (source_type,
+ r['new_source'], i))
+ continue
+ targets[r['target']] = trg_map[r['target']]
+ sources[r['new_source']] = src_map[r['new_source']]
+ good_records.append(r)
+ logger.info('Done with consistency checks')
+ return good_records, targets, sources
+
+
+def update_data(records, targets, sources, operator, act_conf,
+ kb, logger, batch_size = 500):
+ def get_chunk(batch_size, records):
+ offset = 0
+ while len(records[offset:]) > 0:
+ yield records[offset:offset+batch_size]
+ offset += batch_size
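+    # e.g. with the default batch_size of 500, 1200 records are
+    # processed in chunks of 500, 500 and 200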
+ dev = get_device(kb, logger)
+ for i, recs in enumerate(get_chunk(batch_size, records)):
+ logger.info('Updating batch %d' % i)
+ batch_to_save = []
+ edges_to_delete = []
+ for r in recs:
+ target = targets[r['target']]
+ # Build the ActionOnAction backup object
+ if not target.lastUpdate:
+ last_action = target.action
+ else:
+ last_action = target.lastUpdate
+ old_action = target.action
+ asconf = {'backup' : {'action' : old_action.id}}
+ aslabel = 'updater.update_source_item-%f' % time.time()
+ backup = build_action(operator, old_action.context,
+ dev, last_action, aslabel,
+ asconf, kb, logger)
+ target.lastUpdate = backup
+ # Build the Action in order to attach the new source to
+ # the target object
+ new_source = sources[r['new_source']]
+ if new_source.is_mapped:
+ new_source.unload()
+ asconf = act_conf
+ aslabel = 'updater.update_source_item-%f' % time.time()
+ new_act = build_action(operator, old_action.context,
+ dev, new_source, aslabel,
+ asconf, kb, logger)
+ target.action = new_act
+ if old_action.OME_TABLE == 'Action':
+ # no old source, just save the new action
+ batch_to_save.append(target)
+ else:
+ # check if the old target and the new one are different
+ if new_source != old_action.target:
+ batch_to_save.append(target)
+ edges_to_delete.append((old_action.target, target))
+ if len(batch_to_save) > 0:
+ kb.save_array(batch_to_save)
+ else:
+        logger.info('No records need to be updated')
+ for vert in edges_to_delete:
+ kb.dt.destroy_edge(*vert)
+
+
+def build_action(operator, context, device, target,
+ action_setup_label, action_setup_conf,
+ kb, logger):
+ if action_setup_label:
+ asetup = get_action_setup(action_setup_label, action_setup_conf,
+ kb, logger)
+ else:
+ asetup = None
+ aconf = {
+ 'device' : device,
+ 'actionCategory' : kb.ActionCategory.IMPORT,
+        'operator' : operator,
+ 'context' : context,
+ 'target' : target,
+ }
+ if asetup:
+ aconf['setup'] = asetup
+ action = kb.factory.create(retrieve_action_type(target, kb), aconf)
+ return action
+
+
+def retrieve_action_type(target, kb):
+ tklass = target.ome_obj.__class__.__name__
+ for i, k in enumerate(target.ome_obj.__class__.__mro__):
+ if k is omero.model.IObject:
+ tklass = target.ome_obj.__class__.__mro__[i-1].__name__
+ if tklass == 'Vessel':
+ return kb.ActionOnVessel
+ elif tklass == 'Individual':
+ return kb.ActionOnIndividual
+ elif tklass == 'DataSample':
+ return kb.ActionOnDataSample
+ elif tklass == 'DataCollectionItem':
+ return kb.ActionOnDataCollectionItem
+ elif tklass == 'Action':
+ return kb.ActionOnAction
+ # elif tklass == 'VLCollection':
+ # return kb.ActionOnCollection
+ else:
+ raise ValueError('No Action related to %s klass' % tklass)
+
+
+def get_action_setup(label, conf, kb, logger):
+ asetup_conf = {
+ 'label' : label,
+ 'conf' : json.dumps(conf),
+ }
+ asetup = kb.factory.create(kb.ActionSetup, asetup_conf)
+ return asetup
+
+
+def get_device(kb, logger):
+ dev_model = 'UPDATE'
+ dev_maker = 'CRS4'
+ dev_release = '0.1'
+ dev_label = 'updater-%s.update_source_item' % dev_release
+ device = kb.get_device(dev_label)
+ if not device:
+ logger.debug('No device with label %s, creating one' % dev_label)
+ conf = {
+ 'maker' : dev_maker,
+ 'model' : dev_model,
+ 'release' : dev_release,
+ 'label' : dev_label,
+ }
+ device = kb.factory.create(kb.Device, conf).save()
+ return device
+
+
+def find_action_setup_conf(args):
+ action_setup_conf = {}
+ for x in dir(args):
+ if not (x.startswith('_') or x.startswith('func')):
+ action_setup_conf[x] = getattr(args, x)
+ if 'passwd' in action_setup_conf:
+        # Storing passwords into an Omero object is not a great idea...
+        action_setup_conf.pop('passwd')
+ return action_setup_conf
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('change_source_item', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.info('Loading data from input file')
+ with open(args.in_file) as f:
+ reader = csv.DictReader(f, delimiter='\t')
+ records = list(reader)
+ logger.info('Loaded %d records' % len(records))
+
+ logger.info('Loading %s type objects' % args.target_type)
+ targets = kb.get_objects(getattr(kb, args.target_type))
+ logger.info('Loaded %d objects' % len(targets))
+ if len(targets) == 0:
+ msg = 'No targets loaded from the system, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ logger.info('Loading %s type objects' % args.source_type)
+ sources = kb.get_objects(getattr(kb, args.source_type))
+ logger.info('Loaded %d objects' % len(sources))
+ if len(sources) == 0:
+ msg = 'No sources loaded from the system, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ logger.info('Loading Action type objects')
+ acts = kb.get_objects(kb.Action)
+ logger.info('Loaded %d objects' % len(acts))
+
+ records, targets, sources = do_check(records, targets, sources,
+ args.target_type, args.source_type,
+ kb, logger)
+ if len(records) == 0:
+ msg = 'No records passed consistency checks, nothing to do'
+ logger.critical(msg)
+ sys.exit(msg)
+
+ aconf = find_action_setup_conf(args)
+
+ update_data(records, targets, sources, args.operator,
+ aconf, kb, logger)
+
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/change_source_item.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/change_source_item.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,41 @@
+
+
+ Change source items for given objects
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=change_source_item.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --in_file=${infile}
+ --target_type=${target_type}
+ --source_type=${source_type}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/discard_from_collection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/discard_from_collection.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,91 @@
+import csv, argparse, sys, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+COLLECTION_TYPES = {'VesselsCollection' : 'VesselsCollectionItem',
+ 'DataCollection' : 'DataCollectionItem'}
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='remove elements from a Vessels or Data Collection')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-I', '--ifile', type=str, required=True,
+ help='list of collection items that will be removed')
+ parser.add_argument('--collection_type', type=str, required=True,
+ choices=COLLECTION_TYPES.keys(),
+ help='type of the collection')
+ parser.add_argument('--collection_label', type=str, required=True,
+ help='label of the collection')
+
+ return parser
+
+def load_collection(coll_type, coll_label, kb):
+ query = 'SELECT coll FROM %s coll WHERE coll.label = :coll_label' % coll_type
+ coll = kb.find_all_by_query(query, {'coll_label' : coll_label})
+ return coll[0] if len(coll) > 0 else None
+
+def load_collection_items(collection, coll_type, kb):
+ if COLLECTION_TYPES[coll_type] == 'VesselsCollectionItem':
+ citems = kb.get_vessels_collection_items(collection)
+ elif COLLECTION_TYPES[coll_type] == 'DataCollectionItem':
+ citems = kb.get_data_collection_items(collection)
+ else:
+ raise ValueError('Unknown data collection type %s' % COLLECTION_TYPES[coll_type])
+ ci_map = {}
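+    # key the items by their VID so rows from the input file can be
+    # matched directly against the collection's content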
+ for ci in citems:
+ ci_map[ci.id] = ci
+ return ci_map
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('discard_from_collection', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ logger.info('Loading collection %s from %s' % (args.collection_label,
+ args.collection_type))
+ coll = load_collection(args.collection_type, args.collection_label, kb)
+ if not coll:
+ msg = 'No %s found with label %s' % (args.collection_type,
+ args.collection_label)
+ logger.error(msg)
+ sys.exit(msg)
+ logger.info('Loading items from collection')
+ coll_items = load_collection_items(coll, args.collection_type, kb)
+ logger.info('Fetched %d elements' % len(coll_items))
+
+ with open(args.ifile) as infile:
+ reader = csv.DictReader(infile, delimiter='\t')
+ to_be_deleted = [row['collection_item'] for row in reader]
+ logger.info('Found %d items to be deleted' % len(to_be_deleted))
+
+ for tbd in to_be_deleted:
+ try:
+ kb.delete(coll_items[tbd])
+ logger.info('%s with ID %s deleted' % (COLLECTION_TYPES[args.collection_type],
+ tbd))
+ except KeyError, ke:
+ logger.warning('No %s related to ID %s' % (COLLECTION_TYPES[args.collection_type],
+ ke))
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/discard_from_collection.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/discard_from_collection.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,79 @@
+
+ Discard input elements from the selected collection
+
+ launcher.sh
+ --interpreter=python
+ --runner=discard_from_collection.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --ifile=${ifile}
+ #if str($collection_selector.collection_type) != 'no_coll_selected'
+ --collection_type=${collection_selector.collection_type}
+ #if str($collection_selector.collection_type) == 'DataCollection'
+ #if str($collection_selector.dcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.dcoll_label}
+ #end if
+ #elif str($collection_selector.collection_type) == 'VesselsCollection'
+ #if str($collection_selector.vcoll_label) != 'no_label_selected'
+ --collection_label=${collection_selector.vcoll_label}
+ #end if
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool discards one or more items from a DataCollection or a
+VesselsCollection.
+
+The expected input file looks like
+
++---------------+
+|collection_item|
++---------------+
+|V013AFF22311 |
++---------------+
+|V0ABB3451516 |
++---------------+
+|V012441AAEEC |
++---------------+
+
+Input file rows must be VIDs obtained using the **map_vid** tool.
+
+The collection must be selected using the dedicated selection lists,
+which only show the collections already imported into the system.
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/drop_parental_info.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/drop_parental_info.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,69 @@
+import sys, csv, argparse, os
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='set parents of the selected individuals to None')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='list of the individuals')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('drop_parental_info', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Retrieved %d individuals' % len(inds))
+ inds_lookup = {}
+ for i in inds:
+ inds_lookup[i.id] = i
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = []
+ for row in reader:
+ try:
+                # Mapping 'individual' to inds_lookup[row['individual']].id
+                # is redundant, but it is a useful check that filters
+                # out wrong VIDs
+ record = {'individual' : inds_lookup[row['individual']].id,
+ 'father' : 'None',
+ 'mother' : 'None'}
+ records.append(record)
+ except KeyError, ke:
+ logger.warning('Individual with VID %s does not exist, skipping line' % ke)
+
+ with open(args.out_file, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ['individual', 'father', 'mother'],
+ delimiter = '\t')
+ writer.writeheader()
+ writer.writerows(records)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/drop_parental_info.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/drop_parental_info.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,47 @@
+
+
+ Remove parental info of individuals
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=drop_parental_info.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --in_file=${input1}
+ --out_file=${output1}
+ --logfile=${logfile}
+
+
+
+
+
+
+
+
+
+
+
+
+It removes the parental info of individuals, using information from a file like this::
+
+ individual
+ V08E18411BC66F4987BCA43EFC6F636224
+
+and build a tsv file like this::
+
+ individual father mother
+ V08E18411BC66F4987BCA43EFC6F636224 None None
+
+-----
+
+.. class:: warningmark
+
+Note that Galaxy does not recognize a TSV file with a single column as a tabular file, so it needs to be converted by hand.
+
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/launcher.sh Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+CMD=""
+PYTH_PATH="PYTHONPATH=/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/p\
+ython2.7/site-packages/:/SHARE/USERFS/els7/users/biobank/lib/"
+runner="$(dirname ${BASH_SOURCE[0]})/"
+until [ -z $1 ]
+ do
+
+ opt_host='--host='
+ opt_user='--user='
+ opt_passwd='--passwd='
+ opt_interpreter='--interpreter='
+ opt_runner='--runner='
+ if [[ $1 == $opt_host* ]]; then
+ host=`echo $1 | cut -d '=' -f2 | cut -d '.' -f1`
+ if [ -z $host -o $host == 'None' ]; then
+ echo 'ERROR. Missing omero host parameter. Please, set Omero Host in your user preferences' > /dev/null >&2
+ exit -1
+ fi
+ PYTH_PATH+=$host
+ HOST=`echo $1 | cut -d '=' -f2`
+ CMD+=' '$1
+ elif [[ $1 == $opt_user* ]]; then
+ user=`echo $1 | cut -d '=' -f2`
+ if [ -z $user -o $user == 'None' ]; then
+ echo 'ERROR. Missing omero user parameter. Please, set Omero User in your user preferences' > /dev/null >&2
+ exit -1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_passwd* ]]; then
+ passwd=`echo $1 | cut -d '=' -f2`
+ if [ -z $passwd -o $passwd == 'None' ]; then
+ echo 'ERROR. Missing omero password parameter. Please, set Omero Password in your user preferences' > /dev/null >&2
+ exit -1
+ fi
+ CMD+=' '$1
+ elif [[ $1 == $opt_runner* ]]; then
+ runner+=`echo $1 | cut -d '=' -f2`
+ elif [[ $1 == $opt_interpreter* ]]; then
+ interpreter=`echo $1 | cut -d '=' -f2`
+ else
+ CMD+=' '$1
+ fi
+ shift
+done
+export $PYTH_PATH/:$PYTHONPATH
+profile="/SHARE/USERFS/els7/users/biobank/lib/${HOST}.biobank.profile"
+if [ -f $profile ]; then
+ source $profile
+ CMD=$interpreter' '$runner$CMD
+ $CMD
+else
+ echo "ERROR. Biobank profile file doesn't exist. Please, check Omero Host in your user preferences" > /dev/null >&2
+ exit -1
+fi
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/merge_individuals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/merge_individuals.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,223 @@
+#=======================================
+# This tool moves all information related to an individual (source) to
+# another (target). Moved information is:
+# * children (Individual objects)
+# * ActionOnIndividual
+# * Enrollments
+# * EHR records
+#
+# The tool expects as input a TSV file like this
+# source target
+# V0468D2D96999548BF9FC6AD24C055E038 V060BAA01C662240D181BB98A51885C498
+# V029CC0A614E2D42D0837602B15193EB58 V01B8122A7C75A452E9F80381CEA988557
+# V0B20C93E8A88D43EFB87A7E6911292A05 V0BED85E8E76A54AA7AB0AFB09F95798A8
+# ...
+#
+# NOTE WELL:
+# * Parents of the "source" individual WILL NOT BE ASSIGNED
+# to the "target" individual
+# * For the Enrollment objects, if the
+# "target" individual already has a code in the same study as the "source"
+# individual, the script will try to move the Enrollment to the
+# "duplicated" study (this will be fixed when proper ALIAS
+# management is introduced)
+# =======================================
+
+import sys, argparse, csv, time, json, os
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.kb import KBError
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='merge informations related to an individual ("source") to another one ("target")')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-O', '--operator', type=str, help='operator',
+ required=True)
+ parser.add_argument('--in_file', type=str, required = True,
+ help='input TSV file')
+ return parser
+
+
+def update_object(obj, backup_values, operator, kb, logger):
+ logger.debug('Building ActionOnAction for object %s::%s' %
+ (obj.get_ome_table(),
+ obj.id)
+ )
+    act_setup = build_action_setup('merge-individuals-%f' % time.time(),
+                                   backup_values, kb, logger)
+ aoa_conf = {
+ 'setup': act_setup,
+ 'actionCategory' : kb.ActionCategory.UPDATE,
+ 'operator': operator,
+ 'target': obj.lastUpdate if obj.lastUpdate else obj.action,
+ 'context': obj.action.context
+ }
+ logger.debug('Updating object with new ActionOnAction')
+ obj.lastUpdate = kb.factory.create(kb.ActionOnAction, aoa_conf)
+
+
+def build_action_setup(label, backup, kb, logger):
+ logger.debug('Creating a new ActionSetup with label %s and backup %r' % (label, backup))
+ conf = {
+ 'label': label,
+ 'conf': json.dumps({'backup' : backup})
+ }
+ asetup = kb.factory.create(kb.ActionSetup, conf)
+ return asetup
+
+
+def update_children(source_ind, target_ind, operator, kb, logger):
+ if source_ind.gender.enum_label() == kb.Gender.MALE.enum_label():
+ parent_type = 'father'
+ elif source_ind.gender.enum_label() == kb.Gender.FEMALE.enum_label():
+ parent_type = 'mother'
+ else:
+ raise ValueError('%s is not a valid gender value' % (source_ind.gender.enum_label()))
+ query = '''
+ SELECT ind FROM Individual ind
+ JOIN ind.{0} AS {0}
+ WHERE {0}.vid = :parent_vid
+ '''.format(parent_type)
+ children = kb.find_all_by_query(query, {'parent_vid' : source_ind.id})
+ logger.info('Retrieved %d children for source individual' % len(children))
+ for child in children:
+ backup = {}
+ logger.debug('Changing %s for individual %s' % (parent_type,
+ child.id))
+ backup[parent_type] = getattr(child, parent_type).id
+ setattr(child, parent_type, target_ind)
+        update_object(child, backup, operator, kb, logger)
+ kb.save_array(children)
+
+
+def update_action_on_ind(source_ind, target_ind, operator, kb, logger):
+ query = '''SELECT act FROM ActionOnIndividual act
+ JOIN act.target AS ind
+ WHERE ind.vid = :ind_vid
+ '''
+ src_acts = kb.find_all_by_query(query, {'ind_vid' : source_ind.id})
+ logger.info('Retrieved %d actions for source individual' % len(src_acts))
+ connected = kb.dt.get_connected(source_ind, direction=kb.dt.DIRECTION_OUTGOING,
+ query_depth=1)
+ if source_ind in connected:
+ connected.remove(source_ind)
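+    # re-target the source individual's actions, then re-route the graph
+    # edges so that they originate from the target individual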
+ for sa in src_acts:
+ logger.debug('Changing target for action %s' % sa.id)
+ sa.target = target_ind
+ logger.debug('Action %s target updated' % sa.id)
+ kb.save_array(src_acts)
+ for conn in connected:
+ kb.dt.destroy_edge(source_ind, conn)
+ kb.dt.create_edge(conn.action, target_ind, conn)
+
+
+def update_enrollments(source_ind, target_ind, operator, kb, logger):
+ query = '''SELECT en FROM Enrollment en
+ JOIN en.individual AS ind
+ WHERE ind.vid = :ind_vid
+ '''
+ enrolls = kb.find_all_by_query(query, {'ind_vid' : source_ind.id})
+ logger.info('Retrieved %d enrollments for source individual' % len(enrolls))
+ for sren in enrolls:
+ try:
+ sren.individual = target_ind
+ logger.debug('Changing individual for enrollment %s in study %s' % (sren.studyCode,
+ sren.study.label))
+ kb.save(sren)
+ logger.info('Changed individual for enrollment %s (study code %s -- study %s)' % (sren.id,
+ sren.studyCode,
+ sren.study.label))
+ except KBError, kbe:
+ logger.warning('Unable to update enrollment %s (study code %s -- study %s)' % (sren.id,
+ sren.studyCode,
+ sren.study.label))
+ move_to_duplicated(sren, operator, kb, logger)
+
+
+def update_ehr_records(source_ind, target_ind, kb):
+ kb.update_table_rows(kb.eadpt.EAV_EHR_TABLE, '(i_vid == "%s")' % source_ind.id,
+ {'i_vid' : target_ind.id})
+
+
+# This function should be considered a temporary hack, to be used
+# until proper ALIAS management is introduced into the
+# system
+def move_to_duplicated(enrollment, operator, kb, logger):
+ old_st = enrollment.study
+ dupl_st = kb.get_study('%s_DUPLICATI' % old_st.label)
+ if not dupl_st:
+ logger.warning('No "duplicated" study ({0}_DUPLICATI) found for study {0}'.format(old_st.label))
+ return
+ enrollment.study = dupl_st
+ try:
+ kb.save(enrollment)
+        logger.info('Enrollment %s moved from study %s to study %s' % (enrollment.studyCode,
+ old_st.label, dupl_st.label))
+ except:
+ logger.error('An error occurred while moving enrollment %s from study %s to %s' % (enrollment.studyCode,
+ old_st.label,
+ dupl_st.label))
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('merge_individuals', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.debug('Retrieving Individuals')
+ individuals = kb.get_objects(kb.Individual)
+ logger.debug('Retrieved %d Individuals' % len(individuals))
+ ind_lookup = {}
+ for i in individuals:
+ ind_lookup[i.id] = i
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ for row in reader:
+ try:
+ source = ind_lookup[row['source']]
+ logger.info('Selected as source individual with ID %s' % source.id)
+ target = ind_lookup[row['target']]
+ logger.info('Selected as destination individual with ID %s' % target.id)
+ except KeyError, ke:
+ logger.warning('Unable to retrieve individual with ID %s, skipping row' % ke)
+ continue
+
+ logger.info('Updating children connected to source individual')
+ update_children(source, target, args.operator, kb, logger)
+ logger.info('Children update complete')
+
+ logger.info('Updating ActionOnIndividual related to source individual')
+ update_action_on_ind(source, target, args.operator, kb, logger)
+ logger.info('ActionOnIndividual update completed')
+
+ logger.info('Updating enrollments related to source individual')
+ update_enrollments(source, target, args.operator, kb, logger)
+ logger.info('Enrollments update completed')
+
+ logger.info('Updating EHR records related to source individual')
+ update_ehr_records(source, target, kb)
+ logger.info('EHR records update completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/merge_individuals.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/merge_individuals.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,52 @@
+
+
+ Merge individuals' data
+
+
+ launcher.sh
+ --interpreter=python
+    --runner=merge_individuals.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+    --in_file=${ifile}
+
+
+
+
+
+
+
+
+
+
+
+
+This tool moves all information related to an individual (source) to
+another (target). Moved information is:
+
+ * children (Individual objects)
+ * ActionOnIndividual
+ * Enrollments
+ * EHR records
+
+The tool expects as input a TSV file like this::
+
+ source target
+ V0468D2D96999548BF9FC6AD24C055E038 V060BAA01C662240D181BB98A51885C498
+ V029CC0A614E2D42D0837602B15193EB58 V01B8122A7C75A452E9F80381CEA988557
+ V0B20C93E8A88D43EFB87A7E6911292A05 V0BED85E8E76A54AA7AB0AFB09F95798A8
+ ...
+
+NOTE WELL:
+ * Parents of the "source" individual WILL NOT BE ASSIGNED
+ to the "target" individual
+ * For the Enrollment objects, if the
+ "target" individual already has a code in the same study as the "source"
+ individual, the script will try to move the Enrollment to the
+ "duplicated" study
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/update_parents.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_parents.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,110 @@
+import sys, csv, argparse, time, json
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='update parents')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('-O', '--operator', type=str, help='operator',
+ required=True)
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file with individual, father and mother')
+ return parser
+
+
+def update_parents(individual, father, mother, operator, kb, logger):
+ backup = {}
+ logger.info('Updating parents for individual %s', individual.id)
+ if individual.father != father:
+ backup['father'] = individual.father.id if individual.father else None
+ logger.info('Setting father to %s (old value %s)' % (father.id if father else None,
+ backup['father']))
+ individual.father = father
+ if individual.mother != mother:
+ backup['mother'] = individual.mother.id if individual.mother else None
+ logger.info('Setting mother to %s (old value %s)' % (mother.id if mother else None,
+ backup['mother']))
+ individual.mother = mother
+ if len(backup.items()) > 0:
+ update_object(individual, backup, operator, kb, logger)
+ return individual
+ else:
+ logger.info('No update needed for individual %s' % individual.id)
+ return None
+
+
+def update_object(obj, backup_values, operator, kb, logger):
+ logger.debug('Building ActionOnAction for object %s' % obj.id)
+ act_setup = build_action_setup('update-parents-%f' % time.time(),
+ backup_values, kb, logger)
+ aoa_conf = {
+ 'setup': act_setup,
+ 'actionCategory': kb.ActionCategory.UPDATE,
+ 'operator': operator,
+ 'target': obj.lastUpdate if obj.lastUpdate else obj.action,
+ 'context': obj.action.context
+ }
+ logger.debug('Updating object with new ActionOnAction')
+ obj.lastUpdate = kb.factory.create(kb.ActionOnAction, aoa_conf)
+
+
+def build_action_setup(label, backup, kb, logger):
+ logger.debug('Creating a new ActionSetup with label %s and backup %r' % (label,
+ backup))
+ conf = {
+ 'label': label,
+ 'conf': json.dumps({'backup': backup})
+ }
+ asetup = kb.factory.create(kb.ActionSetup, conf)
+ return asetup
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('update_parents', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving individuals')
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Retrieved %d individuals' % len(inds))
+ inds_lookup = {}
+ for i in inds:
+ inds_lookup[i.id] = i
+
+ with open(args.in_file) as in_file:
+ to_be_updated = []
+ reader = csv.DictReader(in_file, delimiter='\t')
+ for row in reader:
+ ind = inds_lookup[row['individual']]
+ father = inds_lookup[row['father']] if row['father'] != 'None' else None
+ mother = inds_lookup[row['mother']] if row['mother'] != 'None' else None
+ ind = update_parents(ind, father, mother, args.operator, kb, logger)
+ if ind:
+ to_be_updated.append(ind)
+
+ logger.info('%d individuals are going to be updated' % len(to_be_updated))
+ kb.save_array(to_be_updated)
+ logger.info('Update complete')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/updater/update_parents_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/updater/update_parents_data.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,35 @@
+
+
+ Update parental info of individuals
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=update_parents.py
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+ --operator=$__user_email__
+ --logfile=${logfile}
+ --in_file=${input1}
+
+
+
+
+
+
+
+
+
+
+
+It will update parental info of individual using informations from a file like this::
+
+ individual father mother
+ V4C5363 V0A3AC5 V0CF6C8
+ V0EE642 V0A3AC5 V0CF6C8
+ V027BA1 V0DE514 V0C3A91
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/build_enrollments_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/build_enrollments_import.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,187 @@
+"""
+Prepare a TSV to be imported, with a study code for each individual not
+already present in a specified study.
+
+A second study can also be specified: every individual enrolled in it
+will be ignored.
+
+The report file contains the enrollment codes in the other studies.
+
+Codes are short hashes of numbers, generated using Hashids (hashids.org)
+with the study label as the salt parameter.
+
+ex:
+source study label
+V03CB1DB357B274B17B139EA56A2FFA19E AUTOIMMUNITY ORVL5KMK5
+V0BA695C2E326F4C13AD7F6052BB20539B AUTOIMMUNITY 9R0M2E12N
+V067C445E35DA04ECCA21FA3E2DF3BBCF6 AUTOIMMUNITY QGZLQJ1RV
+...
+
+"""
+
+import argparse
+import csv
+import string
+import sys
+
+from hashids import Hashids
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import LOG_LEVELS, get_logger
+import bl.vl.utils.ome_utils as vlu
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='Retrieve all individuals not enrolled in the specified project')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logger level', default='INFO')
+ parser.add_argument('--study', type=str, help='Study label', required=True)
+ parser.add_argument('--study_to_be_ignored', type=str,
+ help='Study label to be ignored')
+ parser.add_argument('--host', type=str, help='Omero hostname')
+ parser.add_argument('--user', type=str, help='Omero user')
+ parser.add_argument('--passwd', type=str, help='Omero password')
+ parser.add_argument('--ofile', type=str, help='output file path',
+ required=True)
+ parser.add_argument('--reportfile', type=str, help='report file',
+ default='report.tsv')
+ return parser
+
+
+def init_hashids(study):
+ hashids = Hashids(salt=study, min_length=9,
+ alphabet=string.ascii_uppercase + string.digits)
+ return hashids
+
+
+def write_csv_to_be_enrolled(logger, hashids, path, inds_map,
+ highest_id=0):
+ csv_header = ['source', 'study', 'label']
+ study_id = highest_id
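+    # ids continue from the highest value already in use, so newly
+    # generated Hashids labels cannot collide with existing codes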
+
+ # Write to CSV file
+ logger.debug('Writing CSV file %s' % path)
+ with open(path, 'w') as f:
+ writer = csv.DictWriter(f, csv_header,
+ delimiter='\t', quotechar='"',
+ restval='None')
+ writer.writeheader()
+ for k, v in inds_map.iteritems():
+ study_id += 1
+ v['label'] = hashids.encrypt(study_id)
+ writer.writerow(v)
+ return
+
+
+def write_csv_enrollment_codes(logger, filename, csv_header, enrolls_map):
+ logger.debug('Writing CSV file %s' % filename)
+ with open(filename, 'w') as f:
+ writer = csv.DictWriter(f, csv_header,
+ delimiter='\t', quotechar='"',
+ restval='None')
+ writer.writeheader()
+ for k, v in enrolls_map.iteritems():
+ writer.writerow(v)
+ return
+
+
+def get_enrollments_codes(logger, kb, inds_map):
+ """Retrieve enrollments codes in other studies for the individuals
+ to be enrolled into the specified study"""
+ # Retrieve all studies from omero
+ studies = kb.get_objects(kb.Study)
+ logger.info('Retrieved %d studies from database' % len(studies))
+
+ csv_header = ['individual_uuid']
+ enrolls_map = {}
+ # For each study, retrieve all enrollments
+ for s in studies:
+ logger.info('Retrieving enrollments for study %s' % s.label)
+ enrolls = kb.get_enrolled(s)
+ logger.info('%s enrollments retrieved' % len(enrolls))
+ if len(enrolls) > 0:
+ logger.debug('Building lookup dictionary....')
+ csv_header.append(s.label) # Add study label to CSV header
+ for e in enrolls:
+ if e.individual.id in inds_map:
+ enrolls_map.setdefault(e.individual.omero_id,
+ {})['individual_uuid'] = e.individual.id
+ enrolls_map[e.individual.omero_id][s.label] = e.studyCode
+ else:
+ logger.debug('No enrollments found, skip study %s' % s.label)
+
+ return csv_header, enrolls_map
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('inds_not_enrolled', level=args.loglevel,
+ filename=args.logfile)
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ out_file_path = args.ofile
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ inds = kb.get_objects(kb.Individual)
+ logger.info('Retrieved {} individuals'.format(len(inds)))
+
+ inds_map = {}
+
+ for i in inds:
+ inds_map.setdefault(i.id, {})['source'] = i.id
+ inds_map[i.id]['study'] = args.study
+
+ study = kb.get_by_label(kb.Study, args.study)
+ if study:
+ logger.info('{} present in the database'.format(study.label))
+ else:
+ logger.critical('{} not present in the database'.format(args.study))
+ sys.exit()
+
+ hashids = init_hashids(study.label)
+ enrolls = kb.get_enrolled(study)
+    logger.info("{} enrollments found in {}".format(len(enrolls),
+ study.label))
+ highest_id = 0
+
+    for e in enrolls:
+        if e.individual.id in inds_map:
+            del inds_map[e.individual.id]
+        # decrypt returns a tuple of numeric ids; track the highest one so
+        # that new codes can be generated starting from it
+        decoded = hashids.decrypt(e.studyCode)
+        if decoded and decoded[0] > highest_id:
+            highest_id = decoded[0]
+
+ if args.study_to_be_ignored and kb.get_by_label(kb.Study,
+ args.study_to_be_ignored):
+ to_be_removed = [args.study_to_be_ignored]
+ else:
+ to_be_removed = []
+
+ for tbr_study in to_be_removed:
+ enr = kb.get_enrolled(kb.get_by_label(kb.Study, tbr_study))
+ logger.info('Retrieved {} enrollments from {}'.format(len(enr),
+ tbr_study))
+ for e in enr:
+ if e.individual.id in inds_map:
+ del inds_map[e.individual.id]
+
+ logger.info('{} individuals to be enrolled'.format(len(inds_map)))
+
+ write_csv_to_be_enrolled(logger, hashids, out_file_path, inds_map, highest_id)
+
+ csv_header, enrolls_map = get_enrollments_codes(logger, kb, inds_map)
+ write_csv_enrollment_codes(logger, args.reportfile, csv_header, enrolls_map)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/delete_flowcell_results.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/delete_flowcell_results.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,97 @@
+import argparse, sys
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import get_logger, LOG_LEVELS
+import bl.vl.utils.ome_utils as vlu
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='remove datasamples connected to a specific sample of a flowcell')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero server hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--flowcell-label', type=str, required=True,
+ help='flowcell label')
+ parser.add_argument('--sample-label', type=str, required=True,
+ help='sample label')
+ parser.add_argument('--dry-run', action='store_true',
+ help='run a simulation, don\'t delete anything')
+ return parser
+
+
+def get_flowcell_samples_map(flowcell, kb, logger):
+ fc = kb.get_by_label(kb.FlowCell, flowcell)
+ if not fc:
+ logger.info('No flowcell with label %s', flowcell)
+ sys.exit(0)
+ logger.info('Loading data for flowcell %s', flowcell)
+ dsamples = kb.dt.get_connected(fc, kb.SeqDataSample)
+ dsamples_map = {}
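+    # group the flowcell's SeqDataSamples by the label of their source
+    # sample; datasamples without a sample go under the 'NO_SAMPLE' key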
+ for ds in dsamples:
+ if ds.sample:
+ dsamples_map.setdefault(ds.sample.label, []).append(ds)
+ else:
+ dsamples_map.setdefault('NO_SAMPLE', []).append(ds)
+ return dsamples_map
+
+
+def print_report(dsamples_map, sample_label, kb, logger):
+ dsamples = dsamples_map.get(sample_label)
+ if not dsamples:
+ logger.info('No sample with label %s is related to the flowcell', sample_label)
+ sys.exit(0)
+ for ds in dsamples:
+ dobjs = kb.get_data_objects(ds)
+ logger.info('## data sample: %s', ds.label)
+ for dob in dobjs:
+ logger.info('### data object: %s --- mimetype: %s', dob.path, dob.mimetype)
+
+
+def delete(dsamples_map, sample_label, kb, logger):
+ for ds in dsamples_map[sample_label]:
+ # this is a hack specific for the automator workflow
+ if not ds.label.startswith('stage1'):
+ logger.info('Deleting data for %s', ds.label)
+ dobjs = kb.get_data_objects(ds)
+ for d in dobjs:
+ kb.delete(d)
+ a = ds.action
+ kb.delete(ds)
+ try:
+ kb.delete(a)
+ except:
+ pass
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('delete_flowcell_results', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ dsamples_map = get_flowcell_samples_map(args.flowcell_label, kb, logger)
+ print_report(dsamples_map, args.sample_label, kb, logger)
+ if not args.dry_run:
+        delete(dsamples_map, args.sample_label, kb, logger)
+ else:
+ logger.debug('SIMULATION, exit now')
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/delete_flowcell_results.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/delete_flowcell_results.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,36 @@
+
+
+
+ Delete results produced from a flowcell related to a specific sample
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=delete_flowcell_results.py
+ --logfile=${log_file}
+ --flowcell-label=${flowcell_label}
+ --sample-label=${sample_label}
+ #if $simulate
+ --dry-run
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+Delete data samples and related data objects produced from the given flowcell and related to the given sample.
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/drop_flowcell_related_items.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/drop_flowcell_related_items.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,124 @@
+import argparse, sys
+from collections import Counter
+
+from bl.vl.kb import KnowledgeBase as KB
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.utils import get_logger, LOG_LEVELS
+from bl.vl.kb.drivers.omero.sequencing import SeqDataSample, SequencerOutput
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='delete all items related to the given flowcell')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='omero hostname')
+ parser.add_argument('-U', '--user', type=str, help='omero user')
+ parser.add_argument('-P', '--passwd', type=str, help='omero password')
+ parser.add_argument('--flowcell-label', type=str, required=True,
+ help='flowcell label')
+ parser.add_argument('--datasets-only', action='store_true',
+ help='delete only data samples and data objects related to the given flowcell')
+ return parser
+
+
+def get_sources(objs):
+ sources = set()
+ for x in objs:
+ try:
+ sources.add(x.action.target)
+ except AttributeError:
+ # Action has no "target" attribute, no source for item x
+ pass
+ return sources
+
+
+def delete_objects(objs, kb, logger, max_retries = 3):
+ retry_ct = Counter()
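+    # objects that cannot be deleted yet (e.g., still referenced by other
+    # objects) are re-queued and retried up to max_retries times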
+ while len(objs) > 0:
+ o = objs.pop(0)
+ if type(o) in [SeqDataSample, SequencerOutput]:
+ logger.info('Loading DataObjects for %s:%s' % (o.__class__.__name__,
+ o.label))
+ dobjs = kb.get_data_objects(o)
+ logger.info('%d DataObjects loaded' % len(dobjs))
+ for d in dobjs:
+ logger.info('Deleting %s:%s' % (d.__class__.__name__,
+ d.path))
+ kb.delete(d)
+ try:
+ logger.info('Deleting %s:%s' % (o.__class__.__name__,
+ o.id))
+ act = o.action
+ kb.delete(o)
+ try:
+ logger.info('Deleting source action %s:%s' % (act.__class__.__name__,
+ act.id))
+ kb.delete(act)
+ except:
+ logger.info('Can\'t delete action')
+ except:
+ logger.info('Can\'t delete, putting back into objects list')
+ if retry_ct['%s:%s' % (type(o), o.id)] < max_retries:
+ objs.append(o)
+ retry_ct['%s:%s' % (type(o), o.id)] += 1
+ else:
+ logger.info('Reached maximum retry limit for the object, skipping')
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('drop_flowcell_related_items', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ logger.info('Retrieving flowcell with label %s' % args.flowcell_label)
+ query = 'SELECT fc FROM FlowCell fc WHERE fc.label = :fc_label'
+ results = kb.find_all_by_query(query, {'fc_label' : args.flowcell_label})
+ if len(results) == 0:
+ logger.info('No FlowCell with label %s in the database' % args.flowcell_label)
+ sys.exit(0)
+ fc = results[0]
+ logger.info('Loaded FlowCell with ID %s' % fc.id)
+ logger.info('Loading related Lanes')
+ lanes = list(kb.get_lanes_by_flowcell(fc))
+ logger.info('%d Lanes loaded' % len(lanes))
+ logger.info('Loading related LaneSlots')
+ lane_slots = []
+ for l in lanes:
+ lane_slots += list(kb.get_laneslots_by_lane(l))
+ logger.info('%d LaneSlots loaded' % len(lane_slots))
+ logger.info('Loading related Tubes')
+ sub_samples = get_sources(lane_slots)
+ samples = get_sources(sub_samples)
+ logger.info('%d Tubes loaded' % (len(sub_samples) + len(samples)))
+ logger.info('Loading related SequencerOutputs')
+ seq_out = kb.dt.get_connected(fc, kb.SequencerOutput, kb.dt.DIRECTION_OUTGOING)
+ logger.info('%d SequencerOutputs loaded' % len(seq_out))
+ logger.info('Loading related SeqDataSamples')
+ seq_dsamples = kb.dt.get_connected(fc, kb.SeqDataSample, kb.dt.DIRECTION_OUTGOING)
+ logger.info('%d SeqDataSamples loaded' % len(seq_dsamples))
+
+ if args.datasets_only:
+ delete_items = [seq_dsamples]
+ else:
+ delete_items = [seq_dsamples, seq_out, lane_slots, lanes,
+ [fc], list(sub_samples), list(samples)]
+ for items in delete_items:
+ delete_objects(items, kb, logger)
+
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/format_vessels_by_individual_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/format_vessels_by_individual_output.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,91 @@
+# This tool formats output files from kb_query vessels_by_individual
+# into a tabular format, with all data related to an individual grouped
+# in each row. The tool needs as input a mapping file like
+#
+# individual_id label
+# V12311 A_STUDY:A_CODE
+# V135115 A_STUDY:B_CODE
+#
+# in order to use a known label and not VIDs for each row
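+#
+# The output (a sketch; vessel labels are illustrative) has one row per
+# individual, with as many vessel_N columns as the longest vessel list:
+#
+# individual_label  vessel_1  vessel_2
+# A_STUDY:A_CODE    VESSEL_X  VESSEL_Y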
+
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='format kb_query vessels_by_individual output file to tabular format')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in_file', type=str, required=True,
+ help='input file (obtained using kb_query vessels by individual tool)')
+ parser.add_argument('--map_file', type=str, required=True,
+ help='mapping file')
+ parser.add_argument('--out_file', type=str, required=True,
+ help='output file')
+ return parser
+
+def get_mapping(records, grouper_field, grouped_field):
+ mapping = {}
+ for rec in records:
+ mapping.setdefault(rec[grouper_field], []).append(rec[grouped_field])
+ return mapping
+
+def get_labels_mapping(reader, logger):
+ rows = [r for r in reader]
+ lmap = get_mapping(rows, 'individual', 'label')
+ logger.info('%d labels grouped for %d individuals' % (len(rows),
+ len(lmap)))
+ return lmap
+
+def get_vessels_mapping(reader, logger):
+ rows = [r for r in reader]
+ vmap = get_mapping(rows, 'individual', 'vessel_label')
+ logger.info('%d vessels grouped for %d individuals' % (len(rows),
+ len(vmap)))
+ return vmap
+
+def build_record(label, vessels):
+ record = {'individual_label' : '--'.join(label)}
+    # enumerate instead of list.index: index() always returns the first
+    # occurrence, so duplicate vessel labels would collide
+    for i, v in enumerate(vessels):
+        record['vessel_%d' % (i + 1)] = v
+ return record
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+ logger = logging.getLogger()
+
+ with open(args.map_file) as mf:
+ reader = csv.DictReader(mf, delimiter='\t')
+ labels_map = get_labels_mapping(reader, logger)
+
+ with open(args.in_file) as inf:
+ reader = csv.DictReader(inf, delimiter='\t')
+ vessels_map = get_vessels_mapping(reader, logger)
+
+ max_vessels_count = max([len(v) for v in vessels_map.values()])
+ csv_fields = ['individual_label']
+ for x in xrange(max_vessels_count):
+ csv_fields.append('vessel_%d' % (x+1))
+
+ with open(args.out_file, 'w') as ofile:
+ writer = csv.DictWriter(ofile, csv_fields, delimiter='\t')
+ writer.writeheader()
+ for ind, vessels in vessels_map.iteritems():
+ writer.writerow(build_record(labels_map[ind], vessels))
+
+ logger.info('Job completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/format_vessels_by_individual_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/format_vessels_by_individual_output.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,31 @@
+
+
+ Format the output from VLU.vessels_by_individual into a tabular
+ format
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=format_vessels_by_individual_output.py
+ --loglevel=$__app__.config.vl_loglevel
+ --logfile=${logfile}
+ --in_file=${in_file}
+ --map_file=${map_file}
+ --out_file=${out_file}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/from_well_to_illumina_measures.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/from_well_to_illumina_measures.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,95 @@
+import sys, argparse, csv
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import get_logger, LOG_LEVELS
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.graph.drivers.neo4j import Neo4JDriver
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='map wells label to illumina bead chip measures')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level (default=INFO)', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='OMERO server hostname')
+ parser.add_argument('-U', '--user', type=str, help='OMERO user')
+ parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
+ parser.add_argument('--in_file', type=str, help='input file',
+ required=True)
+ parser.add_argument('--out_file', type=str, help='output file',
+ required=True)
+ parser.add_argument('--well_column', type=str, help='label of the column that will be mapped',
+ default='source')
+ return parser
+
+
+def get_wells_map(plate, kb, logger):
+ logger.info('Mapping wells for plate %s', plate.label)
+ wells_map = {}
+ for w in kb.get_wells_by_plate(plate):
+ wells_map[w.label] = w
+ logger.info('Mapped %d wells', len(wells_map))
+ return wells_map
+
+
+def get_plates_map(plates_list, kb, logger):
+ logger.info('Loading TiterPlates map')
+ plates_map = {}
+ for pl in kb.get_objects(kb.TiterPlate):
+ if isinstance(pl, kb.TiterPlate) and pl.barcode in plates_list:
+ plates_map[pl.barcode] = get_wells_map(pl, kb, logger)
+ logger.info('Mapped %d plates', len(plates_map))
+ return plates_map
+
+
+def get_connected_illumina_measures(well, kb, logger):
+ logger.debug('Loading connected IlluminaBeadChipMeasures for well %s:%s', well.label,
+ well.container.label)
+ return kb.dt.get_connected(well, kb.IlluminaBeadChipMeasures,
+ direction = Neo4JDriver.DIRECTION_OUTGOING)
+
+
+def wells_to_illumina(in_file, out_file, column_label, kb, logger):
+ with open(in_file) as ifile, open(out_file, 'w') as ofile:
+ reader = csv.DictReader(ifile, delimiter='\t')
+ in_records = [r for r in reader]
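+        # the well column holds PLATE_BARCODE:WELL_LABEL pairs; collect the
+        # barcodes so only the plates actually referenced get mapped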
+ plates_barcodes = set([x[column_label].split(':')[0] for x in in_records])
+ plates_map = get_plates_map(plates_barcodes, kb, logger)
+ writer = csv.DictWriter(ofile, reader.fieldnames, delimiter='\t')
+ writer.writeheader()
+ logger.info('Mapping wells to illumina bead chip measures')
+ for rec in in_records:
+ barcode, well = rec[column_label].split(':')
+ measures = get_connected_illumina_measures(plates_map[barcode][well], kb,
+ logger)
+ if len(measures) != 1:
+ logger.warning('Found %d measures for well %s:%s, skipping line', len(measures),
+ barcode, well)
+ continue
+ rec[column_label] = measures[0].label
+ writer.writerow(rec)
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('from_well_to_illumina_measures', level=args.loglevel,
+ filename=args.logfile)
+
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ kb = KB(driver='omero')(host, user, passwd)
+ wells_to_illumina(args.in_file, args.out_file, args.well_column,
+ kb, logger)
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/from_well_to_illumina_measures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/from_well_to_illumina_measures.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,34 @@
+
+
+
+ Map well labels to illumina bead chip measures
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=from_well_to_illumina_measures.py
+ --logfile=${logfile}
+ --in_file=${infile}
+ --out_file=${out_file}
+ --well_column=${well_column}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Map a TSV file's column with PlateWell labels in format PLATE_BARCODE:WELL_LABEL to the label of
+the connected IlluminaBeadChipMeasures
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/gdoize_ms.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/gdoize_ms.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,46 @@
+
+
+ Build missing GDOs for the selected markers set
+
+
+ py_protobuff_cpp
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=gdoize_ms
+ --logfile=${logfile}
+ #if str($mset_label) != 'select_one'
+ --markers-set-label=$mset_label
+ #end if
+ #if str($study) != 'select_one'
+ --study-label=$study
+ #end if
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Iterate over all genotype data samples corresponding to the given
+marker set; create a GDO table row for each genotype data sample that
+does not already have one.
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/launcher.sh Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+CMD=""
+PYTH_PATH="PYTHONPATH=/SHARE/USERFS/els7/users/galaxy/develop/usr-cluster/lib/p\
+ython2.7/site-packages/:/SHARE/USERFS/els7/users/biobank/lib/"
+runner="$(dirname ${BASH_SOURCE[0]})/"
+until [ -z $1 ]
+ do
+
+ opt_host='--host='
+ opt_interpreter='--interpreter='
+ opt_runner='--runner='
+ if [[ $1 == $opt_host* ]]; then
+ PYTH_PATH+=`echo $1 | cut -d '=' -f2 | cut -d '.' -f1`
+ CMD+=' '$1
+ elif [[ $1 == $opt_runner* ]]; then
+ runner+=`echo $1 | cut -d '=' -f2`
+ elif [[ $1 == $opt_interpreter* ]]; then
+ interpreter=`echo $1 | cut -d '=' -f2`
+ else
+ CMD+=' '$1
+ fi
+ shift
+done
+export $PYTH_PATH/:$PYTHONPATH
+CMD=$interpreter' '$runner$CMD
+$CMD
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,76 @@
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='build aligned seq data sample import files')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--study', type=str, required=True,
+ help='study')
+ parser.add_argument('--label', type=str, required=True,
+ help='label')
+ parser.add_argument('--source', type=str, required=True,
+ help='source')
+ parser.add_argument('--device', type=str, required=True,
+ help='device')
+ parser.add_argument('--path', type=str, required=True,
+ help='path')
+ parser.add_argument('--sample', type=str, required=True,
+ help='sample')
+ parser.add_argument('--genome_reference', type=str, required=True,
+ help='genome reference')
+ parser.add_argument('--dsample_ofile', type=str, default='./genome_variations_dsample.tsv',
+ help='output file containing data samples definitions')
+    parser.add_argument('--dobject_ofile', type=str, default='./genome_variations_dobject.tsv',
+                        help='output file containing data object definitions')
+
+ return parser
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format': LOG_FORMAT,
+ 'datefmt': LOG_DATEFMT,
+ 'level': log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+    logger = logging.getLogger('prepare_aligned_seq_dsample_import')
+
+
+ with open(args.dsample_ofile, 'w') as ofile:
+ out_file_header = ['study', 'label', 'source', 'source_type', 'seq_dsample_type', 'status', 'device', 'sample', 'genome_reference']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ writer.writerow({'study': args.study,
+ 'label': args.label,
+ 'source': args.source,
+ 'device': args.device,
+ 'source_type' : 'Tube',
+ 'seq_dsample_type': 'AlignedSeqDataSample',
+ 'status': 'USABLE',
+ 'sample' : args.sample,
+ 'genome_reference': args.genome_reference})
+ logger.info('Done writing file %s' % args.dsample_ofile)
+
+ with open(args.dobject_ofile, 'w') as ofile:
+ out_file_header = ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ writer.writerow({'study': args.study,
+ 'path': args.path,
+ 'data_sample': args.label,
+ 'mimetype': 'x-vl/bam',
+ 'size': '-1',
+ 'sha1': 'N.A.'})
+ logger.info('Done writing file %s' % args.dobject_ofile)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,58 @@
+
+
+ Prepare Tabular file to Import Aligned SeqDatasample
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_aligned_seq_dsample_import.py
+ --study=${study}
+ --label=${label}
+ --source=${source}
+ --device=${device}
+ --genome_reference=${genome_reference}
+ --path=${path}
+ --sample=${sample}
+ --logfile=${log_file}
+ --dsample_ofile=${dsample_ofile}
+ --dobject_ofile=${dobject_ofile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_enrollments_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_enrollments_import.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,97 @@
+"""
+Split a file like::
+
+source enrollment
+V044DE795E7F9F42FEB9855288CF577A77 ASTUDY:2141
+V06C59B915C0FD47DABE6AE02C731780AF BSTUDY:390
+
+into a new TSV file::
+
+source study label
+V044DE795E7F9F42FEB9855288CF577A77 ASTUDY 2141
+V06C59B915C0FD47DABE6AE02C731780AF BSTUDY 390
+
+"""
+
+import sys, argparse, csv
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(description='Prepare input files for enrollments import workflow')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--out-enrollments', type=str, required=True,
+                        help='output file with Enrollments definitions')
+ return parser
+
+
+def get_enrollments_definitions(records, logger):
+ logger.info('Creating enrollment definitions')
+ enr_defs = []
+ for rec in records:
+ try:
+ edef = {}
+ edef['source'] = rec['source']
+ try:
+ edef['study'], edef['label'] = rec['enrollment'].split(':')
+ except ValueError:
+ logger.error('Skipped record %r, wrong label format for %s', rec, rec['enrollment'])
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ enr_defs.append(edef)
+ logger.info('Retrieved %d enrollment definitions', len(enr_defs))
+ return enr_defs
+
+
+def main(argv):
+ parser = get_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_enrollments_import', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Start processing file %s', args.in_file)
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = [row for row in reader]
+ logger.info('Loaded %d records', len(records))
+
+ enrollment_defs = get_enrollments_definitions(records, logger)
+ with open(args.out_enrollments, 'w') as enr_out:
+ enr_writer = csv.DictWriter(enr_out,
+ ['source', 'study', 'label'],
+ delimiter='\t')
+ enr_writer.writeheader()
+ enr_writer.writerows(enrollment_defs)
+
+ logger.info('Job completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_enrollments_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_enrollments_import.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,45 @@
+
+
+
+ Prepare input files for enrollments import workflow
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_enrollments_import.py
+ --logfile=${log_file}
+ --in-file=${in_file}
+ --out-enrollments=${enrs_out}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ source enrollment
+ V044DE795E7F9F42FEB9855288CF577A77 ASTUDY:2141
+ V06C59B915C0FD47DABE6AE02C731780AF BSTUDY:390
+
+into a new TSV file::
+
+ source study label
+ V044DE795E7F9F42FEB9855288CF577A77 ASTUDY 2141
+ V06C59B915C0FD47DABE6AE02C731780AF BSTUDY 390
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_genome_variations_dsample_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_genome_variations_dsample_import.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,75 @@
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='build genome variations import files')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--study', type=str, required=True,
+ help='study')
+ parser.add_argument('--label', type=str, required=True,
+ help='label')
+ parser.add_argument('--source', type=str, required=True,
+ help='source')
+ parser.add_argument('--device', type=str, required=True,
+ help='device')
+ parser.add_argument('--path', type=str, required=True,
+ help='path')
+ parser.add_argument('--genome_reference', type=str, required=True,
+ help='genome reference')
+ parser.add_argument('--dsample_ofile', type=str, default='./genome_variations_dsample.tsv',
+ help='output file containing data samples definitions')
+ parser.add_argument('--dobject_ofile', type=str, default='./genome_variations_dobject.tsv',
+                        help='output file containing data objects definitions')
+
+ return parser
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format': LOG_FORMAT,
+ 'datefmt': LOG_DATEFMT,
+ 'level': log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+    logger = logging.getLogger('prepare_genome_variations_dsample_import')
+
+
+ with open(args.dsample_ofile, 'w') as ofile:
+ out_file_header = ['study', 'label', 'source', 'device', 'device_type', 'source_type','data_sample_type',
+ 'status','genome_reference']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ writer.writerow({'study': args.study,
+ 'label': args.label,
+ 'source': args.source,
+ 'device': args.device,
+ 'device_type': 'Device',
+ 'source_type' : 'Tube',
+ 'data_sample_type': 'GenomeVariationsDataSample',
+ 'status': 'USABLE',
+ 'genome_reference': args.genome_reference})
+ logger.info('Done writing file %s' % args.dsample_ofile)
+
+ with open(args.dobject_ofile, 'w') as ofile:
+ out_file_header = ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ writer.writerow({'study': args.study,
+ 'path': args.path,
+ 'data_sample': args.label,
+ 'mimetype': 'x-vl/vcf',
+ 'size': '-1',
+ 'sha1': 'N.A.'})
+ logger.info('Done writing file %s' % args.dobject_ofile)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_genome_variations_dsample_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_genome_variations_dsample_import.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,55 @@
+
+
+ Prepare tabular files to import a GenomeVariationsDataSample
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_genome_variations_dsample_import.py
+ --study=${study}
+ --label=${label}
+ --source=${source}
+ --device=${device}
+ --genome_reference=${genome_reference}
+ --path=${path}
+ --logfile=${log_file}
+ --dsample_ofile=${dsample_ofile}
+ --dobject_ofile=${dobject_ofile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_illumina_import_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_illumina_import_inputs.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,159 @@
+from bl.core.io.illumina import GenomeStudioSampleSheetReader as gsr
+from bl.vl.utils import LOG_LEVELS, get_logger
+import csv, argparse, sys, re
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='Split a GenomeStudio samplesheet into TSV files to import data within OMERO')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--input-file', type=str, required=True,
+ help='GenomeStudio samplesheet')
+ parser.add_argument('--arrays-out-file', type=str,
+ help='output file containing IlluminaArrayOfArrays definitions',
+ default='./array_of_arrays.tsv')
+ parser.add_argument('--bead-chip-out-file', type=str,
+ help='output file containing IlluminaBeadChipArray definitions',
+ default='./bead_chip.tsv')
+ parser.add_argument('--array-measure-out-file', type=str,
+ help='output file containing IlluminaBeadChipMeasure definitions',
+ default='./array_measure.tsv')
+ parser.add_argument('--array-measures-out-file', type=str,
+ help='output file containing IlluminaBeadChipMeasures definitions',
+ default='./array_measures.tsv')
+ parser.add_argument('--study', type=str, required=True,
+ help='Study label that will be used in the import procedure')
+ return parser
+
+
+def get_assay_type_enum(manifest_file):
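+    # For example, a manifest name like 'HumanExome-12v1 A.bpm' (hypothetical)
+    # would be mapped to the enum label 'HUMANEXOME_12V1_A'.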
+ return manifest_file.strip().replace('.bpm', '').replace('-', '_').replace(' ', '_').upper()
+
+
+def prepare_array_of_arrays_input(barcode, study, elements):
+ ICHIPCORDS_PATTERN = re.compile(r'^r(\d{2})c(\d{2})$', re.IGNORECASE)
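+    # e.g. an array label like 'R01C02' yields row 1, column 2 (case-insensitive)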
+ rows = []
+ cols = []
+ for x in elements:
+ m = re.match(ICHIPCORDS_PATTERN, x['array_label'])
+ rows.append(int(m.groups()[0]))
+ cols.append(int(m.groups()[1]))
+ return {
+ 'barcode': barcode,
+ 'rows': max(rows),
+ 'columns': max(cols),
+ 'label': barcode,
+ 'study': study,
+ }
+
+
+def barcodes_to_labels(elements, wells_map, strict_mapping, logger):
+ from copy import deepcopy
+
+ mapped_elements = []
+ for e in elements:
+ if e['source'] in wells_map:
+ new_el = deepcopy(e)
+ new_el['source'] = wells_map[e['source']]
+ mapped_elements.append(new_el)
+ else:
+ logger.warning('Unable to map well %s' % e['source'])
+
+ if strict_mapping and len(mapped_elements) < len(elements):
+        msg = 'Mapped %d records of %d' % (len(mapped_elements), len(elements))
+ logger.critical(msg)
+ sys.exit(msg)
+ return mapped_elements
+
+
+def prepare_bead_chip_array_input(array_barcode, assay_type, study, elements):
+ return [{
+ 'illumina_array': array_barcode,
+ 'label': x['array_label'],
+ 'source': x['source'],
+ 'bead_chip_assay_type': assay_type,
+ 'study': study,
+ } for x in elements]
+
+
+def prepare_bead_chip_measure_input(array_barcode, study, elements,
+ device='generic_illumina_scanner',
+ status='USABLE'):
+ records = []
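+    # Build one measure per scanner channel (Grn and Red) for each array element.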
+ for channel in ['Grn', 'Red']:
+ records.extend(
+ [
+ {
+ 'label': '%s_%s_%s' % (array_barcode, x['array_label'], channel),
+ 'source': '%s:%s' % (array_barcode, x['array_label']),
+ 'scanner': device,
+ 'status': status,
+ 'study': study,
+ } for x in elements
+ ]
+ )
+ return records
+
+
+def prepare_bead_chip_array_measures_input(array_barcode, study, elements):
+ return [{
+ 'study': study,
+ 'label': '%s_%s' % (array_barcode, x['array_label']),
+ 'red_channel': '%s_%s_Red' % (array_barcode, x['array_label']),
+ 'green_channel': '%s_%s_Grn' %(array_barcode, x['array_label']),
+ 'source': '%s:%s' % (array_barcode, x['array_label']),
+ } for x in elements]
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_illumina_import_inputs', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Processing file %s', args.input_file)
+ with open(args.input_file) as in_file:
+ reader = gsr(in_file)
+ assay_type = get_assay_type_enum(reader.header['A'])
+ arrays_map = {}
+ for r in reader:
+ arrays_map.setdefault(r['SentrixBarcode_A'], []).append({'source': r['Sample_ID'],
+ 'array_label': r['SentrixPosition_A']})
+ with open(args.arrays_out_file, 'w') as array_file,\
+ open(args.bead_chip_out_file, 'w') as chip_file,\
+ open(args.array_measures_out_file, 'w') as measures_file,\
+ open(args.array_measure_out_file, 'w') as measure_file:
+ arrays_writer = csv.DictWriter(array_file,
+ ['study', 'label', 'barcode', 'rows', 'columns'],
+ delimiter='\t')
+ arrays_writer.writeheader()
+ chip_writer = csv.DictWriter(chip_file,
+ ['study', 'illumina_array', 'label', 'source',
+ 'bead_chip_assay_type'],
+ delimiter='\t')
+ chip_writer.writeheader()
+        measure_writer = csv.DictWriter(measure_file,
+                                        ['study', 'label', 'source', 'scanner', 'status'],
+                                        delimiter='\t')
+        measure_writer.writeheader()
+        measures_writer = csv.DictWriter(measures_file,
+                                         ['study', 'label', 'red_channel', 'green_channel',
+                                          'source'],
+                                         delimiter='\t')
+        measures_writer.writeheader()
+ for k, v in arrays_map.iteritems():
+ arrays_writer.writerow(prepare_array_of_arrays_input(k, args.study, v))
+ chip_writer.writerows(prepare_bead_chip_array_input(k, assay_type, args.study, v))
+ measure_writer.writerows(prepare_bead_chip_measure_input(k, args.study, v))
+ measures_writer.writerows(prepare_bead_chip_array_measures_input(k, args.study, v))
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_illumina_import_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_illumina_import_inputs.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,45 @@
+
+
+
+ Prepare input files used to import Illumina data
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_illumina_import_inputs.py
+ --input-file=${infile}
+ --logfile=${log_file}
+ --study=${study}
+ --arrays-out-file=${arrays_out_file}
+ --bead-chip-out-file=${bead_chip_out_file}
+ --array-measure-out-file=${measure_out_file}
+ --array-measures-out-file=${measures_out_file}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Prepare files to import IlluminaArrayOfArrays and IlluminaBeadChipArray objects, reading data
+from a GenomeStudio samplesheet
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_individuals_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_individuals_import.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,109 @@
+"""
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+
+into two separate TSV files: the first one will be used to import new individuals and enrollments,
+the second one will be used to update father and mother information for the individuals in the first
+file.
+"""
+
+import sys, argparse, csv
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(description='Prepare input files for individuals import workflow')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--out-individuals', type=str, required=True,
+                        help='output file with Individuals definitions')
+ parser.add_argument('--out-parents', type=str, required=True,
+                        help='output file with parents definitions')
+ return parser
+
+
+def get_individual_definitions(records, logger):
+ logger.info('Creating individual definitions')
+ ind_defs = []
+ for rec in records:
+ try:
+ idef = {'father': 'None', 'mother': 'None'}
+ idef['gender'] = rec['gender']
+ try:
+ idef['study'], idef['label'] = rec['individual'].split(':')
+ except ValueError:
+ logger.error('Skipped record %r, wrong label format for %s', rec, rec['individual'])
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ ind_defs.append(idef)
+ logger.info('Retrieved %d individual definitions', len(ind_defs))
+ return ind_defs
+
+
+def get_parents_definitions(records, logger):
+ logger.info('Creating parents definitions')
+ parents_defs = []
+ for rec in records:
+ try:
+ pdef = dict()
+ pdef['individual'] = rec['individual']
+ if rec['father'] != 'None' or rec['mother'] != 'None':
+ pdef['father'] = rec['father']
+ pdef['mother'] = rec['mother']
+ parents_defs.append(pdef)
+ else:
+ continue
+ except KeyError, ke:
+ logger.error('Skipped record %r, missing key %s', rec, ke)
+ continue
+ logger.info('Retrieved %d parents definitions', len(parents_defs))
+ return parents_defs
+
+
+def main(argv):
+ parser = get_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('prepare_individuals_import', level=args.loglevel,
+ filename=args.logfile)
+
+ logger.info('Start processing file %s', args.in_file)
+
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = [row for row in reader]
+ logger.info('Loaded %d records', len(records))
+
+ individual_defs = get_individual_definitions(records, logger)
+ with open(args.out_individuals, 'w') as inds_out:
+ inds_writer = csv.DictWriter(inds_out,
+ ['study', 'label', 'gender', 'father', 'mother'],
+ delimiter='\t')
+ inds_writer.writeheader()
+ inds_writer.writerows(individual_defs)
+
+ parents_defs = get_parents_definitions(records, logger)
+ with open(args.out_parents, 'w') as parents_out:
+ parents_writer = csv.DictWriter(parents_out, ['individual', 'father', 'mother'],
+ delimiter='\t')
+ parents_writer.writeheader()
+ parents_writer.writerows(parents_defs)
+
+ logger.info('Job completed')
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_individuals_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_individuals_import.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,44 @@
+
+
+
+ Prepare input files for individuals import/parents update workflow
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_individuals_import.py
+ --logfile=${log_file}
+ --in-file=${in_file}
+ --out-individuals=${inds_out}
+ --out-parents=${parents_out}
+
+
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+
+into two separate TSV files: the first one will be used to import new individuals and enrollments,
+the second one will be used to update father and mother information for the individuals in the first
+file.
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,293 @@
+"""
+This tool produces files that can be used as input to import
+* samples
+* flowcells
+* lanes
+* laneslots
+within OMERO.biobank using import applications.
+If the optional 'study-output-file' parameter is given as input, the
+script will produce the input file for a new study definition.
+If the optional 'tubes-subsamples-output-file' is given, the script
+will generate another file with tubes definitions where each tube is
+produced applying a specific laboratory protocol to an existing
+tube. Existing tubes are the ones in tubes-out-file; new tubes' labels
+are created using the pattern tube_label::protocol.
+The config_parameters field must point to a YAML configuration file
+with the following structure:
+
+ config_parameters:
+ study_label: study_label
+ namespace: namespace
+
+where study_label is mandatory and namespace is optional.
+"""
+
+import csv, sys, argparse, logging, yaml
+# Needed to import flowcell data
+from bioblend.galaxy import GalaxyInstance
+import nglimsclient, os
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='split sequencing samplesheet')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', '-i', type=str, required=True,
+ help='input file')
+ parser.add_argument('--tubes-out-file', type=str,
+ help='output file containing tube definitions',
+ default='./tubes_def.tsv')
+ parser.add_argument('--flowcells-out-file', type=str,
+ help='output file containing flowcell definitions',
+ default='./flowcells_def.tsv')
+ parser.add_argument('--lanes-out-file', type=str,
+ help='output file containing lane definitions',
+ default='./lanes_def.tsv')
+ parser.add_argument('--laneslots-out-file', type=str,
+ help='output file containing laneslot definitions',
+ default='./laneslots_def.tsv')
+ parser.add_argument('--config-parameters', type=str, required=True,
+ help='a YAML configuration file containing study label and labels namespace, '
+ 'namespace is optional')
+ parser.add_argument('--study-output-file', type=str,
+ help='output file containing study definition')
+ parser.add_argument('--tubes-subsamples-output-file', type=str,
+ help='output file containing tubes subsamples (samples produced applying a '
+ 'laboratory protocol to existing samples)')
+ return parser
+
+
+def get_samplesheet_translator(samplesheet_type='default'):
+ translator = {'default': {'flowcell_id': 'FCID',
+ 'tube_id': 'SampleID',
+ 'lane_id': 'Lane',
+ 'sample_tag': 'Index',
+ 'protocol': 'Recipe',
+ 'operator': 'Operator',
+ 'sample_project': 'SampleProject'}
+ }
+ return translator[samplesheet_type]
+
+def add_namespace(namespace, label, separator='|'):
+ return separator.join([namespace, label])
+
+def write_tubes_file(records, study_label, translator, ofile,
+ namespace = None, logger = None):
+ ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
+ 'vessel_status', 'source', 'source_type']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+        tubes_def = set([r[translator['tube_id']].strip() for r in records])
+ for x in tubes_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x if not namespace else add_namespace(namespace, x),
+ 'vessel_type' : 'Tube',
+ 'vessel_content' : 'DNA',
+ 'vessel_status' : 'UNKNOWN',
+ 'source' : 'None',
+ 'source_type' : 'NO_SOURCE'})
+
+
+def write_subsamples_file(records, study_label, translator, ofile,
+ namespace = None, logger = None):
+ ofile_fields = ['study', 'label', 'vessel_type', 'vessel_content',
+ 'vessel_status', 'source', 'source_type', 'options']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ subsamples_def = set([('%s::%s' % (r[translator['tube_id']].strip(), r[translator['protocol']].strip()),
+ r[translator['tube_id']].strip(),
+ r[translator['protocol']].strip()) for r in records])
+ for x in subsamples_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x[0] if not namespace else add_namespace(namespace, x[0]),
+ 'vessel_type' : 'Tube',
+ 'vessel_content' : 'DNA',
+ 'vessel_status' : 'UNKNOWN',
+ 'source' : x[1] if not namespace else add_namespace(namespace, x[1]),
+ 'source_type' : 'Tube',
+ 'options' : 'protocol=%s' % x[2]})
+
+
+def write_flowcells_file(records, study_label, translator, ofile,
+ namespace = None, logger=None):
+ ofile_fields = ['study', 'label', 'barcode', 'container_status',
+ 'number_of_slots']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ flowcells_def = set([(r[translator['flowcell_id']].strip()) for r in records])
+ for x in flowcells_def:
+ writer.writerow({'study' : study_label,
+ 'label' : x if not namespace else add_namespace(namespace, x),
+ 'barcode' : x if not namespace else add_namespace(namespace, x),
+ 'container_status' : 'INSTOCK',
+ 'number_of_slots' : '8'})
+
+
+def write_lanes_file(records, study_label, translator, ofile,
+ namespace = None, logger=None):
+ ofile_fields = ['study', 'flow_cell', 'slot', 'container_status']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ lanes_def = set([(r[translator['flowcell_id']].strip(),
+ r[translator['lane_id']].strip())
+ for r in records])
+ for x in lanes_def:
+ writer.writerow({'study' : study_label,
+ 'flow_cell' : x[0] if not namespace else add_namespace(namespace, x[0]),
+ 'slot' : x[1],
+ 'container_status' : 'INSTOCK'})
+
+
+def write_laneslots_file(records, study_label, translator, ofile,
+ subsamples_enabled=False,
+ namespace = None, logger=None):
+    logger.debug('subsamples_enabled: %r' % subsamples_enabled)
+ ofile_fields = ['study', 'lane', 'tag', 'content', 'source',
+ 'source_type', 'options']
+ # Get NGLIMS host and key
+ try:
+ galaxy_host = os.environ['NGLIMS_GALAXY_HOST']
+ api_key = os.environ['NGLIMS_GALAXY_API_KEY']
+ except KeyError as ke:
+ msg = 'No environment variables %s set to configure access to the Galaxy server' % ke
+ sys.exit(msg)
+ # Get flowcell label (assuming label is the same for all records)
+ fc_id = records[0][translator['flowcell_id']].strip()
+ # Get flowcell details from nglims
+ gi = nglimsclient.setup(GalaxyInstance(galaxy_host, api_key))
+    if gi.nglims.exists_flowcell_id(fc_id):
+        fc_data = gi.nglims.flowcell_complete_details(fc_id)
+    else:
+        msg = 'Flowcell %s not found in nglims' % fc_id
+        sys.exit(msg)
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t')
+ writer.writeheader()
+ laneslots_def = set()
+ for r in records:
+ fc_id = r[translator['flowcell_id']].strip() if not namespace else \
+ add_namespace(namespace, r[translator['flowcell_id']]).strip()
+ if subsamples_enabled:
+ source_tube_id = '%s::%s' % (r[translator['tube_id']].strip(),
+ r[translator['protocol']].strip())
+ else:
+ source_tube_id = r[translator['tube_id']].strip()
+ # Identify adapter
+            adapter = [i['adapter'] for i in fc_data['details']
+                       if i['name'] == r[translator['tube_id']].strip()
+                       and i['lane'] == int(r[translator['lane_id']].strip())]
+ laneslots_def.add(('%s:%s' % (fc_id, r[translator['lane_id']].strip()),
+ r[translator['sample_tag']].strip(),
+ source_tube_id,
+ r[translator['protocol']].strip(),
+ r[translator['operator']].strip(),
+ r[translator['sample_project']].strip(),
+ adapter[0]))
+ for x in laneslots_def:
+ writer.writerow({'study' : study_label,
+ 'lane' : x[0],
+ 'tag' : x[1],
+ 'content' : 'DNA',
+ 'source' : x[2] if not namespace else \
+ add_namespace(namespace, x[2]),
+ 'source_type' : 'Tube',
+ 'options' : 'protocol=%s,operator=%s,sample_project=%s,adapter=%s' %
+ (x[3], x[4], x[5], x[6])})
+
+
+def write_study_file(study_label, records, translator, ofile, logger=None):
+ ofile_fields = ['label', 'description']
+ with open(ofile, 'w') as out_file:
+ writer = csv.DictWriter(out_file, ofile_fields, delimiter='\t', )
+ writer.writeheader()
+ writer.writerow({'label': study_label})
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format' : LOG_FORMAT,
+ 'datefmt' : LOG_DATEFMT,
+ 'level' : log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+ logger = logging.getLogger('prepare_seq_dsample_inputs')
+
+ with open(args.in_file, 'rU') as f:
+ logger.info('Loading data from file %s' % args.in_file)
+ reader = csv.DictReader(f, delimiter='\t')
+ recs = [r for r in reader]
+ translator = get_samplesheet_translator()
+
+ with open(args.config_parameters) as cfgf:
+ conf = yaml.load(cfgf)
+ if not conf.has_key('config_parameters'):
+ raise RuntimeError('Bad configuration file')
+ else:
+ try:
+ study_label = conf['config_parameters']['study_label']
+ except KeyError:
+ raise RuntimeError('No study_label provided')
+ if conf['config_parameters'].has_key('namespace'):
+ namespace = conf['config_parameters']['namespace']
+ else:
+ namespace = None
+
+ if args.study_output_file:
+ logger.info('Writing Study definition file %s' % args.study_output_file)
+ write_study_file(study_label, recs, translator, args.study_output_file, logger)
+ logger.info('Done writing file %s' % args.study_output_file)
+
+ logger.info('Writing Tube definitions file %s' % args.tubes_out_file)
+ write_tubes_file(recs, study_label, translator,
+ args.tubes_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.tubes_out_file)
+
+ if args.tubes_subsamples_output_file:
+ logger.info('Writing Tubes\' subsamples definitions file %s' \
+ % args.tubes_subsamples_output_file)
+ write_subsamples_file(recs, study_label, translator,
+ args.tubes_subsamples_output_file,
+ namespace, logger)
+ logger.info('Done writing file %s' % args.tubes_subsamples_output_file)
+
+ logger.info('Writing FlowCell definitions file %s' % args.flowcells_out_file)
+ write_flowcells_file(recs, study_label, translator,
+ args.flowcells_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.flowcells_out_file)
+
+ logger.info('Writing Lane definitions file %s' % args.lanes_out_file)
+ write_lanes_file(recs, study_label, translator,
+ args.lanes_out_file, namespace,
+ logger)
+ logger.info('Done writing file %s' % args.lanes_out_file)
+
+ logger.info('Writing LaneSlot definitions file %s' % args.laneslots_out_file)
+ write_laneslots_file(recs, study_label, translator,
+ args.laneslots_out_file,
+                         bool(args.tubes_subsamples_output_file), # True when subsamples have been created
+ namespace,
+ logger)
+ logger.info('Done writing file %s' % args.laneslots_out_file)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_dsample_inputs.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,77 @@
+
+
+ Extract OMERO.biobank objects from a sequencing samplesheet
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_seq_dsample_inputs.py
+ --in-file=${infile}
+ --logfile=${log_file}
+ --tubes-out-file=${tubes_ofile}
+ --flowcells-out-file=${flowcells_ofile}
+ --lanes-out-file=${lanes_ofile}
+ --laneslots-out-file=${laneslots_ofile}
+ --config-parameters=${config_params}
+ --study-output-file=${study_ofile}
+ --tubes-subsamples-output-file=${subsamples_ofile}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool produces files that can be used as input to import
+ * samples
+ * flowcells
+ * lanes
+ * laneslots
+
+within OMERO.biobank using import applications.
+
+If the optional 'study-output-file' parameter is given as input, the
+script will produce the input file for a new study definition.
+
+If the optional 'tubes-subsamples-output-file' is given, the script
+will generate another file with tubes definitions where each tube is
+produced applying a specific laboratory protocol to an existing
+tube. Existing tubes are the ones in tubes-out-file; new tubes' labels
+are created using the pattern **tube_label::protocol**.
+The config_parameters field must point to a YAML configuration file
+with the following structure:
+
+ config_parameters:
+ study_label: study_label
+
+ namespace: namespace
+
+where study_label is mandatory and namespace is optional.
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_seq_out_inputs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_out_inputs.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,102 @@
+"""
+This tool produces files that can be used as input to import
+ * SequencerOutput data samples
+ * SequencerOutput data objects
+within OMERO.biobank using import applications.
+
+Input file must be like::
+
+ run_directory path
+ 130418_SN194_0303_BC1NYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/130418_SN194_0303_BC1NYHACXX/raw
+ 160418_SN194_0304_BCAZYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/160418_SN194_0304_BCAZYHACXX/raw
+ ....
+"""
+
+import csv, sys, argparse, logging
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='build sequencer output import files')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', '-i', type=str, required=True,
+ help='input file')
+ parser.add_argument('--dsamples-out-file', type=str, default='./seq_out_dsamples.tsv',
+ help='output file containing data samples definitions')
+ parser.add_argument('--dobjects-out-file', type=str, default='./seq_out_dobjects.tsv',
+ help='output file containing data objects definitions')
+ parser.add_argument('--study', '-s', type=str, required=True)
+ return parser
+
+
+def write_dsamples_file(records, out_file, study_label):
+
+    def parse_run_directory(run_dir):
+ _, device, _, flowcell = run_dir.split('_')
+ return device, flowcell[1:]
+
+ with open(out_file, 'w') as ofile:
+ out_file_header = ['study', 'label', 'source', 'source_type', 'seq_dsample_type',
+ 'status', 'device']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ for r in records:
+            device, flowcell = parse_run_directory(r)
+ writer.writerow({'study': study_label,
+ 'label': r,
+ 'source': flowcell,
+ 'source_type': 'FlowCell',
+ 'seq_dsample_type': 'SequencerOutput',
+ 'status': 'USABLE',
+ 'device': device})
+
+
+def write_dobjects_file(records, out_file, study_label):
+ with open(out_file, 'w') as ofile:
+ out_file_header = ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1']
+ writer = csv.DictWriter(ofile, out_file_header, delimiter='\t')
+ writer.writeheader()
+ for r in records:
+ writer.writerow({'study': study_label,
+ 'path': r['path'],
+ 'data_sample': r['run_directory'],
+ 'mimetype': 'x-vl/illumina-run-folder',
+ 'size': '-1',
+ 'sha1': 'N.A.'})
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ log_level = getattr(logging, args.loglevel)
+ kwargs = {'format': LOG_FORMAT,
+ 'datefmt': LOG_DATEFMT,
+ 'level': log_level}
+ if args.logfile:
+ kwargs['filename'] = args.logfile
+ logging.basicConfig(**kwargs)
+    logger = logging.getLogger('prepare_seq_out_inputs')
+
+ with open(args.in_file) as f:
+ logger.info('Loading data from file %s', args.in_file)
+ reader = csv.DictReader(f, delimiter='\t')
+ recs = [r for r in reader]
+
+ logger.info('Writing DataSample data to file %s', args.dsamples_out_file)
+ write_dsamples_file(set([r['run_directory'] for r in recs]),
+ args.dsamples_out_file, args.study)
+ logger.info('Writing DataObjects data to file %s', args.dobjects_out_file)
+ write_dobjects_file(recs, args.dobjects_out_file, args.study)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/prepare_seq_out_inputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/prepare_seq_out_inputs.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,46 @@
+
+
+ Extract OMERO.biobank objects that can be used to import SequencerOutput data
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=prepare_seq_out_inputs.py
+ --in-file=${infile}
+ --logfile=${log_file}
+ --dsamples-out-file=${dsamples_ofile}
+ --dobjects-out-file=${dobjects_ofile}
+ --study=${study}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool produces files that can be used as input to import
+ * SequencerOutput data samples
+ * SequencerOutput data objects
+within OMERO.biobank using import applications.
+
+Input file must be like::
+
+ run_directory path
+ 130418_SN194_0303_BC1NYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/130418_SN194_0303_BC1NYHACXX/raw
+ 160418_SN194_0304_BCAZYHACXX file:///SHARE/USERFS/els7/users/sequencing_data/completed/160418_SN194_0304_BCAZYHACXX/raw
+ ....
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/split_by_study.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/split_by_study.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,90 @@
+"""
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+ BSTUDY:12515 FEMALE BSTUDY:3512 BSTUDY:124
+
+into multiple files based on the STUDY value of the label stored in the "individual" column.
+Each label in the "individual" column must have a STUDY:ENROLLMENT_CODE format, otherwise the line
+will be skipped.
+"""
+
+import sys, argparse, csv, os
+from bl.vl.utils import LOG_LEVELS, get_logger
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(description='Split a file containing pedigree information into multiple files using the study as split criterion')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--record-id', type=int,
+ help='Output ID record given by Galaxy')
+ parser.add_argument('--out-path', type=str, help='Output directory',
+ default='.')
+ return parser
+
+
+def split_element(element, logger):
+ try:
+ study, code = element.split(':')
+ return study, code
+ except ValueError:
+ logger.error('Label %s is not a label with format STUDY:ENROLLMENT_CODE', element)
+ return None, None
+
+
+def map_by_study(records, logger):
+ records_map = {}
+ for rec in records:
+ study, code = split_element(rec['individual'], logger)
+ if not study and not code:
+ logger.debug('Skipping record %r', rec)
+ continue
+ records_map.setdefault(study, []).append(rec)
+    logger.info('Records split across %d studies', len(records_map))
+ return records_map
+
+
+def dump_records(study_label, records, header, output_path, logger, galaxy_record_id=None):
+
+ def get_file_name(study, out_path, galaxy_id=None):
+ if not galaxy_id:
+ file_name = '%s_individuals.tsv' % study
+ else:
+ file_name = 'primary_%d_%s_visible_tabular' % (galaxy_id, study.replace('_', '-'))
+ return os.path.join(out_path, file_name)
+
+ fname = get_file_name(study_label, output_path, galaxy_record_id)
+ with open(fname, 'w') as ofile:
+ logger.info('Dumping %d records to file %s', len(records), fname)
+ writer = csv.DictWriter(ofile, header, delimiter='\t')
+ writer.writeheader()
+ writer.writerows(records)
+
+
+def main(argv):
+ parser = get_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('split_by_study', level=args.loglevel, filename=args.logfile)
+
+ logger.info('Start processing file %s', args.in_file)
+ with open(args.in_file) as in_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ records = [row for row in reader]
+
+ records_map = map_by_study(records, logger)
+ # Force the header of the output files in order to prevent problems when running the workflow later
+ header = ['individual', 'gender', 'father', 'mother']
+ for study, records in records_map.iteritems():
+ dump_records(study, records, header, args.out_path, logger, args.record_id)
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/split_by_study.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/split_by_study.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,42 @@
+
+
+
+ Split a file containing pedigree information into multiple files using the study as split criterion
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=split_by_study.py
+ --in-file=${in_file}
+ --logfile=${log_file}
+ --record-id=$log_file.id
+ --out-path=$__new_file_path__
+
+
+
+
+
+
+
+
+
+
+
+Split a file like::
+
+ individual gender father mother
+ ASTUDY:2141 MALE ASTUDY:12 ASTUDY:12341
+ ASTUDY:415 MALE ASTUDY:3562 ASTUDY:13612
+ BSTUDY:12515 FEMALE BSTUDY:3512 BSTUDY:124
+
+into multiple files based on the STUDY value of the label stored in the "individual" column.
+Each label in the "individual" column must have a STUDY:ENROLLMENT_CODE format, otherwise the line
+will be skipped.
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/wells_barcode_to_label.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/wells_barcode_to_label.py Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,114 @@
+"""
+Map PlateWell labels written as PLATE_BARCODE:WELL_LABEL to labels written as
+PLATE_LABEL:WELL_LABEL which is the PlateWell label format required by the map_vid
+application.
+The inputs are a TSV file and the label of the column of this file containing the
+PlateWell labels that are going to be mapped.
+"""
+
+import csv, argparse, sys, copy
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import LOG_LEVELS, get_logger
+import bl.vl.utils.ome_utils as vlu
+
+
+def get_wells_map(kb, plate_barcodes, logger):
+ wells_map = {}
+ logger.info('Start building PlateWells map')
+ res = kb.get_by_field(kb.TiterPlate, 'barcode', plate_barcodes)
+ logger.debug('Plates %r --- Results: %r', plate_barcodes, res)
+ for _, pl in res.iteritems():
+ if pl.OME_TABLE == 'TiterPlate':
+ if pl.barcode:
+ for w in kb.get_wells_by_plate(pl):
+ logger.debug('Mapping well %s of plate %s', w.label, w.container.label)
+ wells_map['%s:%s' % (w.container.barcode, w.label)] = '%s:%s' % (w.container.label,
+ w.label)
+ else:
+ logger.debug('TiterPlate %s has no barcode', pl.label)
+ else:
+ logger.debug('Object is a %r, skipping it', pl.OME_TABLE)
+ logger.info('Mapped %d PlateWells', len(wells_map))
+ return wells_map
+
+
+def get_plates_list(records, plates_column, logger):
+ plates = set()
+ logger.info('Retrieving TiterPlate barcodes from %d records', len(records))
+ for r in records:
+ plates.add(r[plates_column].split(':')[0])
+ logger.info('Found %d TiterPlate objects', len(plates))
+ return list(plates)
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='Map barcodes in PlateWell labels to TiterPlate labels')
+ parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+ parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+ help='logging level', default='INFO')
+ parser.add_argument('-H', '--host', type=str, help='OMERO host')
+ parser.add_argument('-U', '--user', type=str, help='OMERO user')
+ parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
+ parser.add_argument('--in-file', type=str, required=True,
+ help='input TSV file')
+ parser.add_argument('--column-label', type=str, required=True,
+                        help='the label of the column containing the values that will be mapped')
+ parser.add_argument('--out-file', type=str, required=True,
+ help='output TSV file')
+ parser.add_argument('--strict-mapping', action='store_true',
+ help='if output records are less than the input ones, raise an error')
+ return parser
+
+
+def main(argv):
+ parser = make_parser()
+ args = parser.parse_args(argv)
+
+ logger = get_logger('wells_barcode_to_label', level=args.loglevel,
+ filename=args.logfile)
+ try:
+ host = args.host or vlu.ome_host()
+ user = args.user or vlu.ome_user()
+ passwd = args.passwd or vlu.ome_passwd()
+ except ValueError, ve:
+ logger.critical(ve)
+ sys.exit(ve)
+
+ logger.info('Starting job')
+
+ kb = KB(driver='omero')(host, user, passwd)
+
+ with open(args.in_file) as in_file, open(args.out_file, 'w') as out_file:
+ reader = csv.DictReader(in_file, delimiter='\t')
+ if args.column_label not in reader.fieldnames:
+ msg = 'No column %s in file %s' % (args.column_label, args.in_file)
+ logger.critical(msg)
+ raise RuntimeError(msg)
+ records = [row for row in reader]
+ plates = get_plates_list(records, args.column_label, logger)
+ wells_map = get_wells_map(kb, plates, logger)
+ logger.info('Mapping %d records', len(records))
+ writer = csv.DictWriter(out_file, reader.fieldnames, delimiter='\t')
+ writer.writeheader()
+ mapped_records = []
+ for rec in records:
+ mapped = copy.deepcopy(rec)
+ logger.debug('Mapping value %s', mapped[args.column_label])
+ if mapped[args.column_label] in wells_map:
+ mapped[args.column_label] = wells_map[mapped[args.column_label]]
+ mapped_records.append(mapped)
+ if args.strict_mapping and len(mapped_records) < len(records):
+            msg = 'Mapped %d records of %d' % (len(mapped_records), len(records))
+ logger.critical(msg)
+ sys.exit(msg)
+ logger.info('%d records mapped', len(mapped_records))
+ writer.writerows(mapped_records)
+ logger.info('Job completed')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank/utils/wells_barcode_to_label.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/wells_barcode_to_label.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,59 @@
+
+
+
+ Map plate barcodes in well labels to plate labels
+
+
+
+ launcher.sh
+ --interpreter=python
+ --runner=wells_barcode_to_label.py
+ --in-file=${in_file}
+ --logfile=${log_file}
+ --column-label=${column_label}
+ #if $strict_mapping
+ --strict-mapping
+ #end if
+ --out-file=${out_file}
+ --host=$__user_omero_host__
+ --user=$__user_omero_user__
+ --passwd=$__user_omero_password__
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Map PlateWell labels written as PLATE_BARCODE:WELL_LABEL to labels written as
+PLATE_LABEL:WELL_LABEL which is the PlateWell label format required by the map_vid
+application.
+The inputs are a TSV file and the label of the column of this file containing the
+PlateWell labels that are going to be mapped.
+
+
+
\ No newline at end of file
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/biobank_tool_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank_tool_conf.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,81 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ba6cf6ede027 galaxy-tools/orione_biobank_tool_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/orione_biobank_tool_conf.xml Wed Sep 28 06:03:30 2016 -0400
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+