view data_manager/install_packaged_annotation_data.py @ 0:632d33df6758 draft

"planemo upload commit 3dc5291eccd1fb516be67694c18a27bda5f69f91"
author wolma
date Thu, 16 Dec 2021 18:26:23 +0000
parents
children 691bcbfd775d

#!/usr/bin/env python
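"""Galaxy data manager script for installing "packaged annotation data".

It reads a YAML metadata file describing an annotation package, downloads each
record's data file into the target directory (named after the record id),
writes the metadata back out as meta.yml, and emits the data manager JSON that
describes the new rows of the packaged_annotation_data data table.

A minimal sketch of such a metadata file (all values below are hypothetical;
only the key layout is dictated by this script):

    name: my annotation package
    refgenome: hg38
    # build and volume are optional; they default to today's date and 1
    records:
      - id: genes
        name: Gene annotation
        version: '1.0'
        format: gtf
        source: https://example.org/genes.gtf
"""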

import argparse
import datetime
import json
import os
import re
import subprocess
import sys

from shlex import quote as cmd_quote
from urllib.request import urlretrieve

import yaml


class PackagedAnnotationMeta:
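    """Metadata of a packaged annotation data bundle.

    Validates the required package- and record-level keys on construction and
    derives a sanitized package id from name, refgenome, volume and build.
    """
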
    @classmethod
    def from_file(cls, fname):
        with open(fname) as fh:
            meta = yaml.safe_load(fh)
        return cls(meta)

    def __init__(self, meta_dict):
        if 'build' not in meta_dict:
            meta_dict['build'] = datetime.date.today().isoformat()
        if 'volume' not in meta_dict:
            meta_dict['volume'] = 1

        required_meta = ['name', 'build', 'volume', 'refgenome', 'records']
        for key in required_meta:
            if not meta_dict.get(key):
                raise KeyError(
                    'Required info "{0}" missing from metadata'
                    .format(key)
                )
        required_record_meta = ['id', 'name', 'version', 'format', 'source']
        for key in required_record_meta:
            for record in meta_dict['records']:
                if not record.get(key):
                    raise KeyError(
                        '{0}\n'
                        'Required info "{1}" missing from record metadata'
                        .format(record, key)
                    )
        self.meta = meta_dict
        self.meta['id'] = self._get_id()

    def _get_id(self):
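        # Join name, refgenome, volume and build into an id: spaces become
        # underscores and characters other than letters, digits, '_' and '-'
        # are dropped.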
        components = [
            self.meta['name'],
            self.meta['refgenome'],
            str(self.meta['volume']),
            str(self.meta['build'])
        ]
        return '__'.join(
            [
                re.sub(r'[^a-zA-Z_0-9\-]', '', i.replace(' ', '_'))
                for i in components
            ]
        )

    def records(self, full_record_names=False):
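        # Yield a copy of each record's metadata; with full_record_names=True
        # the 'name' field is expanded with package-level context.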
        for record in self.meta['records']:
            ret = record.copy()
            if full_record_names:
                ret['name'] = self._full_record_name(record)
            yield ret

    def fullname(self):
        return '{0} ({1}, vol:{2}/build:{3})'.format(
            self.meta['name'],
            self.meta['refgenome'],
            self.meta['volume'],
            self.meta['build']
        )

    def _full_record_name(self, record):
        return '{0} ({1}, {2}; from {3}/vol:{4}/build{5})'.format(
            record['name'], record['version'],
            self.meta['refgenome'],
            self.meta['name'],
            self.meta['volume'],
            self.meta['build']
        )                

    def dump(self, fname):
        with open(fname, 'w') as fo:
            yaml.dump(
                self.meta, fo, allow_unicode=False, default_flow_style=False
            )


def fetch_data(source_url, target_file):
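    """Download source_url into target_file (no checksum verification)."""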
    urlretrieve(source_url, target_file)


def install_data(data, target_directory):
    # NOTE: this helper is never called in this script; it looks like a
    # leftover from a bowtie2 index-building data manager. Its previously
    # undefined names are resolved here from the function's own parameters.
    # TODO: allow multiple FASTA input files
    fasta_filename = data
    fasta_base_name = os.path.split(fasta_filename)[-1]
    sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
    os.symlink(fasta_filename, sym_linked_fasta_filename)
    # assumption: use the FASTA base name (without extension) as the index id
    index_id = os.path.splitext(fasta_base_name)[0]
    args = ['bowtie2-build', sym_linked_fasta_filename, index_id]
    proc = subprocess.Popen(args=args, shell=False, cwd=target_directory)
    return_code = proc.wait()
    if return_code:
        print("Error building index.", file=sys.stderr)
        sys.exit(return_code)
    return [' '.join(cmd_quote(arg) for arg in args)]


def meta_to_dm_records(meta, dbkey=None):
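    # Turn the package metadata into one row per record for the Galaxy
    # packaged_annotation_data data table. 'value' combines the package and
    # record ids; 'path' is intentionally left empty here.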
    data_table_rows = []
    for record in meta.records(full_record_names=True):
        data_table_rows.append(
            {
                'value': '{0}:{1}'.format(meta.meta['id'], record['id']),
                'dbkey': dbkey or meta.meta['refgenome'],
                'data_name': record['name'],
                'data_id': record['id'],
                'data_format': record['format'],
                'package_id': meta.meta['id'],
                'package_name': meta.fullname(),
                'path': ''
            }
        )
    return data_table_rows


if __name__ == "__main__":
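    # Command line: a positional YAML metadata file, a required output path
    # for the data manager JSON, plus optional target directory and dbkey
    # override.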
    parser = argparse.ArgumentParser()
    parser.add_argument('metadata')
    parser.add_argument(
        '-o', '--galaxy-datamanager-json',
        required=True
    )
    parser.add_argument('-t', '--target-directory', default=None)
    parser.add_argument('--dbkey', default=None)
    args = parser.parse_args()

    if args.target_directory:
        if not os.path.isdir(args.target_directory):
            os.mkdir(args.target_directory)
    else:
        args.target_directory = os.getcwd()

    meta = PackagedAnnotationMeta.from_file(args.metadata)

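    # Download each record's data file into the target directory, using the
    # record id as the local file name.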
    for record in meta.records():
        fetch_data(
            record['source'],
            os.path.join(args.target_directory, record['id'])
        )

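    # Keep a copy of the validated (and id-augmented) metadata next to the
    # downloaded files.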
    meta.dump(os.path.join(args.target_directory, 'meta.yml'))

    # Finally, we prepare the metadata for the new data table records ...
    data_manager_dict = {
        'data_tables': {
            'packaged_annotation_data': meta_to_dm_records(meta, args.dbkey)
        }
    }

    # ... and save it to the json results file
    with open(args.galaxy_datamanager_json, 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)