Mercurial > repos > bgruening > rnacommender
view data.py @ 0:768beb05387d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
author | bgruening |
---|---|
date | Tue, 31 May 2016 04:29:57 -0400 |
parents | |
children |
line wrap: on
line source
"""Dataset handler.""" import numpy as np import pandas as pd from theano import config __author__ = "Gianluca Corrado" __copyright__ = "Copyright 2016, Gianluca Corrado" __license__ = "MIT" __maintainer__ = "Gianluca Corrado" __email__ = "gianluca.corrado@unitn.it" __status__ = "Production" class Dataset(object): """General dataset.""" def __init__(self, fp, fr, standardize_proteins=False, standardize_rnas=False): """ Constructor. Parameters ---------- fp : str Protein features fr : str The name of the HDF5 file containing features for the RNAs. """ self.Fp = fp.astype(config.floatX) store = pd.io.pytables.HDFStore(fr) self.Fr = store.features.astype(config.floatX) store.close() def load(self): """Load dataset in memory.""" raise NotImplementedError() class PredictDataset(Dataset): """Test dataset.""" def __init__(self, fp, fr): """ Constructor. Parameters ---------- fp : str The name of the HDF5 file containing features for the proteins. fr : str The name of the HDF5 file containing features for the RNAs. """ super(PredictDataset, self).__init__(fp, fr) def load(self): """ Load dataset in memory. Return ------ Examples to predict. For each example: - p contains the protein features, - r contains the RNA features, - p_names contains the name of the protein, - r_names contains the name of the RNA. """ protein_input_dim = self.Fp.shape[0] rna_input_dim = self.Fr.shape[0] num_examples = self.Fp.shape[1] * self.Fr.shape[1] p = np.zeros((num_examples, protein_input_dim)).astype(config.floatX) p_names = [] r = np.zeros((num_examples, rna_input_dim)).astype(config.floatX) r_names = [] index = 0 for protein in self.Fp.columns: for rna in self.Fr.columns: p[index] = self.Fp[protein] p_names.append(protein) r[index] = self.Fr[rna] r_names.append(rna) index += 1 return (p, np.array(p_names), r, np.array(r_names))