annotate data.py @ 0:768beb05387d draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
author bgruening
date Tue, 31 May 2016 04:29:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
1 """Dataset handler."""
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
2
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
3 import numpy as np
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
4
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
5 import pandas as pd
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
6
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
7 from theano import config
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
8
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
9 __author__ = "Gianluca Corrado"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
10 __copyright__ = "Copyright 2016, Gianluca Corrado"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
11 __license__ = "MIT"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
12 __maintainer__ = "Gianluca Corrado"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
13 __email__ = "gianluca.corrado@unitn.it"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
14 __status__ = "Production"
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
15
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
16
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
class Dataset(object):
    """General dataset.

    Stores the protein feature table in ``self.Fp`` and the RNA feature
    table in ``self.Fr``, both cast to ``config.floatX``. Subclasses are
    expected to override :meth:`load`.
    """

    def __init__(self, fp, fr, standardize_proteins=False,
                 standardize_rnas=False):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object supporting ``astype``
            Protein features, already loaded in memory. ``astype`` is called
            on it directly, so a file name is not accepted here.
            (Presumably a pandas DataFrame with one column per protein --
            confirm against the caller.)

        fr : str
            The name of the HDF5 file containing features for the RNAs.
            The ``features`` node of that file is read.

        standardize_proteins : bool (default : False)
            Accepted for interface compatibility; not read by this
            constructor.

        standardize_rnas : bool (default : False)
            Accepted for interface compatibility; not read by this
            constructor.
        """
        self.Fp = fp.astype(config.floatX)

        store = pd.io.pytables.HDFStore(fr)
        try:
            self.Fr = store.features.astype(config.floatX)
        finally:
            # Release the HDF5 handle even when the 'features' node is
            # missing or the cast fails, so the file is never left open.
            store.close()

    def load(self):
        """Load dataset in memory.

        Raises
        ------
        NotImplementedError
            Always; concrete subclasses provide the implementation.
        """
        raise NotImplementedError()
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
42
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
43
768beb05387d planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit cc090387231a51b44f84298cd3e149fc6643abb0
bgruening
parents:
diff changeset
class PredictDataset(Dataset):
    """Test dataset.

    Enumerates every (protein, RNA) pair of the two feature tables so that
    each pair can be scored.
    """

    def __init__(self, fp, fr):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object supporting ``astype``
            Protein features, already loaded in memory; forwarded unchanged
            to the base constructor, which calls ``astype`` on it.
            (Presumably a pandas DataFrame with one column per protein --
            confirm against the caller.)

        fr : str
            The name of the HDF5 file containing features for the RNAs.
        """
        super(PredictDataset, self).__init__(fp, fr)

    def load(self):
        """
        Load dataset in memory.

        Return
        ------
        Examples to predict, as the tuple ``(p, p_names, r, r_names)``.
        There is one example per (protein, RNA) pair, ordered with the
        protein varying slowest. For each example:
            - p contains the protein features,
            - p_names contains the name of the protein,
            - r contains the RNA features,
            - r_names contains the name of the RNA.

        """
        proteins = list(self.Fp.columns)
        rnas = list(self.Fr.columns)
        num_proteins = len(proteins)
        num_rnas = len(rnas)

        # Columns hold one entity each, so transpose to get one row per
        # protein / RNA. Forcing a C-contiguous floatX copy keeps the output
        # layout and dtype the same as a freshly allocated matrix.
        protein_rows = np.ascontiguousarray(self.Fp.values.T,
                                            dtype=config.floatX)
        rna_rows = np.ascontiguousarray(self.Fr.values.T,
                                        dtype=config.floatX)

        # Cross product: each protein row is repeated once per RNA, while
        # the whole block of RNA rows is repeated once per protein.
        p = np.repeat(protein_rows, num_rnas, axis=0)
        r = np.tile(rna_rows, (num_proteins, 1))

        p_names = [protein for protein in proteins for _ in rnas]
        r_names = rnas * num_proteins

        return (p, np.array(p_names), r, np.array(r_names))