annotate data.py @ 0:d04fa5201f51 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
author rnateam
date Thu, 28 Jul 2016 05:56:54 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
1 """Dataset handler."""
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
2
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
3 import numpy as np
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
4
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
5 import pandas as pd
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
6
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
7 __author__ = "Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
8 __copyright__ = "Copyright 2016, Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
9 __license__ = "MIT"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
10 __maintainer__ = "Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
11 __email__ = "gianluca.corrado@unitn.it"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
12 __status__ = "Production"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
13
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
14
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
class Dataset(object):
    """General dataset.

    Holds the protein feature table (``Fp``) and the RNA feature table
    (``Fr``), both cast to ``float32``. Subclasses implement ``load``.
    """

    def __init__(self, fp, fr, standardize_proteins=False,
                 standardize_rnas=False):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object supporting ``astype``
            Protein features, already in memory (presumably a
            pandas.DataFrame with one column per protein -- confirm
            against the caller). It is NOT a file name: ``astype`` is
            called on it directly.

        fr : str
            The name of the HDF5 file containing features for the RNAs.
            The table is read from the ``features`` entry of the store.

        standardize_proteins : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.

        standardize_rnas : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.
        """
        self.Fp = fp.astype('float32')

        # The context manager guarantees the HDF5 file is closed even when
        # reading or casting the features raises.
        with pd.HDFStore(fr) as store:
            self.Fr = store.features.astype('float32')

    def load(self):
        """Load dataset in memory.

        Raises
        ------
        NotImplementedError
            Always; subclasses must provide the implementation.
        """
        raise NotImplementedError()
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
40
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
41
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
class PredictDataset(Dataset):
    """Test dataset: every (protein, RNA) pair is an example to predict."""

    def __init__(self, fp, fr):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object supporting ``astype``
            Protein features, already in memory (presumably a
            pandas.DataFrame with one column per protein -- confirm
            against the caller). It is forwarded unchanged to the base
            class constructor.

        fr : str
            The name of the HDF5 file containing features for the RNAs.
        """
        super(PredictDataset, self).__init__(fp, fr)

    def load(self):
        """
        Load dataset in memory.

        One example is built for each (protein, RNA) pair. Proteins vary
        slowest: all RNAs are paired with the first protein column, then
        all RNAs with the second, and so on.

        Return
        ------
        Examples to predict, as the tuple (p, p_names, r, r_names).
        For each example (row):
            - p contains the protein features,
            - p_names contains the name of the protein,
            - r contains the RNA features,
            - r_names contains the name of the RNA.

        p and r are float32 arrays with one row per example; the names
        are the column labels of the protein and RNA feature tables.
        """
        proteins = list(self.Fp.columns)
        rnas = list(self.Fr.columns)

        # Columns are entities and rows are features, so transpose to get
        # one feature vector per row.
        protein_features = np.asarray(self.Fp.values, dtype='float32').T
        rna_features = np.asarray(self.Fr.values, dtype='float32').T

        # Each protein row is repeated once per RNA, while the whole RNA
        # block is stacked once per protein: together they enumerate the
        # cartesian product in protein-major order.
        p = np.repeat(protein_features, len(rnas), axis=0)
        r = np.tile(rna_features, (len(proteins), 1))

        p_names = [protein for protein in proteins for _ in rnas]
        r_names = rnas * len(proteins)

        return (p, np.array(p_names), r, np.array(r_names))