diff main.py @ 0:d04fa5201f51 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
author rnateam
date Thu, 28 Jul 2016 05:56:54 -0400
parents
children 79c9b4b34b63
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main.py	Thu Jul 28 05:56:54 2016 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+"""Recommendation."""
+
+import argparse
+import sys
+from rbpfeatures import RBPVectorizer
+from data import PredictDataset
+from recommend import Predictor
+
+__author__ = "Gianluca Corrado"
+__copyright__ = "Copyright 2016, Gianluca Corrado"
+__license__ = "MIT"
+__maintainer__ = "Gianluca Corrado"
+__email__ = "gianluca.corrado@unitn.it"
+__status__ = "Production"
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('fasta', metavar='fasta', type=str,
+                        help="""Fasta file containing the RBP \
+                        sequences.""")
+
+    args = parser.parse_args()
+
+    v = RBPVectorizer(fasta=args.fasta)
+    rbp_fea = v.vectorize()
+
+    if rbp_fea is not None:
+        # Define and load dataset
+        D = PredictDataset(
+            fp=rbp_fea, fr="AURA_Human_data/RNA_features/HT_utrs.h5")
+        dataset = D.load()
+
+        model = "AURA_Human_data/model/trained_model.pkl"
+
+        # Define the Trainer and train the model
+        P = Predictor(predict_dataset=dataset,
+                      trained_model=model,
+                      serendipity_dic=model + '_',
+                      output="output.txt")
+        P.predict()
+    else:
+        sys.stdout.write("""
+        The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.
+        """)