annotate graphprot_train_wrapper.py @ 3:9a83a84a25a7 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
author bgruening
date Wed, 27 Jan 2021 19:27:12 +0000
parents adcc4c457c3c
children 58ebf089377e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python3
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
2
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
3 import argparse as ap
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
4 import os
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
5 import subprocess
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
6 import sys
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
7
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
8 import gplib
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
9
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
10
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
11 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
12
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
13 TOOL DEPENDENCIES
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
14 =================
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
15
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
16 GraphProt 1.1.7
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
17 Best installed via:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
18 https://anaconda.org/bioconda/graphprot
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
19 Tested with: miniconda3, conda 4.7.12
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
20
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
21
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
22 OUTPUT FILES
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
23 ============
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
24
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
25 data_id.model
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
26 data_id.params
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
27 if not --disable-cv:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
28 data_id.cv_results
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
29 if not --disable-motifs:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
30 data_id.sequence_motif
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
31 data_id.sequence_motif.png
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
32 if --str-model:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
33 data_id.structure_motif
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
34 data_id.structure_motif.png
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
35 Temporary:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
36 data_id.predictions
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
37 data_id.profile
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
38
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
39
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
40 EXAMPLE CALLS
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
41 =============
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
42
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
43 python graphprot_train_wrapper.py --pos gp_data/SERBP1_positives.train.fa
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
44 --neg gp_data/SERBP1_negatives.train.fa --data-id test2 --disable-cv
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
45 --gp-output --opt-set-size 200 --min-train 400
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
46
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
47 python graphprot_train_wrapper.py --pos gp_data/SERBP1_positives.train.fa
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
48 --neg gp_data/SERBP1_negatives.train.fa --data-id test2 --disable-cv
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
49 --opt-set-size 100 --min-train 200
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
50
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
51 python graphprot_train_wrapper.py --pos test-data/test_positives.train.fa
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
52 --neg test-data/test_negatives.train.fa --data-id gptest2 --disable-cv
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
53 --opt-pos test-data/test_positives.parop.fa
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
54 --opt-neg test-data/test_negatives.parop.fa
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
55
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
56 python graphprot_train_wrapper.py --pos test-data/test_positives.train.fa
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
57 --neg test-data/test_negatives.train.fa --data-id gptest2 --disable-cv
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
58 --disable-motifs --opt-pos test-data/test_positives.parop.fa --opt-neg
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
59 test-data/test_negatives.parop.fa
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
60
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
61
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
62 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
63
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
64
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
65 ###############################################################################
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
66
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
67 def setup_argument_parser():
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
68 """Setup argparse parser."""
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
69 help_description = """
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
70 Galaxy wrapper script for GraphProt to train a GraphProt model on
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
71 a given set of input sequences (positives and negatives .fa). By
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
72 default a sequence model is trained (due to structure models
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
73 being much slower to train). Also by default take a portion of
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
74 the input sequences for hyperparameter optimization (HPO) prior to
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
75 model training, and run a 10-fold cross validation and motif
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
76 generation after model training. Thus the following output
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
77 files are produced:
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
78 .model model file, .params model parameter file, .png motif files
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
79 (sequence, or sequence+structure), .cv_results CV results file.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
80 After model training, predict on positives to get highest whole
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
81 site and profile scores found in binding sites. Take the median
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
82 score out of these to store in .params file, using it later
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
83 for outputting binding sites or peaks with higher confidence.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
84
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
85 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
86 # Define argument parser.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
87 p = ap.ArgumentParser(add_help=False,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
88 prog="graphprot_train_wrapper.py",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
89 description=help_description,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
90 formatter_class=ap.MetavarTypeHelpFormatter)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
91
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
92 # Argument groups.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
93 p_man = p.add_argument_group("REQUIRED ARGUMENTS")
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
94 p_opt = p.add_argument_group("OPTIONAL ARGUMENTS")
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
95
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
96 # Required arguments.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
97 p_opt.add_argument("-h", "--help",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
98 action="help",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
99 help="Print help message")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
100 p_man.add_argument("--pos",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
101 dest="in_pos_fa",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
102 type=str,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
103 required=True,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
104 help="Positive (= binding site) sequences .fa file "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
105 "for model training (option -fasta)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
106 p_man.add_argument("--neg",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
107 dest="in_neg_fa",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
108 type=str,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
109 required=True,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
110 help="Negative sequences .fa file for model "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
111 "training (option -negfasta)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
112 p_man.add_argument("--data-id",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
113 dest="data_id",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
114 type=str,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
115 required=True,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
116 help="Data ID (option -prefix)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
117 # Additional arguments.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
118 p_opt.add_argument("--opt-set-size",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
119 dest="opt_set_size",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
120 type=int,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
121 default=500,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
122 help="Hyperparameter optimization set size (taken "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
123 "away from both --pos and --neg) (default: 500)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
124 p_opt.add_argument("--opt-pos",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
125 dest="opt_pos_fa",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
126 type=str,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
127 help="Positive (= binding site) sequences .fa file "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
128 "for hyperparameter optimization (default: take "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
129 "--opt-set-size from --pos)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
130 p_opt.add_argument("--opt-neg",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
131 dest="opt_neg_fa",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
132 type=str,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
133 help="Negative sequences .fa file for hyperparameter "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
134 "optimization (default: take --opt-set-size "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
135 "from --neg)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
136 p_opt.add_argument("--min-train",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
137 dest="min_train",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
138 type=int,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
139 default=500,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
140 help="Minimum amount of training sites demanded "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
141 "(default: 500)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
142 p_opt.add_argument("--disable-cv",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
143 dest="disable_cv",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
144 default=False,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
145 action="store_true",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
146 help="Disable cross validation step (default: false)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
147 p_opt.add_argument("--disable-motifs",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
148 dest="disable_motifs",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
149 default=False,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
150 action="store_true",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
151 help="Disable motif generation step (default: false)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
152 p_opt.add_argument("--gp-output",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
153 dest="gp_output",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
154 default=False,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
155 action="store_true",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
156 help="Print output produced by GraphProt "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
157 "(default: false)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
158 p_opt.add_argument("--str-model",
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
159 dest="train_str_model",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
160 default=False,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
161 action="store_true",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
162 help="Train a structure model (default: train "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
163 "a sequence model)")
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
164 return p
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
165
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
166
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
167 ###############################################################################
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
168
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
169 if __name__ == '__main__':
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
170
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
171 # Setup argparse.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
172 parser = setup_argument_parser()
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
173 # Read in command line arguments.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
174 args = parser.parse_args()
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
175
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
176 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
177 Do all sorts of sanity checking.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
178
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
179 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
180 # Check for Linux.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
181 assert "linux" in sys.platform, "please use Linux"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
182 # Check tool availability.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
183 assert gplib.is_tool("GraphProt.pl"), "GraphProt.pl not in PATH"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
184 # Check file inputs.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
185 assert os.path.exists(args.in_pos_fa), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
186 "positives .fa file \"%s\" not found" % (args.in_pos_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
187 assert os.path.exists(args.in_neg_fa), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
188 "negatives .fa file \"%s\" not found" % (args.in_neg_fa)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
189 # Count .fa entries.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
190 c_pos_fa = gplib.count_fasta_headers(args.in_pos_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
191 c_neg_fa = gplib.count_fasta_headers(args.in_neg_fa)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
192 assert c_pos_fa, "positives .fa file \"%s\" no headers found" % \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
193 (args.in_pos_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
194 assert c_neg_fa, "negatives .fa file \"%s\" no headers found" % \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
195 (args.in_neg_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
196 print("# positive .fa sequences: %i" % (c_pos_fa))
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
197 print("# negative .fa sequences: %i" % (c_neg_fa))
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
198 # Check additional files.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
199 if args.opt_pos_fa:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
200 assert args.opt_neg_fa, "--opt-pos but no --opt-neg given"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
201 if args.opt_neg_fa:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
202 assert args.opt_pos_fa, "--opt-neg but no --opt-pos given"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
203 # Check for lowercase only sequences, which cause GP to crash.
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
204 error_mess = "input sequences encountered containing "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
205 "only lowercase characters or lowercase characters in between "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
206 "uppercase characters. Please provide either all uppercase "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
207 "sequences or sequences containing uppercase regions surrounded "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
208 "by lowercase context regions for structure calculation (see "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
209 "viewpoint concept in original GraphProt publication "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
210 "for more details)"
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
211 seqs_dic = gplib.read_fasta_into_dic(args.in_pos_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
212 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
213 assert not bad_ids, "%s" % (error_mess)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
214 seqs_dic = gplib.read_fasta_into_dic(args.in_neg_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
215 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
216 assert not bad_ids, "%s" % (error_mess)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
217 if args.opt_pos_fa:
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
218 seqs_dic = gplib.read_fasta_into_dic(args.opt_pos_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
219 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
220 assert not bad_ids, "%s" % (error_mess)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
221 if args.opt_neg_fa:
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
222 seqs_dic = gplib.read_fasta_into_dic(args.opt_neg_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
223 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
224 assert not bad_ids, "%s" % (error_mess)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
225
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
226 # If hyperparameter optimization (parop) .fa files given via --opt-pos / --opt-neg.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
227 if args.opt_pos_fa and args.opt_neg_fa:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
228 c_parop_pos_fa = gplib.count_fasta_headers(args.opt_pos_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
229 c_parop_neg_fa = gplib.count_fasta_headers(args.opt_neg_fa)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
230 assert c_parop_pos_fa, "--opt-pos .fa file \"%s\" no headers found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
231 % (args.opt_pos_fa)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
232 assert c_parop_neg_fa, "--opt-neg .fa file \"%s\" no headers found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
233 % (args.opt_neg_fa)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
234 # Require at least --min-train (default: 500) sequences in both --pos and --neg.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
235 assert c_pos_fa >= args.min_train, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
236 "--pos for training < %i, please provide more (try at least "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
237 "> 1000, the more the better)" % (args.min_train)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
238 assert c_neg_fa >= args.min_train, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
239 "--neg for training < %i, please provide more (try at least "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
240 "> 1000, the more the better)" % (args.min_train)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
241 # Looking closer at ratios.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
242 pos_neg_ratio = c_parop_pos_fa / c_parop_neg_fa
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
243 if pos_neg_ratio < 0.8 or pos_neg_ratio > 1.25:
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
244 assert 0, "ratio of --opt-pos to --opt-neg < 0.8 or > 1.25 "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
245 "(ratio = %f). Try to keep ratio closer to 1 or better use "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
246 "identical numbers (keep in mind that performance measures "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
247 "such as accuracy or AUROC are not suitable for imbalanced "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
248 " datasets!)" % (pos_neg_ratio)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
249 else:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
250 # Define some minimum amount of training sites for the sake of sanity.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
251 c_pos_train = c_pos_fa - args.opt_set_size
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
252 c_neg_train = c_neg_fa - args.opt_set_size
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
253 # Start complaining.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
254 assert c_pos_fa >= args.opt_set_size, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
255 "# positives < --opt-set-size (%i < %i)" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
256 % (c_pos_fa, args.opt_set_size)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
257 assert c_neg_fa >= args.opt_set_size, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
258 "# negatives < --opt-set-size (%i < %i)" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
259 % (c_neg_fa, args.opt_set_size)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
260 assert c_pos_train >= args.opt_set_size, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
261 "# positives remaining for training < --opt-set-size "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
262 "(%i < %i)" % (c_pos_train, args.opt_set_size)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
263 assert c_neg_train >= args.opt_set_size, "# negatives remaining "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
264 "for training < --opt-set-size (%i < %i)" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
265 % (c_neg_train, args.opt_set_size)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
266 # Less than 500?? You gotta be kidding.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
267 assert c_pos_train >= args.min_train, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
268 "# positives remaining for training < %i, please provide more "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
269 " (try at least > 1000, the more the better)" % (args.min_train)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
270 assert c_neg_train >= args.min_train, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
271 "# negatives remaining for training < %i, please provide more "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
272 "(try at least > 1000, the more the better)" % (args.min_train)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
273 # Looking closer at ratios.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
274 pos_neg_ratio = c_pos_train / c_neg_train
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
275 if pos_neg_ratio < 0.8 or pos_neg_ratio > 1.25:
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
276 assert 0, "ratio of --pos to --neg < 0.8 or > 1.25 "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
277 "(ratio = %f). Try to keep ratio closer to 1 or better use "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
278 "identical numbers (keep in mind that performance measures "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
279 "such as accuracy or AUROC are not suitable for imbalanced "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
280 "datasets!)" % (pos_neg_ratio)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
281
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
282 """
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
283 Generate parop + train .fa output files for hyperparameter
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
284 optimization + training.
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
285
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
286 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
287 # Output files for training.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
288 pos_parop_fa = args.data_id + ".positives.parop.fa"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
289 neg_parop_fa = args.data_id + ".negatives.parop.fa"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
290 pos_train_fa = args.data_id + ".positives.train.fa"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
291 neg_train_fa = args.data_id + ".negatives.train.fa"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
292
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
293 # If parop .fa files given.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
294 if args.opt_pos_fa and args.opt_neg_fa:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
295 # Just copy parop and train files.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
296 gplib.make_file_copy(args.opt_pos_fa, pos_parop_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
297 gplib.make_file_copy(args.opt_neg_fa, neg_parop_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
298 gplib.make_file_copy(args.in_pos_fa, pos_train_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
299 gplib.make_file_copy(args.in_neg_fa, neg_train_fa)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
300 else:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
301 # Generate parop + train .fa files from input .fa files.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
302 gplib.split_fasta_into_test_train_files(args.in_pos_fa, pos_parop_fa,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
303 pos_train_fa,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
304 test_size=args.opt_set_size)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
305 gplib.split_fasta_into_test_train_files(args.in_neg_fa, neg_parop_fa,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
306 neg_train_fa,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
307 test_size=args.opt_set_size)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
308
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
309 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
310 Do the hyperparameter optimization.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
311
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
312 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
313 print("Starting hyperparameter optimization (-action ls) ... ")
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
314 check_cmd = "GraphProt.pl -action ls -prefix " + args.data_id + \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
315 " -fasta " + pos_parop_fa + " -negfasta " + neg_parop_fa
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
316 # If sequence model should be trained (default).
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
317 if not args.train_str_model:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
318 check_cmd += " -onlyseq"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
319 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
320 output = subprocess.getoutput(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
321 params_file = args.data_id + ".params"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
322 assert os.path.exists(params_file), "Hyperparameter optimization output "\
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
323 " .params file \"%s\" not found" % (params_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
324 # Add model type to params file.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
325 if args.train_str_model:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
326 gplib.echo_add_to_file("model_type: structure", params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
327 else:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
328 gplib.echo_add_to_file("model_type: sequence", params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
329 # Get parameter string.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
330 param_string = gplib.graphprot_get_param_string(params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
331
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
332 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
333 Do the model training. (Yowza!)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
334
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
335 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
336 print("Starting model training (-action train) ... ")
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
337 check_cmd = "GraphProt.pl -action train -prefix " + args.data_id \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
338 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
339 + " " + param_string
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
340 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
341 output = subprocess.getoutput(check_cmd)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
342 assert output, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
343 "The following call of GraphProt.pl produced no output:\n%s" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
344 % (check_cmd)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
345 if args.gp_output:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
346 print(output)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
347 model_file = args.data_id + ".model"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
348 assert os.path.exists(model_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
349 "Training output .model file \"%s\" not found" % (model_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
350
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
351 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
352 Do the 10-fold cross validation.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
353
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
354 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
355 if not args.disable_cv:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
356 print("Starting 10-fold cross validation (-action cv) ... ")
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
357 check_cmd = "GraphProt.pl -action cv -prefix " + args.data_id \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
358 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
359 + " " + param_string + " -model " + model_file
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
360 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
361 output = subprocess.getoutput(check_cmd)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
362 assert output, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
363 "The following call of GraphProt.pl produced no output:\n%s" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
364 % (check_cmd)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
365 if args.gp_output:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
366 print(output)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
367 cv_results_file = args.data_id + ".cv_results"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
368 assert os.path.exists(cv_results_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
369 "CV output .cv_results file \"%s\" not found" % (cv_results_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
370
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
371 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
372 Do the motif generation.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
373
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
374 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
375 if not args.disable_motifs:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
376 print("Starting motif generation (-action motif) ... ")
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
377 check_cmd = "GraphProt.pl -action motif -prefix " + args.data_id \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
378 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
379 + " " + param_string + " -model " + model_file
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
380 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
381 output = subprocess.getoutput(check_cmd)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
382 assert output, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
383 "The following call of GraphProt.pl produced no output:\n%s" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
384 % (check_cmd)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
385 if args.gp_output:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
386 print(output)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
387 seq_motif_file = args.data_id + ".sequence_motif"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
388 seq_motif_png_file = args.data_id + ".sequence_motif.png"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
389 assert os.path.exists(seq_motif_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
390 "Motif output .sequence_motif file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
391 % (seq_motif_file)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
392 assert os.path.exists(seq_motif_png_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
393 "Motif output .sequence_motif.png file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
394 % (seq_motif_png_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
395 if args.train_str_model:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
396 str_motif_file = args.data_id + ".structure_motif"
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
397 str_motif_png_file = args.data_id + ".structure_motif.png"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
398 assert os.path.exists(str_motif_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
399 "Motif output .structure_motif file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
400 % (str_motif_file)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
401 assert os.path.exists(str_motif_png_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
402 "Motif output .structure_motif.png file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
403 % (str_motif_png_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
404
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
405 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
406 Do whole site predictions on positive training set.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
407
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
408 """
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
409 print("Starting whole site predictions on positive training set "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
410 " (-action predict) ... ")
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
411 check_cmd = "GraphProt.pl -action predict -prefix " + args.data_id \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
412 + " -fasta " + pos_train_fa + " " + param_string \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
413 + " -model " + model_file
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
414 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
415 output = subprocess.getoutput(check_cmd)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
416 assert output, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
417 "The following call of GraphProt.pl produced no output:\n%s" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
418 % (check_cmd)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
419 if args.gp_output:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
420 print(output)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
421 ws_predictions_file = args.data_id + ".predictions"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
422 assert os.path.exists(ws_predictions_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
423 "Whole site prediction output .predictions file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
424 % (ws_predictions_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
425
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
426 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
427 Do profile predictions on positive training set.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
428
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
429 """
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
430 print("Starting profile predictions on positive training set "
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
431 "(-action predict_profile) ... ")
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
432 check_cmd = "GraphProt.pl -action predict_profile -prefix " \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
433 + args.data_id + " -fasta " + pos_train_fa + " " \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
434 + param_string + " -model " + model_file
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
435 print(check_cmd)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
436 output = subprocess.getoutput(check_cmd)
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
437 assert output, \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
438 "The following call of GraphProt.pl produced no output:\n%s" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
439 % (check_cmd)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
440 if args.gp_output:
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
441 print(output)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
442 profile_predictions_file = args.data_id + ".profile"
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
443 assert os.path.exists(profile_predictions_file), \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
444 "Profile prediction output .profile file \"%s\" not found" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
445 % (profile_predictions_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
446
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
447 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
448 Get 50 % score (median) for .predictions and .profile file.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
449 For .profile, first extract for each site the maximum score, and then
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
450 from the list of maximum site scores get the median.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
451 For whole site .predictions, get the median from the site scores list.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
452
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
453 """
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
454 print("Getting .profile and .predictions median scores ... ")
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
455
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
456 # Whole site scores median.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
457 ws_pred_median = \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
458 gplib.graphprot_predictions_get_median(ws_predictions_file)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
459 # Profile top site scores median.
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
460 profile_median = \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
461 gplib.graphprot_profile_get_tsm(profile_predictions_file,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
462 profile_type="profile")
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
463 ws_pred_string = "pos_train_ws_pred_median: %f" % (ws_pred_median)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
464 profile_string = "pos_train_profile_median: %f" % (profile_median)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
465 gplib.echo_add_to_file(ws_pred_string, params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
466 gplib.echo_add_to_file(profile_string, params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
467 # Average profile top site scores median for extlr 1 to 10.
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
468 for i in range(10):
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
469 i += 1
3
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
470 avg_profile_median = \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
471 gplib.graphprot_profile_get_tsm(profile_predictions_file,
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
472 profile_type="avg_profile",
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
473 avg_profile_extlr=i)
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
474
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
475 avg_profile_string = "pos_train_avg_profile_median_%i: %f" \
9a83a84a25a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
476 % (i, avg_profile_median)
1
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
477 gplib.echo_add_to_file(avg_profile_string, params_file)
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
478
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
479 print("Script: I'm done.")
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
480 print("Author: Good. Now go back to your file system directory.")
adcc4c457c3c "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
481 print("Script: Ok.")