Mercurial > repos > rnateam > graphprot_predict_profile
annotate gplib.py @ 3:9a83a84a25a7 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
author | bgruening |
---|---|
date | Wed, 27 Jan 2021 19:27:12 +0000 |
parents | adcc4c457c3c |
children | 58ebf089377e |
rev | line source |
---|---|
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
2 import gzip |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
3 import random |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
4 import re |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
5 import statistics |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
6 import subprocess |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
7 from distutils.spawn import find_executable |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
8 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
9 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
10 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
11 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
12 Run doctests: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
13 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
14 python3 -m doctest gplib.py |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
15 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
16 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
17 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
18 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
19 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
20 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
21 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
22 def graphprot_predictions_get_median(predictions_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
23 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
24 Given a GraphProt .predictions file, read in site scores and return |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
25 the median value. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
26 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
27 >>> test_file = "test-data/test.predictions" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
28 >>> graphprot_predictions_get_median(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
29 0.571673 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
30 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
31 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
32 # Site scores list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
33 sc_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
34 with open(predictions_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
35 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
36 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
37 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
38 sc_list.append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
39 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
40 # Return the median. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
41 return statistics.median(sc_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
42 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
43 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
44 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
45 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
46 def graphprot_profile_get_tsm(profile_file, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
47 profile_type="profile", |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
48 avg_profile_extlr=5): |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
49 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
50 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
51 Given a GraphProt .profile file, extract for each site (identified by |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
52 column 1 ID) the top (= highest) score. Then return the median of these |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
53 top scores. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
54 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
55 profile_type can be either "profile" or "avg_profile". |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
56 "avg_profile means that the position-wise scores will first get smoothed |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
57 out by calculating for each position a new score through taking a |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
58 sequence window -avg_profile_extlr to +avg_profile_extlr of the position |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
59 and calculate the mean score over this window and assign it to the |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
60 position. After that, the maximum score of each site is chosen, and the |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
61 median over all maximum scores is returned. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
62 "profile" leaves the position-wise scores as they are, directly extracting |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
63 the maximum for each site and then reporting the median. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
64 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
65 >>> test_file = "test-data/test.profile" |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
66 >>> graphprot_profile_get_tsm(test_file) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
67 3.2 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
68 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
69 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
70 # Dictionary of lists, with list of scores (value) for each site (key). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
71 lists_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
72 with open(profile_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
73 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
74 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
75 seq_id = cols[0] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
76 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
77 if seq_id in lists_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
78 lists_dic[seq_id].append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
79 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
80 lists_dic[seq_id] = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
81 lists_dic[seq_id].append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
82 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
83 # For each site, extract maximum and store in new list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
84 max_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
85 for seq_id in lists_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
86 if profile_type == "profile": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
87 max_sc = max(lists_dic[seq_id]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
88 max_list.append(max_sc) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
89 elif profile_type == "avg_profile": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
90 # Convert profile score list to average profile scores list. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
91 aps_list = \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
92 list_moving_window_average_values(lists_dic[seq_id], |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
93 win_extlr=avg_profile_extlr) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
94 max_sc = max(aps_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
95 max_list.append(max_sc) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
96 else: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
97 assert 0, "invalid profile_type argument given: \"%s\"" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
98 % (profile_type) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
99 # Return the median. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
100 return statistics.median(max_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
101 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
102 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
103 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
104 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
105 def list_moving_window_average_values(in_list, |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
106 win_extlr=5, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
107 method=1): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
108 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
109 Take a list of numeric values, and calculate for each position a new value, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
110 by taking the mean value of the window of positions -win_extlr and |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
111 +win_extlr. If full extension is not possible (at list ends), it just |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
112 takes what it gets. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
113 Two implementations of the task are given, chose by method=1 or method=2. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
114 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
115 >>> test_list = [2, 3, 5, 8, 4, 3, 7, 1] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
116 >>> list_moving_window_average_values(test_list, win_extlr=2, method=1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
117 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
118 >>> list_moving_window_average_values(test_list, win_extlr=2, method=2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
119 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
120 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
121 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
122 l_list = len(in_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
123 assert l_list, "Given list is empty" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
124 new_list = [0] * l_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
125 if win_extlr == 0: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
126 return l_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
127 if method == 1: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
128 for i in range(l_list): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
129 s = i - win_extlr |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
130 e = i + win_extlr + 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
131 if s < 0: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
132 s = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
133 if e > l_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
134 e = l_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
135 # Extract portion and assign value to new list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
136 new_list[i] = statistics.mean(in_list[s:e]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
137 elif method == 2: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
138 for i in range(l_list): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
139 s = i - win_extlr |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
140 e = i + win_extlr + 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
141 if s < 0: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
142 s = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
143 if e > l_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
144 e = l_list |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
145 ln = e - s |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
146 sc_sum = 0 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
147 for j in range(ln): |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
148 sc_sum += in_list[s + j] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
149 new_list[i] = sc_sum / ln |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
150 else: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
151 assert 0, "invalid method ID given (%i)" % (method) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
152 return new_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
153 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
154 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
155 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
156 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
157 def echo_add_to_file(echo_string, out_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
158 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
159 Add a string to file, using echo command. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
160 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
161 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
162 check_cmd = 'echo "%s" >> %s' % (echo_string, out_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
163 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
164 error = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
165 if output: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
166 error = True |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
167 assert not error, "echo is complaining:\n%s\n%s" % (check_cmd, output) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
168 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
169 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
170 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
171 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
172 def is_tool(name): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
173 """Check whether tool "name" is in PATH.""" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
174 return find_executable(name) is not None |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
175 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
176 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
177 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
178 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
179 def count_fasta_headers(fasta_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
180 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
181 Count number of FASTA headers in fasta_file using grep. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
182 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
183 >>> test_file = "test-data/test.fa" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
184 >>> count_fasta_headers(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
185 2 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
186 >>> test_file = "test-data/empty_file" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
187 >>> count_fasta_headers(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
188 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
189 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
190 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
191 check_cmd = 'grep -c ">" ' + fasta_file |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
192 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
193 row_count = int(output.strip()) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
194 return row_count |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
195 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
196 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
197 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
198 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
199 def make_file_copy(in_file, out_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
200 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
201 Make a file copy by copying in_file to out_file. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
202 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
203 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
204 check_cmd = "cat " + in_file + " > " + out_file |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
205 assert in_file != out_file, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
206 "cat does not like to cat file into same file (%s)" % (check_cmd) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
207 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
208 error = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
209 if output: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
210 error = True |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
211 assert not error, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
212 "cat did not like your input (in_file: %s, out_file: %s):\n%s" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
213 % (in_file, out_file, output) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
214 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
215 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
216 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
217 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
218 def split_fasta_into_test_train_files(in_fasta, test_out_fa, train_out_fa, |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
219 test_size=500): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
220 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
221 Split in_fasta .fa file into two files (e.g. test, train). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
222 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
223 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
224 # Read in in_fasta. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
225 seqs_dic = read_fasta_into_dic(in_fasta) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
226 # Shuffle IDs. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
227 rand_ids_list = random_order_dic_keys_into_list(seqs_dic) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
228 c_out = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
229 TESTOUT = open(test_out_fa, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
230 TRAINOUT = open(train_out_fa, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
231 for seq_id in rand_ids_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
232 seq = seqs_dic[seq_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
233 if (c_out >= test_size): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
234 TRAINOUT.write(">%s\n%s\n" % (seq_id, seq)) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
235 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
236 TESTOUT.write(">%s\n%s\n" % (seq_id, seq)) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
237 c_out += 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
238 TESTOUT.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
239 TRAINOUT.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
240 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
241 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
242 ############################################################################### |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
243 |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
244 def check_seqs_dic_format(seqs_dic): |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
245 """ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
246 Check sequence dictionary for lowercase-only sequences or sequences |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
247 wich have lowercase nts in between uppercase nts. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
248 Return suspicious IDs as list or empty list if not hits. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
249 IDs with lowercase-only sequences. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
250 |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
251 >>> seqs_dic = {"id1" : "acguACGU", "id2" : "acgua", "id3" : "acgUUaUcc"} |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
252 >>> check_seqs_dic_format(seqs_dic) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
253 ['id2', 'id3'] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
254 >>> seqs_dic = {"id1" : "acgAUaa", "id2" : "ACGUACUA"} |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
255 >>> check_seqs_dic_format(seqs_dic) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
256 [] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
257 |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
258 """ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
259 assert seqs_dic, "given seqs_dic empty" |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
260 bad_seq_ids = [] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
261 for seq_id in seqs_dic: |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
262 seq = seqs_dic[seq_id] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
263 if re.search("^[acgtun]+$", seq): |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
264 bad_seq_ids.append(seq_id) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
265 if re.search("[ACGTUN][acgtun]+[ACGTUN]", seq): |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
266 bad_seq_ids.append(seq_id) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
267 return bad_seq_ids |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
268 |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
269 |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
270 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
271 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
272 def read_fasta_into_dic(fasta_file, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
273 seqs_dic=False, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
274 ids_dic=False, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
275 read_dna=False, |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
276 short_ensembl=False, |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
277 reject_lc=False, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
278 convert_to_uc=False, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
279 skip_n_seqs=True): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
280 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
281 Read in FASTA sequences, convert to RNA, store in dictionary |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
282 and return dictionary. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
283 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
284 >>> test_fasta = "test-data/test.fa" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
285 >>> read_fasta_into_dic(test_fasta) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
286 {'seq1': 'acguACGUacgu', 'seq2': 'ugcaUGCAugcaACGUacgu'} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
287 >>> test_fasta = "test-data/test2.fa" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
288 >>> read_fasta_into_dic(test_fasta) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
289 {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
290 >>> test_fasta = "test-data/test.ensembl.fa" |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
291 >>> read_fasta_into_dic(test_fasta, read_dna=True, short_ensembl=True) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
292 {'ENST00000415118': 'GAAATAGT', 'ENST00000448914': 'ACTGGGGGATACGAAAA'} |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
293 >>> test_fasta = "test-data/test4.fa" |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
294 >>> read_fasta_into_dic(test_fasta) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
295 {'1': 'gccuAUGUuuua', '2': 'cugaAACUaugu'} |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
296 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
297 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
298 if not seqs_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
299 seqs_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
300 seq_id = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
301 seq = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
302 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
303 # Go through FASTA file, extract sequences. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
304 if re.search(r".+\.gz$", fasta_file): |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
305 f = gzip.open(fasta_file, 'rt') |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
306 else: |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
307 f = open(fasta_file, "r") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
308 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
309 if re.search(">.+", line): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
310 m = re.search(">(.+)", line) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
311 seq_id = m.group(1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
312 # If there is a ".", take only first part of header. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
313 # This assumes ENSEMBL header format ">ENST00000631435.1 cdna ..." |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
314 if short_ensembl: |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
315 if re.search(r".+\..+", seq_id): |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
316 m = re.search(r"(.+?)\..+", seq_id) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
317 seq_id = m.group(1) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
318 assert seq_id not in seqs_dic, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
319 "non-unique FASTA header \"%s\" in \"%s\"" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
320 % (seq_id, fasta_file) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
321 if ids_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
322 if seq_id in ids_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
323 seqs_dic[seq_id] = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
324 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
325 seqs_dic[seq_id] = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
326 elif re.search("[ACGTUN]+", line, re.I): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
327 if seq_id in seqs_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
328 m = re.search("([ACGTUN]+)", line, re.I) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
329 seq = m.group(1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
330 if reject_lc: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
331 assert \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
332 not re.search("[a-z]", seq), \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
333 "lc char detected in seq \"%i\" (reject_lc=True)" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
334 % (seq_id) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
335 if convert_to_uc: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
336 seq = seq.upper() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
337 # If sequences with N nucleotides should be skipped. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
338 if skip_n_seqs: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
339 if "n" in m.group(1) or "N" in m.group(1): |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
340 print("WARNING: \"%s\" contains N. Discarding " |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
341 "sequence ... " % (seq_id)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
342 del seqs_dic[seq_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
343 continue |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
344 # Convert to RNA, concatenate sequence. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
345 if read_dna: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
346 seqs_dic[seq_id] += \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
347 m.group(1).replace("U", "T").replace("u", "t") |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
348 else: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
349 seqs_dic[seq_id] += \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
350 m.group(1).replace("T", "U").replace("t", "u") |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
351 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
352 return seqs_dic |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
353 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
354 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
355 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
356 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
357 def random_order_dic_keys_into_list(in_dic): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
358 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
359 Read in dictionary keys, and return random order list of IDs. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
360 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
361 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
362 id_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
363 for key in in_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
364 id_list.append(key) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
365 random.shuffle(id_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
366 return id_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
367 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
368 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
369 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
370 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
371 def graphprot_get_param_string(params_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
372 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
373 Get parameter string from GraphProt .params file. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
374 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
375 >>> test_params = "test-data/test.params" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
376 >>> graphprot_get_param_string(test_params) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
377 '-epochs 20 -lambda 0.01 -R 1 -D 3 -bitsize 14 -onlyseq ' |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
378 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
379 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
380 param_string = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
381 with open(params_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
382 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
383 cols = line.strip().split(" ") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
384 param = cols[0] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
385 setting = cols[1] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
386 if re.search(".+:", param): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
387 m = re.search("(.+):", line) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
388 par = m.group(1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
389 if re.search("pos_train.+", line): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
390 continue |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
391 if par == "model_type": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
392 if setting == "sequence": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
393 param_string += "-onlyseq " |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
394 else: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
395 param_string += "-%s %s " % (par, setting) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
396 else: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
397 assert 0, "pattern matching failed for string \"%s\"" % (param) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
398 return param_string |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
399 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
400 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
401 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
402 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
403 def seqs_dic_count_uc_nts(seqs_dic): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
404 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
405 Count number of uppercase nucleotides in sequences stored in sequence |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
406 dictionary. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
407 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
408 >>> seqs_dic = {'seq1': "acgtACGTacgt", 'seq2': 'acgtACacgt'} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
409 >>> seqs_dic_count_uc_nts(seqs_dic) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
410 6 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
411 >>> seqs_dic = {'seq1': "acgtacgt", 'seq2': 'acgtacgt'} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
412 >>> seqs_dic_count_uc_nts(seqs_dic) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
413 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
414 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
415 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
416 assert seqs_dic, "Given sequence dictionary empty" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
417 c_uc = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
418 for seq_id in seqs_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
419 c_uc += len(re.findall(r'[A-Z]', seqs_dic[seq_id])) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
420 return c_uc |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
421 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
422 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
423 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
424 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
425 def seqs_dic_count_lc_nts(seqs_dic): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
426 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
427 Count number of lowercase nucleotides in sequences stored in sequence |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
428 dictionary. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
429 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
430 >>> seqs_dic = {'seq1': "gtACGTac", 'seq2': 'cgtACacg'} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
431 >>> seqs_dic_count_lc_nts(seqs_dic) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
432 10 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
433 >>> seqs_dic = {'seq1': "ACGT", 'seq2': 'ACGTAC'} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
434 >>> seqs_dic_count_lc_nts(seqs_dic) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
435 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
436 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
437 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
438 assert seqs_dic, "Given sequence dictionary empty" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
439 c_uc = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
440 for seq_id in seqs_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
441 c_uc += len(re.findall(r'[a-z]', seqs_dic[seq_id])) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
442 return c_uc |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
443 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
444 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
445 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
446 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
447 def count_file_rows(in_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
448 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
449 Count number of file rows for given input file. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
450 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
451 >>> test_file = "test-data/test1.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
452 >>> count_file_rows(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
453 7 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
454 >>> test_file = "test-data/empty_file" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
455 >>> count_file_rows(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
456 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
457 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
458 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
459 check_cmd = "cat " + in_file + " | wc -l" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
460 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
461 row_count = int(output.strip()) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
462 return row_count |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
463 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
464 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
465 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
466 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
467 def bed_check_six_col_format(bed_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
468 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
469 Check whether given .bed file has 6 columns. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
470 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
471 >>> test_bed = "test-data/test1.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
472 >>> bed_check_six_col_format(test_bed) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
473 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
474 >>> test_bed = "test-data/empty_file" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
475 >>> bed_check_six_col_format(test_bed) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
476 False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
477 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
478 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
479 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
480 six_col_format = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
481 with open(bed_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
482 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
483 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
484 if len(cols) == 6: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
485 six_col_format = True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
486 break |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
487 f.closed |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
488 return six_col_format |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
489 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
490 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
491 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
492 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
493 def bed_check_unique_ids(bed_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
494 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
495 Check whether .bed file (6 column format with IDs in column 4) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
496 has unique column 4 IDs. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
497 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
498 >>> test_bed = "test-data/test1.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
499 >>> bed_check_unique_ids(test_bed) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
500 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
501 >>> test_bed = "test-data/test2.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
502 >>> bed_check_unique_ids(test_bed) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
503 False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
504 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
505 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
506 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
507 check_cmd = "cut -f 4 " + bed_file + " | sort | uniq -d" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
508 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
509 if output: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
510 return False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
511 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
512 return True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
513 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
514 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
515 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
516 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
517 def get_seq_lengths_from_seqs_dic(seqs_dic): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
518 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
519 Given a dictionary of sequences, return dictionary of sequence lengths. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
520 Mapping is sequence ID -> sequence length. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
521 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
522 seq_len_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
523 assert seqs_dic, "sequence dictionary seems to be empty" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
524 for seq_id in seqs_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
525 seq_l = len(seqs_dic[seq_id]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
526 seq_len_dic[seq_id] = seq_l |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
527 return seq_len_dic |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
528 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
529 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
530 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
531 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
532 def bed_get_region_lengths(bed_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
533 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
534 Read in .bed file, store and return region lengths in dictionary. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
535 key : region ID (.bed col4) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
536 value : region length (.bed col3-col2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
537 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
538 >>> test_file = "test-data/test4.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
539 >>> bed_get_region_lengths(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
540 {'CLIP1': 10, 'CLIP2': 10} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
541 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
542 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
543 id2len_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
544 with open(bed_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
545 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
546 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
547 site_s = int(cols[1]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
548 site_e = int(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
549 site_id = cols[3] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
550 site_l = site_e - site_s |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
551 assert site_id \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
552 not in id2len_dic, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
553 "column 4 IDs not unique in given .bed file \"%s\"" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
554 % (bed_file) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
555 id2len_dic[site_id] = site_l |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
556 f.closed |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
557 assert id2len_dic, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
558 "No IDs read into dic (input file \"%s\" empty or malformatted?)" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
559 % (bed_file) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
560 return id2len_dic |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
561 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
562 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
563 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
564 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
565 def graphprot_get_param_dic(params_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
566 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
567 Read in GraphProt .params file and store in dictionary. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
568 key = parameter |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
569 value = parameter value |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
570 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
571 >>> params_file = "test-data/test.params" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
572 >>> graphprot_get_param_dic(params_file) |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
573 {'epochs': '20', 'lambda': '0.01', 'R': '1', 'D': '3', 'bitsize': '14', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
574 'model_type': 'sequence', 'pos_train_ws_pred_median': '0.760321', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
575 'pos_train_profile_median': '5.039610', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
576 'pos_train_avg_profile_median_1': '4.236340', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
577 'pos_train_avg_profile_median_2': '3.868431', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
578 'pos_train_avg_profile_median_3': '3.331277', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
579 'pos_train_avg_profile_median_4': '2.998667', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
580 'pos_train_avg_profile_median_5': '2.829782', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
581 'pos_train_avg_profile_median_6': '2.626623', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
582 'pos_train_avg_profile_median_7': '2.447083', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
583 'pos_train_avg_profile_median_8': '2.349919', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
584 'pos_train_avg_profile_median_9': '2.239829', \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
585 'pos_train_avg_profile_median_10': '2.161676'} |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
586 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
587 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
588 param_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
589 with open(params_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
590 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
591 cols = line.strip().split(" ") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
592 param = cols[0] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
593 setting = cols[1] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
594 if re.search(".+:", param): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
595 m = re.search("(.+):", line) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
596 par = m.group(1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
597 param_dic[par] = setting |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
598 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
599 return param_dic |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
600 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
601 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
602 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
603 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
604 def graphprot_filter_predictions_file(in_file, out_file, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
605 sc_thr=0): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
606 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
607 Filter GraphProt .predictions file by given score thr_sc. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
608 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
609 OUTPRED = open(out_file, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
610 with open(in_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
611 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
612 row = line.strip() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
613 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
614 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
615 if score < sc_thr: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
616 continue |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
617 OUTPRED.write("%s\n" % (row)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
618 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
619 OUTPRED.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
620 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
621 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
622 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
623 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
624 def fasta_read_in_ids(fasta_file): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
625 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
626 Given a .fa file, read in header IDs in order appearing in file, |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
627 and store in list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
628 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
629 >>> test_file = "test-data/test3.fa" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
630 >>> fasta_read_in_ids(test_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
631 ['SERBP1_K562_rep01_544', 'SERBP1_K562_rep02_709', 'SERBP1_K562_rep01_316'] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
632 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
633 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
634 ids_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
635 with open(fasta_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
636 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
637 if re.search(">.+", line): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
638 m = re.search(">(.+)", line) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
639 seq_id = m.group(1) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
640 ids_list.append(seq_id) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
641 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
642 return ids_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
643 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
644 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
645 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
646 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
647 def graphprot_profile_calc_avg_profile(in_file, out_file, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
648 ap_extlr=5, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
649 seq_ids_list=False, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
650 method=1): |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
651 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
652 Given a GraphProt .profile file, calculate average profiles and output |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
653 average profile file. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
654 Average profile means that the position-wise scores will get smoothed |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
655 out by calculating for each position a new score, taking a sequence |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
656 window -ap_extlr to +ap_extlr relative to the position |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
657 and calculate the mean score over this window. The mean score then |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
658 becomes the new average profile score at this position. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
659 Two different implementations of the task are given: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
660 method=1 (new python implementation, slower + more memory but easy to read) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
661 method=2 (old perl implementation, faster and less memory but more code) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
662 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
663 >>> in_file = "test-data/test2.profile" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
664 >>> out_file1 = "test-data/test2_1.avg_profile" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
665 >>> out_file2 = "test-data/test2_2.avg_profile" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
666 >>> out_file4 = "test-data/test2_3.avg_profile" |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
667 >>> graphprot_profile_calc_avg_profile(in_file, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
668 out_file1, ap_extlr=2, method=1) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
669 >>> graphprot_profile_calc_avg_profile(in_file, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
670 out_file2, ap_extlr=2, method=2) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
671 >>> diff_two_files_identical(out_file1, out_file2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
672 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
673 >>> test_list = ["s1", "s2", "s3", "s4"] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
674 >>> out_file3_exp = "test-data/test3_added_ids_exp.avg_profile" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
675 >>> out_file3 = "test-data/test3_added_ids_out.avg_profile" |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
676 >>> graphprot_profile_calc_avg_profile(in_file, out_file3, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
677 ap_extlr=2, method=1, seq_ids_list=test_list) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
678 >>> diff_two_files_identical(out_file3_exp, out_file3) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
679 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
680 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
681 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
682 if method == 1: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
683 # Dictionary of lists, with list of scores (value) for each site (key). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
684 lists_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
685 site_starts_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
686 with open(in_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
687 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
688 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
689 site_id = int(cols[0]) |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
690 pos = int(cols[1]) # 0-based. |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
691 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
692 # Store first position of site. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
693 if site_id not in site_starts_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
694 site_starts_dic[site_id] = pos |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
695 if site_id in lists_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
696 lists_dic[site_id].append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
697 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
698 lists_dic[site_id] = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
699 lists_dic[site_id].append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
700 f.close() |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
701 # Check number of IDs (# FASTA IDs has to be same as # site IDs). |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
702 if seq_ids_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
703 c_seq_ids = len(seq_ids_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
704 c_site_ids = len(site_starts_dic) |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
705 assert c_seq_ids == c_site_ids, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
706 "# sequence IDs != # site IDs (%i != %i)" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
707 % (c_seq_ids, c_site_ids) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
708 OUTPROF = open(out_file, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
709 # For each site, calculate average profile scores list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
710 for site_id in lists_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
711 # Convert profile score list to average profile scores list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
712 aps_list = list_moving_window_average_values(lists_dic[site_id], |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
713 win_extlr=ap_extlr) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
714 start_pos = site_starts_dic[site_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
715 # Get original FASTA sequence ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
716 if seq_ids_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
717 site_id = seq_ids_list[site_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
718 for i, sc in enumerate(aps_list): |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
719 pos = i + start_pos + 1 # make 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
720 OUTPROF.write("%s\t%i\t%f\n" % (site_id, pos, sc)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
721 OUTPROF.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
722 elif method == 2: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
723 OUTPROF = open(out_file, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
724 # Old site ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
725 old_id = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
726 # Current site ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
727 cur_id = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
728 # Scores list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
729 scores_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
730 site_starts_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
731 with open(in_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
732 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
733 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
734 cur_id = int(cols[0]) |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
735 pos = int(cols[1]) # 0-based. |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
736 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
737 # Store first position of site. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
738 if cur_id not in site_starts_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
739 site_starts_dic[cur_id] = pos |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
740 # Case: new site (new column 1 ID). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
741 if cur_id != old_id: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
742 # Process old id scores. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
743 if scores_list: |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
744 aps_list = \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
745 list_moving_window_average_values( |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
746 scores_list, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
747 win_extlr=ap_extlr) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
748 start_pos = site_starts_dic[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
749 seq_id = old_id |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
750 # Get original FASTA sequence ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
751 if seq_ids_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
752 seq_id = seq_ids_list[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
753 for i, sc in enumerate(aps_list): |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
754 pos = i + start_pos + 1 # make 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
755 OUTPROF.write("%s\t%i\t%f\n" % (seq_id, pos, sc)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
756 # Reset list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
757 scores_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
758 old_id = cur_id |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
759 scores_list.append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
760 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
761 # Add to scores_list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
762 scores_list.append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
763 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
764 # Process last block. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
765 if scores_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
766 aps_list = list_moving_window_average_values(scores_list, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
767 win_extlr=ap_extlr) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
768 start_pos = site_starts_dic[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
769 seq_id = old_id |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
770 # Get original FASTA sequence ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
771 if seq_ids_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
772 seq_id = seq_ids_list[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
773 for i, sc in enumerate(aps_list): |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
774 pos = i + start_pos + 1 # make 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
775 OUTPROF.write("%s\t%i\t%f\n" % (seq_id, pos, sc)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
776 OUTPROF.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
777 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
778 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
779 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
780 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
781 def graphprot_profile_extract_peak_regions(in_file, out_file, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
782 max_merge_dist=0, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
783 sc_thr=0): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
784 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
785 Extract peak regions from GraphProt .profile file. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
786 Store the peak regions (defined as regions with scores >= sc_thr) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
787 as to out_file in 6-column .bed. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
788 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
789 TODO: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
790 Add option for genomic coordinates input (+ - polarity support). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
791 Output genomic regions instead of sequence regions. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
792 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
793 >>> in_file = "test-data/test4.avg_profile" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
794 >>> out_file = "test-data/test4_out.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
795 >>> exp_file = "test-data/test4_out_exp.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
796 >>> exp2_file = "test-data/test4_out_exp2.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
797 >>> empty_file = "test-data/empty_file" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
798 >>> graphprot_profile_extract_peak_regions(in_file, out_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
799 >>> diff_two_files_identical(out_file, exp_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
800 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
801 >>> graphprot_profile_extract_peak_regions(in_file, out_file, sc_thr=10) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
802 >>> diff_two_files_identical(out_file, empty_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
803 True |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
804 >>> graphprot_profile_extract_peak_regions(in_file, out_file, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
805 max_merge_dist=2) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
806 >>> diff_two_files_identical(out_file, exp2_file) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
807 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
808 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
809 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
810 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
811 OUTPEAKS = open(out_file, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
812 # Old site ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
813 old_id = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
814 # Current site ID. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
815 cur_id = "" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
816 # Scores list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
817 scores_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
818 site_starts_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
819 with open(in_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
820 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
821 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
822 cur_id = cols[0] |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
823 pos = int(cols[1]) # 0-based. |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
824 score = float(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
825 # Store first position of site. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
826 if cur_id not in site_starts_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
827 # If first position != zero, we assume positions are 1-based. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
828 if pos != 0: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
829 # Make index 0-based. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
830 site_starts_dic[cur_id] = pos - 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
831 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
832 site_starts_dic[cur_id] = pos |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
833 # Case: new site (new column 1 ID). |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
834 if cur_id != old_id: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
835 # Process old id scores. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
836 if scores_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
837 # Extract peaks from region. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
838 peak_list = \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
839 list_extract_peaks(scores_list, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
840 max_merge_dist=max_merge_dist, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
841 coords="bed", |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
842 sc_thr=sc_thr) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
843 start_pos = site_starts_dic[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
844 # Print out peaks in .bed format. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
845 for ln in peak_list: |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
846 peak_s = start_pos + ln[0] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
847 peak_e = start_pos + ln[1] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
848 site_id = "%s,%i" % (old_id, ln[2]) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
849 OUTPEAKS.write("%s\t%i\t%i" |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
850 "\t%s\t%f\t+\n" |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
851 % (old_id, peak_s, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
852 peak_e, site_id, ln[3])) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
853 # Reset list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
854 scores_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
855 old_id = cur_id |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
856 scores_list.append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
857 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
858 # Add to scores_list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
859 scores_list.append(score) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
860 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
861 # Process last block. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
862 if scores_list: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
863 # Extract peaks from region. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
864 peak_list = list_extract_peaks(scores_list, |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
865 max_merge_dist=max_merge_dist, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
866 coords="bed", |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
867 sc_thr=sc_thr) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
868 start_pos = site_starts_dic[old_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
869 # Print out peaks in .bed format. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
870 for ln in peak_list: |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
871 peak_s = start_pos + ln[0] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
872 peak_e = start_pos + ln[1] |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
873 site_id = "%s,%i" % (old_id, ln[2]) # best score also 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
874 OUTPEAKS.write("%s\t%i\t%i\t%s\t%f\t+\n" |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
875 % (old_id, peak_s, peak_e, site_id, ln[3])) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
876 OUTPEAKS.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
877 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
878 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
879 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
880 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
881 def list_extract_peaks(in_list, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
882 max_merge_dist=0, |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
883 coords="list", |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
884 sc_thr=0): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
885 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
886 Extract peak regions from list. |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
887 Peak region is defined as region >= score threshold. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
888 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
889 coords=bed : peak start 0-based, peak end 1-based. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
890 coords=list : peak start 0-based, peak end 0-based. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
891 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
892 >>> test_list = [-1, 0, 2, 4.5, 1, -1, 5, 6.5] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
893 >>> list_extract_peaks(test_list) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
894 [[1, 4, 3, 4.5], [6, 7, 7, 6.5]] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
895 >>> list_extract_peaks(test_list, sc_thr=2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
896 [[2, 3, 3, 4.5], [6, 7, 7, 6.5]] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
897 >>> list_extract_peaks(test_list, sc_thr=2, coords="bed") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
898 [[2, 4, 4, 4.5], [6, 8, 8, 6.5]] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
899 >>> list_extract_peaks(test_list, sc_thr=10) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
900 [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
901 >>> test_list = [2, -1, 3, -1, 4, -1, -1, 6, 9] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
902 >>> list_extract_peaks(test_list, max_merge_dist=2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
903 [[0, 4, 4, 4], [7, 8, 8, 9]] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
904 >>> list_extract_peaks(test_list, max_merge_dist=3) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
905 [[0, 8, 8, 9]] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
906 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
907 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
908 # Check. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
909 assert len(in_list), "Given list is empty" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
910 # Peak regions list. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
911 peak_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
912 # Help me. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
913 inside = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
914 pr_s = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
915 pr_e = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
916 pr_top_pos = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
917 pr_top_sc = -100000 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
918 for i, sc in enumerate(in_list): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
919 # Part of peak region? |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
920 if sc >= sc_thr: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
921 # At peak start. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
922 if not inside: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
923 pr_s = i |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
924 pr_e = i |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
925 inside = True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
926 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
927 # Inside peak region. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
928 pr_e = i |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
929 # Store top position. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
930 if sc > pr_top_sc: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
931 pr_top_sc = sc |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
932 pr_top_pos = i |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
933 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
934 # Before was peak region? |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
935 if inside: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
936 # Store peak region. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
937 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
938 peak_list.append(peak_infos) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
939 inside = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
940 pr_top_pos = 0 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
941 pr_top_sc = -100000 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
942 # If peak at the end, also report. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
943 if inside: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
944 # Store peak region. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
945 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
946 peak_list.append(peak_infos) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
947 # Merge peaks. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
948 if max_merge_dist and len(peak_list) > 1: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
949 iterate = True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
950 while iterate: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
951 merged_peak_list = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
952 added_peaks_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
953 peaks_merged = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
954 for i, l in enumerate(peak_list): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
955 if i in added_peaks_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
956 continue |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
957 j = i + 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
958 # Last element. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
959 if j == len(peak_list): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
960 if i not in added_peaks_dic: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
961 merged_peak_list.append(peak_list[i]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
962 break |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
963 # Compare two elements. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
964 new_peak = [] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
965 if (peak_list[j][0] - peak_list[i][1]) <= max_merge_dist: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
966 peaks_merged = True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
967 new_top_pos = peak_list[i][2] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
968 new_top_sc = peak_list[i][3] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
969 if peak_list[i][3] < peak_list[j][3]: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
970 new_top_pos = peak_list[j][2] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
971 new_top_sc = peak_list[j][3] |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
972 new_peak = [peak_list[i][0], peak_list[j][1], |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
973 new_top_pos, new_top_sc] |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
974 # If two peaks were merged. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
975 if new_peak: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
976 merged_peak_list.append(new_peak) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
977 added_peaks_dic[i] = 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
978 added_peaks_dic[j] = 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
979 else: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
980 merged_peak_list.append(peak_list[i]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
981 added_peaks_dic[i] = 1 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
982 if not peaks_merged: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
983 iterate = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
984 peak_list = merged_peak_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
985 peaks_merged = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
986 # If peak coordinates should be in .bed format, make peak ends 1-based. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
987 if coords == "bed": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
988 for i in range(len(peak_list)): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
989 peak_list[i][1] += 1 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
990 peak_list[i][2] += 1 # 1-base best score position too. |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
991 return peak_list |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
992 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
993 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
994 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
995 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
996 def bed_peaks_to_genomic_peaks(peak_file, genomic_peak_file, genomic_sites_bed, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
997 print_rows=False): |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
998 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
999 Given a .bed file of sequence peak regions (possible coordinates from |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1000 0 to length of s), convert peak coordinates to genomic coordinates. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1001 Do this by taking genomic regions of sequences as input. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1002 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1003 >>> test_in = "test-data/test.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1004 >>> test_exp = "test-data/test_exp.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1005 >>> test_out = "test-data/test_out.peaks.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1006 >>> gen_in = "test-data/test.peaks_genomic.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1007 >>> bed_peaks_to_genomic_peaks(test_in, test_out, gen_in) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1008 >>> diff_two_files_identical(test_out, test_exp) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1009 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1010 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1011 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1012 # Read in genomic region info. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1013 id2row_dic = {} |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1014 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1015 with open(genomic_sites_bed) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1016 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1017 row = line.strip() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1018 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1019 site_id = cols[3] |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1020 assert site_id \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1021 not in id2row_dic, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1022 "column 4 IDs not unique in given .bed file \"%s\"" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1023 % (genomic_sites_bed) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1024 id2row_dic[site_id] = row |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1025 f.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1026 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1027 # Read in peaks file and convert coordinates. |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1028 OUTPEAKS = open(genomic_peak_file, "w") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1029 with open(peak_file) as f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1030 for line in f: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1031 cols = line.strip().split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1032 site_id = cols[0] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1033 site_s = int(cols[1]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1034 site_e = int(cols[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1035 site_id2 = cols[3] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1036 site_sc = float(cols[4]) |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1037 assert re.search(".+,.+", site_id2), \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1038 "regular expression failed for ID \"%s\"" % (site_id2) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1039 m = re.search(r".+,(\d+)", site_id2) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1040 sc_pos = int(m.group(1)) # 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1041 assert site_id in id2row_dic, \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1042 "site ID \"%s\" not found in genomic sites dictionary" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1043 % (site_id) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1044 row = id2row_dic[site_id] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1045 rowl = row.split("\t") |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1046 gen_chr = rowl[0] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1047 gen_s = int(rowl[1]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1048 gen_e = int(rowl[2]) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1049 gen_pol = rowl[5] |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1050 new_s = site_s + gen_s |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1051 new_e = site_e + gen_s |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1052 new_sc_pos = sc_pos + gen_s |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1053 if gen_pol == "-": |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1054 new_s = gen_e - site_e |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1055 new_e = gen_e - site_s |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1056 new_sc_pos = gen_e - sc_pos + 1 # keep 1-based. |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1057 new_row = "%s\t%i\t%i\t%s,%i\t%f\t%s" \ |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1058 % (gen_chr, new_s, new_e, |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1059 site_id, new_sc_pos, site_sc, gen_pol) |
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1060 OUTPEAKS.write("%s\n" % (new_row)) |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1061 if print_rows: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1062 print(new_row) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1063 OUTPEAKS.close() |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1064 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1065 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1066 ############################################################################### |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1067 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1068 def diff_two_files_identical(file1, file2): |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1069 """ |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1070 Check whether two files are identical. Return true if diff reports no |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1071 differences. |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1072 |
1
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1073 >>> file1 = "test-data/file1" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1074 >>> file2 = "test-data/file2" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1075 >>> diff_two_files_identical(file1, file2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1076 True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1077 >>> file1 = "test-data/test1.bed" |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1078 >>> diff_two_files_identical(file1, file2) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1079 False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1080 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1081 """ |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1082 same = True |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1083 check_cmd = "diff " + file1 + " " + file2 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1084 output = subprocess.getoutput(check_cmd) |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1085 if output: |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1086 same = False |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1087 return same |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1088 |
adcc4c457c3c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1089 |
3
9a83a84a25a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1090 ############################################################################### |